Skip to content

Commit 9c9a5f8

Browse files
committed
feat(website): add chrome crawl_chrome_send thread safe
1 parent 99dcb92 commit 9c9a5f8

File tree

11 files changed

+485
-60
lines changed

11 files changed

+485
-60
lines changed

Cargo.lock

+6-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,11 @@ name = "chrome_remote"
138138
path = "chrome_remote.rs"
139139
required-features = ["spider/sync", "spider/chrome"]
140140

141+
[[example]]
142+
name = "chrome_sendable"
143+
path = "chrome_sendable.rs"
144+
required-features = ["spider/sync", "spider/chrome"]
145+
141146
[[example]]
142147
name = "real_world"
143148
path = "real_world.rs"

examples/chrome_sendable.rs

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
//! cargo run --example chrome_sendable --features="chrome chrome_intercept"
2+
3+
extern crate spider;
4+
use crate::spider::tokio::io::AsyncWriteExt;
5+
use spider::features::chrome_common::RequestInterceptConfiguration;
6+
use spider::tokio;
7+
use spider::website::Website;
8+
use std::io::Result;
9+
10+
async fn crawl_website(website: &Website, url: &str) -> Result<()> {
11+
let start = crate::tokio::time::Instant::now();
12+
13+
website.crawl_chrome_send(Some(url)).await;
14+
15+
println!(
16+
"Time elapsed in website.crawl({}) is: {:?}",
17+
url,
18+
start.elapsed(),
19+
);
20+
21+
Ok(())
22+
}
23+
24+
#[tokio::main]
25+
async fn main() -> Result<()> {
26+
let mut stdout = tokio::io::stdout();
27+
let mut website = Website::default();
28+
29+
website
30+
.with_limit(5)
31+
.with_chrome_intercept(RequestInterceptConfiguration::new(true))
32+
.with_stealth(true)
33+
.with_fingerprint(true)
34+
.with_chrome_connection(Some("http://127.0.0.1:9222/json/version".into()));
35+
36+
website.configure_setup().await;
37+
38+
let mut rx2 = website.subscribe(16).unwrap();
39+
40+
let handle = tokio::spawn(async move {
41+
while let Ok(page) = rx2.recv().await {
42+
let _ = stdout
43+
.write_all(
44+
format!(
45+
"- {} -- Bytes transferred {:?} -- HTML Size {:?}\n",
46+
page.get_url(),
47+
page.bytes_transferred.unwrap_or_default(),
48+
page.get_html_bytes_u8().len()
49+
)
50+
.as_bytes(),
51+
)
52+
.await;
53+
}
54+
});
55+
56+
let _ = tokio::join!(
57+
crawl_website(&website, "https://choosealicense.com"),
58+
crawl_website(&website, "https://jeffmendez.com"),
59+
crawl_website(&website, "https://example.com"),
60+
);
61+
62+
drop(website);
63+
64+
let _ = handle.await;
65+
66+
Ok(())
67+
}

spider/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider"
3-
version = "2.32.6"
3+
version = "2.32.8"
44
authors = [
55
"j-mendez <[email protected]>"
66
]

spider/src/features/chrome.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::time::Duration;
2+
13
use crate::utils::log;
24
use crate::{configuration::Configuration, tokio_stream::StreamExt};
35
use chromiumoxide::cdp::browser_protocol::browser::{
@@ -7,6 +9,7 @@ use chromiumoxide::cdp::browser_protocol::{
79
browser::BrowserContextId, network::CookieParam, target::CreateTargetParams,
810
};
911
use chromiumoxide::error::CdpError;
12+
use chromiumoxide::handler::REQUEST_TIMEOUT;
1013
use chromiumoxide::page::DISABLE_DIALOGS;
1114
use chromiumoxide::Page;
1215
use chromiumoxide::{handler::HandlerConfig, Browser, BrowserConfig};
@@ -86,7 +89,7 @@ pub fn get_browser_config(
8689
.disable_default_args()
8790
.request_timeout(match request_timeout.as_ref() {
8891
Some(timeout) => **timeout,
89-
_ => Default::default(),
92+
_ => Duration::from_millis(REQUEST_TIMEOUT),
9093
});
9194

9295
let builder = if cache_enabled {
@@ -156,7 +159,7 @@ pub fn get_browser_config(
156159
.no_sandbox()
157160
.request_timeout(match request_timeout.as_ref() {
158161
Some(timeout) => **timeout,
159-
_ => Default::default(),
162+
_ => Duration::from_millis(REQUEST_TIMEOUT),
160163
})
161164
.with_head();
162165

@@ -221,7 +224,7 @@ fn create_handler_config(config: &Configuration) -> HandlerConfig {
221224
HandlerConfig {
222225
request_timeout: match config.request_timeout.as_ref() {
223226
Some(timeout) => **timeout,
224-
_ => Default::default(),
227+
_ => Duration::from_millis(REQUEST_TIMEOUT),
225228
},
226229
request_intercept: config.chrome_intercept.enabled,
227230
cache_enabled: config.cache,
@@ -716,10 +719,10 @@ impl BrowserController {
716719
// assume close will always happen.
717720
self.closed = true;
718721
if let Some(id) = self.browser.2.take() {
722+
let _ = self.browser.0.quit_incognito_context_base(id).await;
719723
if let Some(handler) = self.browser.1.take() {
720724
// we have to quit the context until https://chromedevtools.github.io/devtools-protocol/tot/Target/#method-createBrowserContext
721725
// disposeOnDetach comes out of Experimental.
722-
let _ = self.browser.0.quit_incognito_context_base(id).await;
723726
handler.abort();
724727
}
725728
}

0 commit comments

Comments
 (0)