-
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
106 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
use crate::*; | ||
|
||
use tokio::sync::Mutex; | ||
|
||
pub struct Engine { | ||
spiders: Mutex<Vec<Box<dyn Spider>>>, | ||
downloader_middleware: Mutex<Vec<Box<dyn DownloaderMiddleware>>>, | ||
item_middleware: Mutex<Vec<Box<dyn ItemMiddleware>>>, | ||
item_pipeline: Mutex<Vec<Box<dyn ItemPipeline>>>, | ||
} | ||
|
||
impl Engine { | ||
pub async fn add_spider(&self, spider: Box<dyn Spider>) { | ||
self.spiders.lock().await.push(spider); | ||
} | ||
|
||
pub async fn append_downloader_middleware(&self, middleware: Box<dyn DownloaderMiddleware>) { | ||
self.downloader_middleware.lock().await.push(middleware); | ||
} | ||
|
||
pub async fn append_item_middleware(&self, middleware: Box<dyn ItemMiddleware>) { | ||
self.item_middleware.lock().await.push(middleware); | ||
} | ||
|
||
pub async fn append_item_pipeline(&self, pipeline: Box<dyn ItemPipeline>) { | ||
self.item_pipeline.lock().await.push(pipeline); | ||
} | ||
|
||
pub async fn start(&self) { | ||
let spiders: Vec<Box<dyn Spider>> = std::mem::take(self.spiders.lock().await.as_mut()); | ||
|
||
for spider in spiders { | ||
loop { | ||
let next = spider.next_request().await; | ||
if let Ok(val) = next { | ||
let Some(rep) = val else { | ||
break; | ||
}; | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
/// Shorthand for a `Result` whose error type is [`MiniScrapyError`].
pub type MiniScrapyResult<T> = Result<T, MiniScrapyError>;

/// Errors produced by the crate.
///
/// The enum has no variants yet, so no value of this type can exist. It still
/// implements [`std::fmt::Display`] and [`std::error::Error`] so that it
/// composes with `?` and `Box<dyn Error>` from the start.
#[derive(Debug)]
pub enum MiniScrapyError {}

impl std::fmt::Display for MiniScrapyError {
    fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // No variants exist, so there is nothing to format; the empty match
        // forces this to be revisited as soon as one is added.
        match *self {}
    }
}

impl std::error::Error for MiniScrapyError {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,7 @@ | ||
use std::sync::Arc; | ||
mod engine; | ||
mod error; | ||
mod traits; | ||
|
||
use async_trait::async_trait; | ||
use reqwest::{Request, Response}; | ||
use serde_json::Value; | ||
|
||
#[async_trait] | ||
pub trait Spider { | ||
async fn next(&self) -> Option<Request>; | ||
async fn parse(&self, response: Response) -> Option<Value>; | ||
} | ||
|
||
pub enum DownloaderMiddlewareReturn { | ||
None, | ||
Ignore, | ||
Request(Request), | ||
Response(Response), | ||
} | ||
|
||
#[async_trait] | ||
pub trait DownloaderMiddleware { | ||
async fn process_request( | ||
&self, | ||
request: Request, | ||
_spider: &dyn Spider, | ||
) -> DownloaderMiddlewareReturn { | ||
DownloaderMiddlewareReturn::Request(request) | ||
} | ||
async fn process_response( | ||
&self, | ||
response: Response, | ||
_spider: &dyn Spider, | ||
) -> DownloaderMiddlewareReturn { | ||
DownloaderMiddlewareReturn::Response(response) | ||
} | ||
} | ||
|
||
#[async_trait] | ||
pub trait ItemMiddleware { | ||
async fn process_item(&self, item: Value, spider: &dyn Spider); | ||
async fn open_spider(&self, _spider: &dyn Spider) {} | ||
async fn close_spider(&self, _spider: &dyn Spider) {} | ||
} | ||
|
||
#[async_trait] | ||
pub trait Pipeline { | ||
async fn open_spider(&self, spider: Arc<dyn Spider>); | ||
async fn process_item(&self, item: Value); | ||
async fn close_spider(&self); | ||
} | ||
pub use engine::*; | ||
pub use error::*; | ||
pub use traits::*; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
use async_trait::async_trait; | ||
use reqwest::{Request, Response}; | ||
use serde_json::Value; | ||
|
||
use crate::MiniScrapyResult; | ||
|
||
#[async_trait] | ||
pub trait Spider { | ||
fn name(&self) -> &str; | ||
async fn next_request(&self) -> MiniScrapyResult<Option<Request>>; | ||
async fn parse(&self, response: Response) -> Option<Value>; | ||
} | ||
|
||
pub enum DownloaderMiddlewareReturn { | ||
None, | ||
Ignore, | ||
Request(Request), | ||
Response(Response), | ||
} | ||
|
||
#[async_trait] | ||
pub trait DownloaderMiddleware { | ||
async fn process_request( | ||
&self, | ||
request: Request, | ||
_spider: &dyn Spider, | ||
) -> DownloaderMiddlewareReturn { | ||
DownloaderMiddlewareReturn::Request(request) | ||
} | ||
async fn process_response( | ||
&self, | ||
response: Response, | ||
_spider: &dyn Spider, | ||
) -> DownloaderMiddlewareReturn { | ||
DownloaderMiddlewareReturn::Response(response) | ||
} | ||
} | ||
|
||
#[async_trait] | ||
pub trait ItemMiddleware { | ||
async fn process_item(&self, item: Value, spider: &dyn Spider); | ||
async fn open_spider(&self, _spider: &dyn Spider) {} | ||
async fn close_spider(&self, _spider: &dyn Spider) {} | ||
} | ||
|
||
#[async_trait] | ||
pub trait ItemPipeline { | ||
async fn open_spider(&self, spider: &dyn Spider); | ||
async fn process_item(&self, item: Value, spider: &dyn Spider); | ||
async fn close_spider(&self, spider: &dyn Spider); | ||
} |