Skip to content

Commit bce0020

Browse files
authored
Merge pull request #58 from oiwn/dev
Dev
2 parents fe54a1a + c1932e7 commit bce0020

File tree

8 files changed

+124
-36
lines changed

8 files changed

+124
-36
lines changed

.amc.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
delimiter = "---"
2-
extensions = [".rs", ".js", ".py", ".toml"] # Add any file extensions you want to process
2+
extensions = [".rs", ".toml"]
33
llm_prompt = """
44
This is a collection of source code files from a project. Each file is separated by '---' delimiters.
55
The files include Git metadata showing their last modification details.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,4 @@ tokio = { version = "1.42", features = ["full"] }
3434
# Utils
3535
async-trait = { version = "0.1" }
3636
uuid = { version = "1.11", features = ["v4", "serde"] }
37-
rand = { version = "0.8" }
37+
rand = { version = "0.8" }

capp/Cargo.toml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ http-body-util = "0.1"
5656
bytes = "1.6"
5757
pin-project-lite = "0.2"
5858
dotenvy = "0.15"
59-
scraper = "0.21"
59+
scraper = "0.22"
6060
rand = "0.8"
6161
md5 = "0.7"
6262
url = "2.5"
@@ -70,3 +70,13 @@ healthcheck = ["dep:reqwest"]
7070
redis = ["capp-queue/redis", "dep:rustis"]
7171
mongodb = ["capp-queue/mongodb", "dep:mongodb"]
7272
postgres = ["capp-queue/postgres", "dep:sqlx"]
73+
74+
[[example]]
75+
name = "basic"
76+
path = "../examples/basic.rs"
77+
78+
# [[example]]
79+
# name = "hackernews"
80+
# path = "../examples/hackernews/main.rs"
81+
82+

capp/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
//! - `task`: Definitions and utilities for working with tasks.
4545
pub mod manager;
4646
pub mod prelude;
47+
#[cfg(feature = "http")]
48+
pub use config::http;
4749

4850
// re-export
4951
pub use async_trait;

capp/src/manager/worker.rs

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use tokio::sync::{
88
broadcast,
99
mpsc::{self, error::TryRecvError},
1010
};
11+
use tracing::{debug, error, info, info_span, warn};
1112

1213
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
1314
pub struct WorkerId(usize);
@@ -82,11 +83,7 @@ where
8283
// Implement limiting amount of tasks per worker
8384
if let Some(limit) = self.options.task_limit {
8485
if self.stats.tasks_processed >= limit {
85-
tracing::info!(
86-
"[{}] task_limit reached: {}",
87-
self.worker_id,
88-
limit
89-
);
86+
warn!("[{}] task_limit reached: {}", self.worker_id, limit);
9087
return Ok(false);
9188
}
9289
};
@@ -110,11 +107,9 @@ where
110107
task.set_succeed();
111108
self.queue.set(&task).await.unwrap();
112109
self.queue.ack(&task.task_id).await.unwrap();
113-
tracing::info!(
110+
info!(
114111
"[{}] Task {} succeed: {:?}",
115-
self.worker_id,
116-
&task.task_id,
117-
&task.payload
112+
self.worker_id, &task.task_id, &task.payload
118113
);
119114

120115
// record stats on success
@@ -125,22 +120,16 @@ where
125120
task.set_retry(&err.to_string());
126121
if task.retries < self.options.max_retries {
127122
self.queue.push(&task).await.unwrap();
128-
tracing::error!(
123+
error!(
129124
"[{}] Task {} failed, retrying ({}): {:?}",
130-
self.worker_id,
131-
&task.task_id,
132-
&task.retries,
133-
&err
125+
self.worker_id, &task.task_id, &task.retries, &err
134126
);
135127
} else {
136128
task.set_dlq("Max retries");
137129
self.queue.nack(&task).await.unwrap();
138-
tracing::error!(
130+
error!(
139131
"[{}] Task {} failed, max retries ({}): {:?}",
140-
self.worker_id,
141-
&task.task_id,
142-
&task.retries,
143-
&err
132+
self.worker_id, &task.task_id, &task.retries, &err
144133
);
145134
}
146135

@@ -150,7 +139,7 @@ where
150139
}
151140
}
152141
Err(TaskQueueError::QueueEmpty) => {
153-
tracing::warn!("[{}] No tasks found, waiting...", self.worker_id);
142+
warn!("[{}] No tasks found, waiting...", self.worker_id);
154143
// wait for a while till try to fetch task
155144
tokio::time::sleep(self.options.no_task_found_delay).await;
156145
}
@@ -220,13 +209,13 @@ pub async fn worker_wrapper<Data, Comp, Ctx>(
220209
'worker: loop {
221210
tokio::select! {
222211
_ = terminate.recv() => {
223-
tracing::info!("Terminating immediately");
212+
info!("Terminating immediately");
224213
return;
225214
},
226215
run_result = worker.run(), if !should_stop => {
227216
match commands.try_recv() {
228217
Ok(WorkerCommand::Shutdown) => {
229-
tracing::error!("[{}] Shutdown received", worker_id);
218+
error!("[{}] Shutdown received", worker_id);
230219
should_stop = true;
231220
}
232221
Err(TryRecvError::Disconnected) => break 'worker,
@@ -245,15 +234,12 @@ pub async fn worker_wrapper<Data, Comp, Ctx>(
245234

246235
// If a stop command was received, finish any ongoing work and then exit.
247236
if should_stop {
248-
tracing::info!(
249-
"[{}] Completing current task before stopping.",
250-
worker_id
251-
);
237+
info!("[{}] Completing current task before stopping.", worker_id);
252238
break;
253239
}
254240
}
255241

256-
tracing::info!("completed");
242+
info!("completed");
257243
}
258244

259245
/// This wrapper used to create new Worker setup internal logging
@@ -287,7 +273,7 @@ pub async fn worker_wrapper_old<Data, Comp, Ctx>(
287273
let mut should_stop = false;
288274

289275
// setup spans
290-
let span = tracing::info_span!("worker", _id = %worker_id);
276+
let span = info_span!("worker", _id = %worker_id);
291277
let _enter = span.enter();
292278

293279
'worker: loop {

examples/basic.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ use capp::prelude::{
55
use capp::{
66
config::Configurable,
77
manager::{WorkersManager, WorkersManagerOptionsBuilder},
8-
queue::{AbstractTaskQueue, InMemoryTaskQueue, TaskQueue},
9-
task::Task,
8+
queue::{
9+
AbstractTaskQueue, InMemoryTaskQueue, JsonSerializer, Task, TaskQueue,
10+
},
1011
};
1112
use serde::{Deserialize, Serialize};
1213
use std::{path, sync::Arc};
@@ -76,7 +77,8 @@ impl Computation<TaskData, Context> for DivisionComputation {
7677
/// total tasks = 9
7778
/// number of failed tasks = 4
7879
async fn make_storage() -> impl TaskQueue<TaskData> + Send + Sync {
79-
let storage = InMemoryTaskQueue::new();
80+
let storage: InMemoryTaskQueue<TaskData, JsonSerializer> =
81+
InMemoryTaskQueue::new();
8082

8183
for i in 1..=5 {
8284
let task: Task<TaskData> = Task::new(TaskData {

notes.org

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
#+title: Notes
22

33
* Major Tasks
4+
** Shared State (to run like a server with REST?)
5+
** tui interface
6+
** consider using https://crates.io/crates/config instead of the current configuration?
47
** figure if it can get rid of returning yaml_value in favour of some generic configuration type
58
** DONE fix RoundRobin queue
69
i think it's done
710
** Redis backend should return RedisError, not custom ones.
8-
** TODO mongodb queue
9-
** TODO postgres queue
11+
** DONE mongodb queue
12+
** DONE postgres queue
1013
** TODO benchmarks criterion
1114

1215
* Tasks

review.md

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Code Review: CAPP (Comprehensive Asynchronous Parallel Processing) Framework
2+
3+
## Overview
4+
CAPP is a Rust framework for building distributed task processing systems,
5+
with a particular focus on web crawlers. The codebase demonstrates strong Rust
6+
practices and a well-thought-out architecture.
7+
8+
## Architecture Analysis
9+
10+
### Core Components
11+
12+
1. **Task Queue System**
13+
- Multiple backend implementations (Redis, MongoDB, Postgres, In-Memory)
14+
- Generic task handling with serialization support
15+
- Dead Letter Queue (DLQ) for failed tasks
16+
- Round-robin task distribution capability
17+
18+
2. **Worker Management**
19+
- Concurrent worker execution with configurable limits
20+
- Graceful shutdown handling
21+
- Per-worker statistics tracking
22+
- Task retry mechanism with configurable policies
23+
24+
3. **Configuration System**
25+
- YAML-based configuration
26+
- Proxy support with round-robin and random selection
27+
- Environment variable integration
28+
- Flexible HTTP client configuration
29+
30+
### Design Patterns
31+
32+
1. **Builder Pattern**
33+
- Effectively used for WorkerOptions and WorkersManagerOptions
34+
- Clean configuration initialization
35+
- Clear default values
36+
37+
2. **Trait-based Abstraction**
38+
- `TaskQueue` trait for storage backends
39+
- `Computation` trait for task processing
40+
- `TaskSerializer` for data serialization
41+
42+
3. **Error Handling**
43+
- Custom error types with thiserror
44+
- Proper error propagation
45+
- Contextual error messages
46+
47+
## Strengths
48+
49+
1. **Modularity**
50+
- Clean separation between components
51+
- Feature flags for optional components
52+
- Well-defined interfaces
53+
54+
2. **Concurrency Control**
55+
- Proper use of tokio for async operations
56+
- Thread-safe shared state handling
57+
- Graceful shutdown mechanisms
58+
59+
3. **Testing**
60+
- Comprehensive test coverage
61+
- Integration tests for each backend
62+
- Mock implementations for testing
63+
64+
## Areas for Improvement
65+
66+
1. **Documentation**
67+
- While generally good, some public APIs lack detailed examples
68+
- More inline documentation for complex algorithms would be helpful
69+
- Consider adding architecture diagrams
70+
71+
2. **Error Handling Enhancements**
72+
```rust
73+
// Current:
74+
pub enum TaskQueueError {
75+
QueueError(String),
76+
SerdeError(String),
77+
// ...
78+
}
79+
80+
// Suggestion: Add more context
81+
pub enum TaskQueueError {
82+
QueueError { message: String, context: String },
83+
SerdeError { message: String, data_type: String },
84+
// ...
85+
}

0 commit comments

Comments
 (0)