Skip to content

Commit b08ef8c

Browse files
authored
Merge branch 'main' into main
2 parents 303c981 + 80666ca commit b08ef8c

File tree

5 files changed

+91
-46
lines changed

5 files changed

+91
-46
lines changed

Cargo.lock

Lines changed: 11 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ default-run = "kirby"
1010
argparse = "0.2.2"
1111
aws_lambda_events = "0.16.0"
1212
enum-map = { version = "0.4.1", features = ["serde"] }
13-
flate2 = { version = "1.0", features = ["zlib"], default-features = false }
13+
flate2 = { version = "1.0", features = ["zlib-rs"], default-features = false }
1414
lambda_runtime = { version = "0.13.0", features = ["tracing"] }
1515
lazy_static = "1.1.0"
1616
log = "0.4.5"

README.md

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,29 @@ This is... very good. For comparison, a Python script that used AWS Glue to do s
1414

1515
Then Rust got more optimized and Apple released the M1, and it got still faster. Finally, I found the [profile-guided optimization](https://doc.rust-lang.org/rustc/profile-guided-optimization.html) docs, and it improved even more than I thought possible.
1616

17+
Most recently, it also turned out there was [a highly contended mutex around the regular expressions](https://github.com/rubytogether/kirby/pull/37) and that bought the multi-core version something like 40-60% more speed.
18+
1719
### Wait, _how_ fast?
1820

19-
~525 records/second/cpu in Python on Apache Spark in AWS Glue
20-
~14,000 records/second/cpu in Ruby on a 2018 Intel MacBook Pro
21-
~353,000 records/second/cpu in Rust on a 2018 Intel MacBook Pro
22-
~550,000 records/second/cpu in Rust on a 2021 M1 MacBook Pro
23-
~638,000 records/second/cpu in Rust on M1 with profile-guided optimization
21+
~525 records/second/cpu in Python on Apache Spark in AWS Glue
22+
~14,000 records/second/cpu in Ruby on a 2018 Intel MacBook Pro
23+
~353,000 records/second/cpu in Rust on a 2018 Intel MacBook Pro
24+
~550,000 records/second/cpu in Rust on a 2021 M1 MacBook Pro
25+
~638,000 records/second/cpu in Rust on a 2021 M1 with PGO
26+
~935,500 records/second/cpu in Rust on a 2025 M4 Max MacBook Pro
27+
~983,500 records/second/cpu in Rust on a 2025 M4 Max with PGO
28+
~1,240,000 records/second/cpu in Rust on a 2024 Ryzen 9 9950X with PGO
2429

2530
### Are you kidding me?
2631

27-
No. The latest version (which I am now benchmarking without also running `cargo build` 🤦🏻‍♂️) can parse records really, really fast.
32+
No. The latest version can parse records really, really fast.
2833

29-
~4,200 records/second in Python with 8 worker instances on AWS Glue
30-
~1,085,000 records/second in Rust with rayon on an 8-core Intel MacBook Pro
31-
~3,195,000 records/second in Rust with rayon on a 10-core M1 MacBook Pro
32-
~3,583,000 records/second in Rust with rayon on M1 with profile-guided optimization
34+
~4,200 records/second in Python with 8 worker instances on AWS Glue
35+
~1,085,000 records/second in Rust with rayon on an 8-core Intel MacBook Pro
36+
~3,195,000 records/second in Rust with rayon on a 10-core M1 MacBook Pro
37+
~3,583,000 records/second in Rust with rayon on a 2021 M1 with PGO
38+
~10,789,000 records/second in Rust with rayon on a 16-core M4 Max with PGO
39+
~22,559,000 records/second in Rust with rayon on a 32-thread Ryzen 9 9950X with PGO
3340

3441
### What does it calculate?
3542

bin/bench

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ if [[ -n "${PGO:-}" ]]; then
2222

2323
# STEP 1: Build the instrumented binaries
2424
RUSTFLAGS="-Cprofile-generate=/tmp/pgo-data -Cllvm-args=-vp-counters-per-site=4" \
25-
cargo build --release
25+
cargo build --release --bin kirby
2626

2727
# STEP 2: Run the instrumented binaries with some typical data
28-
ls logs/* | xargs -L1 target/release/kirby
28+
ls logs/*.gz | xargs -I{} target/release/kirby "{}"
2929

3030
# STEP 3: Merge the `.profraw` files into a `.profdata` file
3131
$LLVM_PROFDATA merge -o /tmp/pgo-data/merged.profdata /tmp/pgo-data

src/lib.rs

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ pub fn print_unknown_user_agents(path: &str, opts: &Options) {
132132
});
133133
}
134134

135-
pub fn count_line(ctx: &user_agent::ParseCtx, times: &mut TimeMap, line: String) {
135+
pub fn count_line(ctx: &user_agent::ParseCtx, times: &mut TimeMap, line: &str) {
136136
let r: request::Request = serde_json::from_str(&line).unwrap();
137137

138138
if duplicate_request(&r) {
@@ -157,13 +157,26 @@ pub fn count_line(ctx: &user_agent::ParseCtx, times: &mut TimeMap, line: String)
157157
}
158158
}
159159

160-
pub fn stream_stats(stream: Box<dyn BufRead>, opts: &Options) -> TimeMap {
160+
pub fn stream_stats<'a>(mut stream: Box<dyn BufRead + 'a>, opts: &Options) -> TimeMap {
161161
let mut times = TimeMap::default();
162162
let mut lineno = 0;
163163

164164
let ctx = user_agent::ParseCtx::new();
165+
let mut line = String::with_capacity(1024*1024);
166+
167+
loop {
168+
line.clear();
169+
match stream.read_line(&mut line) {
170+
Ok(0) => break,
171+
Ok(_) => {}
172+
Err(e) => {
173+
if opts.verbose {
174+
eprintln!("Failed to read line:\n {}", e);
175+
}
176+
continue;
177+
}
178+
}
165179

166-
stream.lines().for_each(|line| {
167180
if opts.verbose {
168181
lineno += 1;
169182
if lineno % 100_000 == 0 {
@@ -172,17 +185,8 @@ pub fn stream_stats(stream: Box<dyn BufRead>, opts: &Options) -> TimeMap {
172185
}
173186
}
174187

175-
match line {
176-
Ok(l) => {
177-
count_line(&ctx, &mut times, l);
178-
}
179-
Err(e) => {
180-
if opts.verbose {
181-
eprintln!("Failed to read line:\n {}", e);
182-
}
183-
}
184-
}
185-
});
188+
count_line(&ctx, &mut times, line.as_str());
189+
}
186190

187191
if opts.verbose {
188192
println!();
@@ -196,3 +200,45 @@ pub fn file_stats(path: &str, opts: &Options) -> TimeMap {
196200
let file_stream = file::reader(path, opts);
197201
stream_stats(file_stream, opts)
198202
}
203+
204+
205+
#[cfg(test)]
206+
mod tests {
207+
extern crate test;
208+
209+
use super::*;
210+
use std::fs::File;
211+
use std::io::BufReader;
212+
use test::Bencher;
213+
214+
#[test]
215+
fn test_stream_stats() {
216+
let file = File::open("test/sample_500.log").unwrap();
217+
let reader = BufReader::new(file);
218+
let opts = Options {
219+
verbose: false,
220+
unknown: false,
221+
paths: vec![],
222+
};
223+
let times = stream_stats(Box::new(reader), &opts);
224+
assert_eq!(times.len(), 45);
225+
}
226+
227+
#[bench]
228+
fn bench_stream_stats_sample_500(b: &mut Bencher) {
229+
let mut logs = Vec::new();
230+
File::open("test/sample_500.log")
231+
.unwrap()
232+
.read_to_end(&mut logs)
233+
.unwrap();
234+
let opts = Options {
235+
verbose: false,
236+
unknown: false,
237+
paths: vec![],
238+
};
239+
b.iter(|| {
240+
let reader = Box::new(BufReader::new(logs.as_slice()));
241+
stream_stats(reader, &opts);
242+
});
243+
}
244+
}

0 commit comments

Comments
 (0)