Skip to content

Commit 7be93a4

Browse files
committed
CLI option to take input from stdin
Signed-off-by: qjerome <[email protected]>
1 parent e7a1bf7 commit 7be93a4

File tree

1 file changed

+40
-22
lines changed

1 file changed

+40
-22
lines changed

src/bin/poppy.rs

Lines changed: 40 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,12 @@
1-
use std::{cmp::max, fmt::Display, fs::File, io::BufRead, mem::size_of, sync::Arc, thread};
1+
use std::{
2+
cmp::max,
3+
fmt::Display,
4+
fs::File,
5+
io::{self, BufRead},
6+
mem::size_of,
7+
sync::Arc,
8+
thread,
9+
};
210

311
use anyhow::anyhow;
412
use clap::Parser;
@@ -81,6 +89,9 @@ struct Insert {
8189
/// Show progress information
8290
#[clap(short, long)]
8391
progress: bool,
92+
/// Reads input from stdin
93+
#[clap(long)]
94+
stdin: bool,
8495
/// The number of jobs to use to insert into the bloom filter. The original
8596
/// filter is copied into the memory of each job so you can expect the memory
8697
/// of the whole process to be N times the size of the (uncompressed) bloom filter.
@@ -125,22 +136,30 @@ fn main() -> Result<(), anyhow::Error> {
125136
let bloom_file = File::open(&o.file)?;
126137
let bf = Arc::new(std::sync::Mutex::new(BloomFilter::from_reader(bloom_file)?));
127138

128-
let mut handles = vec![];
129-
let files = o.inputs.clone();
139+
// if we pipe in entries via stdin
140+
if o.stdin {
141+
let mut bf = bf.lock().unwrap();
142+
for line in std::io::BufReader::new(io::stdin()).lines() {
143+
bf.insert(line?)
144+
}
145+
}
130146

131-
let batches = files.chunks(max(files.len() / o.jobs, 1));
147+
// processing files if any
148+
if !o.inputs.is_empty() {
149+
let mut handles = vec![];
150+
let files = o.inputs.clone();
132151

133-
for batch in batches {
134-
let shared = Arc::clone(&bf);
135-
let batch: Vec<String> = batch.to_vec();
136-
let mut copy = shared
137-
.lock()
138-
.map_err(|e| anyhow!("failed to lock mutex: {}", e))?
139-
.clone();
152+
let batches = files.chunks(max(files.len() / o.jobs, 1));
140153

141-
let h = thread::spawn(move || {
142-
{
143-
//println!("Processing batch of {} files", batch.len());
154+
for batch in batches {
155+
let shared = Arc::clone(&bf);
156+
let batch: Vec<String> = batch.to_vec();
157+
let mut copy = shared
158+
.lock()
159+
.map_err(|e| anyhow!("failed to lock mutex: {}", e))?
160+
.clone();
161+
162+
let h = thread::spawn(move || {
144163
for input in batch {
145164
if o.progress {
146165
println!("processing file: {input}");
@@ -158,18 +177,17 @@ fn main() -> Result<(), anyhow::Error> {
158177
shared.union(&copy)?;
159178

160179
Ok::<(), anyhow::Error>(())
161-
}
162-
});
163-
handles.push(h)
164-
}
180+
});
181+
handles.push(h)
182+
}
165183

166-
for h in handles {
167-
h.join().expect("failed to join thread")?;
184+
for h in handles {
185+
h.join().expect("failed to join thread")?;
186+
}
168187
}
169188

170189
let mut output = File::create(o.file)?;
171-
let b = bf.lock().unwrap();
172-
b.write(&mut output)?;
190+
bf.lock().unwrap().write(&mut output)?;
173191
}
174192
Command::Check(o) => {
175193
let bloom_file = File::open(&o.file)?;

0 commit comments

Comments
 (0)