1
- use std:: { cmp:: max, fmt:: Display , fs:: File , io:: BufRead , mem:: size_of, sync:: Arc , thread} ;
1
+ use std:: {
2
+ cmp:: max,
3
+ fmt:: Display ,
4
+ fs:: File ,
5
+ io:: { self , BufRead } ,
6
+ mem:: size_of,
7
+ sync:: Arc ,
8
+ thread,
9
+ } ;
2
10
3
11
use anyhow:: anyhow;
4
12
use clap:: Parser ;
@@ -81,6 +89,9 @@ struct Insert {
81
89
/// Show progress information
82
90
#[ clap( short, long) ]
83
91
progress : bool ,
92
+ /// Reads input from stdin
93
+ #[ clap( long) ]
94
+ stdin : bool ,
84
95
/// The number of jobs to use to insert into the bloom filter. The original
85
96
/// filter is copied into the memory of each job so you can expect the memory
86
97
/// of the whole process to be N times the size of the (uncompressed) bloom filter.
@@ -125,22 +136,30 @@ fn main() -> Result<(), anyhow::Error> {
125
136
let bloom_file = File :: open ( & o. file ) ?;
126
137
let bf = Arc :: new ( std:: sync:: Mutex :: new ( BloomFilter :: from_reader ( bloom_file) ?) ) ;
127
138
128
- let mut handles = vec ! [ ] ;
129
- let files = o. inputs . clone ( ) ;
139
+ // If --stdin is set, insert every line read from stdin directly into the shared filter.
140
+ if o. stdin {
141
+ let mut bf = bf. lock ( ) . unwrap ( ) ;
142
+ for line in std:: io:: BufReader :: new ( io:: stdin ( ) ) . lines ( ) {
143
+ bf. insert ( line?)
144
+ }
145
+ }
130
146
131
- let batches = files. chunks ( max ( files. len ( ) / o. jobs , 1 ) ) ;
147
+ // Process the input files, if any were given, in batches spread across worker threads.
148
+ if !o. inputs . is_empty ( ) {
149
+ let mut handles = vec ! [ ] ;
150
+ let files = o. inputs . clone ( ) ;
132
151
133
- for batch in batches {
134
- let shared = Arc :: clone ( & bf) ;
135
- let batch: Vec < String > = batch. to_vec ( ) ;
136
- let mut copy = shared
137
- . lock ( )
138
- . map_err ( |e| anyhow ! ( "failed to lock mutex: {}" , e) ) ?
139
- . clone ( ) ;
152
+ let batches = files. chunks ( max ( files. len ( ) / o. jobs , 1 ) ) ;
140
153
141
- let h = thread:: spawn ( move || {
142
- {
143
- //println!("Processing batch of {} files", batch.len());
154
+ for batch in batches {
155
+ let shared = Arc :: clone ( & bf) ;
156
+ let batch: Vec < String > = batch. to_vec ( ) ;
157
+ let mut copy = shared
158
+ . lock ( )
159
+ . map_err ( |e| anyhow ! ( "failed to lock mutex: {}" , e) ) ?
160
+ . clone ( ) ;
161
+
162
+ let h = thread:: spawn ( move || {
144
163
for input in batch {
145
164
if o. progress {
146
165
println ! ( "processing file: {input}" ) ;
@@ -158,18 +177,17 @@ fn main() -> Result<(), anyhow::Error> {
158
177
shared. union ( & copy) ?;
159
178
160
179
Ok :: < ( ) , anyhow:: Error > ( ( ) )
161
- }
162
- } ) ;
163
- handles. push ( h)
164
- }
180
+ } ) ;
181
+ handles. push ( h)
182
+ }
165
183
166
- for h in handles {
167
- h. join ( ) . expect ( "failed to join thread" ) ?;
184
+ for h in handles {
185
+ h. join ( ) . expect ( "failed to join thread" ) ?;
186
+ }
168
187
}
169
188
170
189
let mut output = File :: create ( o. file ) ?;
171
- let b = bf. lock ( ) . unwrap ( ) ;
172
- b. write ( & mut output) ?;
190
+ bf. lock ( ) . unwrap ( ) . write ( & mut output) ?;
173
191
}
174
192
Command :: Check ( o) => {
175
193
let bloom_file = File :: open ( & o. file ) ?;
0 commit comments