//! FragGeneScanRs executable
22#![ allow( non_snake_case) ]
33
4+ use std:: collections:: VecDeque ;
45use std:: fs:: File ;
56use std:: io;
67use std:: io:: { Read , Write } ;
@@ -101,6 +102,10 @@ fn main() -> Result<()> {
101102 . value_name ( "nucleotide_file" )
102103 . takes_value ( true )
103104 . help ( "Output predicted genes to this file (supersedes -o)." ) )
105+ . arg ( Arg :: with_name ( "unordered" )
106+ . short ( "u" )
107+ . long ( "unordered" )
108+ . help ( "Do not preserve record order in output (faster)." ) )
104109 . get_matches ( ) ;
105110
106111 let ( global, locals) = hmm:: get_train_from_file (
@@ -123,52 +128,66 @@ fn main() -> Result<()> {
123128 ( None , None ) => None ,
124129 } ;
125130
126- let metastream: Option < File > = match (
131+ let metastream: Option < Box < dyn Write + Send > > = match (
127132 matches. value_of ( "meta-file" ) ,
128133 matches. value_of ( "output-prefix" ) ,
129134 ) {
130- ( Some ( filename) , _) => Some ( File :: create ( filename) ?) ,
135+ ( Some ( filename) , _) => Some ( Box :: new ( File :: create ( filename) ?) ) ,
131136 ( None , Some ( "stdout" ) ) => None ,
132- ( None , Some ( filename) ) => Some ( File :: create ( filename. to_owned ( ) + ".out" ) ?) ,
137+ ( None , Some ( filename) ) => Some ( Box :: new ( File :: create ( filename. to_owned ( ) + ".out" ) ?) ) ,
133138 ( None , None ) => None ,
134139 } ;
135140
136- let dnastream: Option < File > = match (
141+ let dnastream: Option < Box < dyn Write + Send > > = match (
137142 matches. value_of ( "nucleotide-file" ) ,
138143 matches. value_of ( "output-prefix" ) ,
139144 ) {
140- ( Some ( filename) , _) => Some ( File :: create ( filename) ?) ,
145+ ( Some ( filename) , _) => Some ( Box :: new ( File :: create ( filename) ?) ) ,
141146 ( None , Some ( "stdout" ) ) => None ,
142- ( None , Some ( filename) ) => Some ( File :: create ( filename. to_owned ( ) + ".ffn" ) ?) ,
147+ ( None , Some ( filename) ) => Some ( Box :: new ( File :: create ( filename. to_owned ( ) + ".ffn" ) ?) ) ,
143148 ( None , None ) => None ,
144149 } ;
145150
146151 if aastream. is_none ( ) && metastream. is_none ( ) && dnastream. is_none ( ) {
147152 aastream = Some ( Box :: new ( io:: stdout ( ) ) ) ;
148153 }
149154
150- run (
151- global,
152- locals,
153- inputseqs,
154- aastream,
155- metastream,
156- dnastream,
157- matches. value_of ( "complete" ) . unwrap ( ) == "1" ,
158- matches. is_present ( "formatted" ) ,
159- usize:: from_str_radix ( matches. value_of ( "thread-num" ) . unwrap ( ) , 10 ) ?,
160- ) ?;
155+ if matches. is_present ( "unordered" ) {
156+ run (
157+ global,
158+ locals,
159+ inputseqs,
160+ aastream. map ( UnbufferingBuffer :: new) ,
161+ metastream. map ( UnbufferingBuffer :: new) ,
162+ dnastream. map ( UnbufferingBuffer :: new) ,
163+ matches. value_of ( "complete" ) . unwrap ( ) == "1" ,
164+ matches. is_present ( "formatted" ) ,
165+ usize:: from_str_radix ( matches. value_of ( "thread-num" ) . unwrap ( ) , 10 ) ?,
166+ ) ?;
167+ } else {
168+ run (
169+ global,
170+ locals,
171+ inputseqs,
172+ aastream. map ( SortingBuffer :: new) ,
173+ metastream. map ( SortingBuffer :: new) ,
174+ dnastream. map ( SortingBuffer :: new) ,
175+ matches. value_of ( "complete" ) . unwrap ( ) == "1" ,
176+ matches. is_present ( "formatted" ) ,
177+ usize:: from_str_radix ( matches. value_of ( "thread-num" ) . unwrap ( ) , 10 ) ?,
178+ ) ?;
179+ }
161180
162181 Ok ( ( ) )
163182}
164183
165- fn run < R : Read + Send , W : Write + Send > (
184+ fn run < R : Read + Send , W : WritingBuffer + Send > (
166185 global : Box < hmm:: Global > ,
167186 locals : Vec < hmm:: Local > ,
168187 inputseqs : R ,
169- aastream : Option < W > ,
170- metastream : Option < File > ,
171- dnastream : Option < File > ,
188+ aa_buffer : Option < W > ,
189+ meta_buffer : Option < W > ,
190+ dna_buffer : Option < W > ,
172191 whole_genome : bool ,
173192 formatted : bool ,
174193 thread_num : usize ,
@@ -177,12 +196,14 @@ fn run<R: Read + Send, W: Write + Send>(
177196 . num_threads ( thread_num)
178197 . build_global ( ) ?;
179198
180- let aastream = aastream. map ( Mutex :: new) ;
181- let metastream = metastream. map ( Mutex :: new) ;
182- let dnastream = dnastream. map ( Mutex :: new) ;
199+ let meta_buffer = meta_buffer. map ( Mutex :: new) ;
200+ let dna_buffer = dna_buffer. map ( Mutex :: new) ;
201+ let aa_buffer = aa_buffer. map ( Mutex :: new) ;
202+
183203 Chunked :: new ( 100 , fasta:: Reader :: new ( inputseqs) . into_records ( ) )
204+ . enumerate ( )
184205 . par_bridge ( )
185- . map ( |recordvec| {
206+ . map ( |( index , recordvec) | {
186207 let mut metabuf = Vec :: new ( ) ;
187208 let mut dnabuf = Vec :: new ( ) ;
188209 let mut aabuf = Vec :: new ( ) ;
@@ -197,24 +218,24 @@ fn run<R: Read + Send, W: Write + Send>(
197218 nseq,
198219 whole_genome,
199220 ) ;
200- if metastream . is_some ( ) {
221+ if meta_buffer . is_some ( ) {
201222 read_prediction. meta ( & mut metabuf) ?;
202223 }
203- if dnastream . is_some ( ) {
224+ if dna_buffer . is_some ( ) {
204225 read_prediction. dna ( & mut dnabuf, formatted) ?;
205226 }
206- if aastream . is_some ( ) {
227+ if aa_buffer . is_some ( ) {
207228 read_prediction. protein ( & mut aabuf, whole_genome) ?;
208229 }
209230 }
210- if let Some ( metastream ) = & metastream {
211- metastream . lock ( ) . unwrap ( ) . write_all ( & metabuf) ?;
231+ if let Some ( buffer ) = & meta_buffer {
232+ buffer . lock ( ) . unwrap ( ) . add ( index , metabuf) ?;
212233 }
213- if let Some ( dnastream ) = & dnastream {
214- dnastream . lock ( ) . unwrap ( ) . write_all ( & dnabuf) ?;
234+ if let Some ( buffer ) = & dna_buffer {
235+ buffer . lock ( ) . unwrap ( ) . add ( index , dnabuf) ?;
215236 }
216- if let Some ( aastream ) = & aastream {
217- aastream . lock ( ) . unwrap ( ) . write_all ( & aabuf) ?;
237+ if let Some ( buffer ) = & aa_buffer {
238+ buffer . lock ( ) . unwrap ( ) . add ( index , aabuf) ?;
218239 }
219240 Ok ( ( ) )
220241 } )
@@ -251,3 +272,56 @@ impl<I: Iterator> Iterator for Chunked<I> {
251272 }
252273 }
253274}
275+
/// Destination for the per-chunk output produced by the worker threads.
///
/// Each call hands over one chunk of already formatted bytes together with
/// the position of that chunk in the input. What the implementation does
/// with the position (honour it or ignore it) is up to the implementation.
trait WritingBuffer {
    /// Accepts the bytes `item` belonging to chunk number `index`.
    ///
    /// Returns an error when the implementation fails to process the chunk.
    fn add(&mut self, index: usize, item: Vec<u8>) -> Result<()>;
}
279+
280+ struct SortingBuffer < W : Write + Send > {
281+ next : usize ,
282+ queue : VecDeque < Option < Vec < u8 > > > ,
283+ stream : W ,
284+ }
285+
286+ impl < W : Write + Send > SortingBuffer < W > {
287+ fn new ( stream : W ) -> Self {
288+ SortingBuffer {
289+ next : 0 ,
290+ queue : VecDeque :: new ( ) ,
291+ stream : stream,
292+ }
293+ }
294+ }
295+
296+ impl < W : Write + Send > WritingBuffer for SortingBuffer < W > {
297+ fn add ( & mut self , index : usize , item : Vec < u8 > ) -> Result < ( ) > {
298+ while self . next + self . queue . len ( ) <= index {
299+ self . queue . push_back ( None ) ;
300+ }
301+ self . queue [ index - self . next ] = Some ( item) ;
302+
303+ while self . queue . front ( ) . map ( Option :: is_some) . unwrap_or ( false ) {
304+ let item = self . queue . pop_front ( ) . unwrap ( ) . unwrap ( ) ;
305+ self . next += 1 ;
306+ self . stream . write_all ( & item) ?;
307+ }
308+ Ok ( ( ) )
309+ }
310+ }
311+
312+ struct UnbufferingBuffer < W : Write + Send > {
313+ stream : W ,
314+ }
315+
316+ impl < W : Write + Send > UnbufferingBuffer < W > {
317+ fn new ( stream : W ) -> Self {
318+ UnbufferingBuffer { stream }
319+ }
320+ }
321+
322+ impl < W : Write + Send > WritingBuffer for UnbufferingBuffer < W > {
323+ fn add ( & mut self , _: usize , item : Vec < u8 > ) -> Result < ( ) > {
324+ self . stream . write_all ( & item) ?;
325+ Ok ( ( ) )
326+ }
327+ }
0 commit comments