Skip to content

Commit 3a281cc

Browse files
author
Felix Van der Jeugt
committed
Merge branch 'in-order-output'
* in-order-output:
  allow choice between ordered and unordered output
  remove obsolete mutexes
  implement OutputBuffer for in-order output
2 parents 76e2bc3 + 3d7c414 commit 3a281cc

File tree

4 files changed

+133
-35
lines changed

4 files changed

+133
-35
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "frag_gene_scan_rs"
3-
version = "0.3.2"
3+
version = "0.4.0"
44
authors = ["Felix Van der Jeugt <[email protected]>"]
55
edition = "2018"
66

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ where:
8686
* `-r train_file_dir` can change the directory containing the training
8787
files, so you can put it anywhere on your system.
8888

89+
* `-u` can be used for some additional speed when using multithreading. The
90+
output will no longer be in the same order as the input (as in FGS and
91+
FGS+).
92+
8993
The complete list of options will be printed when running
9094
`FragGeneScanRs --help`.
9195

src/bin/FragGeneScanRs.rs

Lines changed: 108 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! FragGeneScanRs executable
22
#![allow(non_snake_case)]
33

4+
use std::collections::VecDeque;
45
use std::fs::File;
56
use std::io;
67
use std::io::{Read, Write};
@@ -101,6 +102,10 @@ fn main() -> Result<()> {
101102
.value_name("nucleotide_file")
102103
.takes_value(true)
103104
.help("Output predicted genes to this file (supersedes -o)."))
105+
.arg(Arg::with_name("unordered")
106+
.short("u")
107+
.long("unordered")
108+
.help("Do not preserve record order in output (faster)."))
104109
.get_matches();
105110

106111
let (global, locals) = hmm::get_train_from_file(
@@ -123,52 +128,66 @@ fn main() -> Result<()> {
123128
(None, None) => None,
124129
};
125130

126-
let metastream: Option<File> = match (
131+
let metastream: Option<Box<dyn Write + Send>> = match (
127132
matches.value_of("meta-file"),
128133
matches.value_of("output-prefix"),
129134
) {
130-
(Some(filename), _) => Some(File::create(filename)?),
135+
(Some(filename), _) => Some(Box::new(File::create(filename)?)),
131136
(None, Some("stdout")) => None,
132-
(None, Some(filename)) => Some(File::create(filename.to_owned() + ".out")?),
137+
(None, Some(filename)) => Some(Box::new(File::create(filename.to_owned() + ".out")?)),
133138
(None, None) => None,
134139
};
135140

136-
let dnastream: Option<File> = match (
141+
let dnastream: Option<Box<dyn Write + Send>> = match (
137142
matches.value_of("nucleotide-file"),
138143
matches.value_of("output-prefix"),
139144
) {
140-
(Some(filename), _) => Some(File::create(filename)?),
145+
(Some(filename), _) => Some(Box::new(File::create(filename)?)),
141146
(None, Some("stdout")) => None,
142-
(None, Some(filename)) => Some(File::create(filename.to_owned() + ".ffn")?),
147+
(None, Some(filename)) => Some(Box::new(File::create(filename.to_owned() + ".ffn")?)),
143148
(None, None) => None,
144149
};
145150

146151
if aastream.is_none() && metastream.is_none() && dnastream.is_none() {
147152
aastream = Some(Box::new(io::stdout()));
148153
}
149154

150-
run(
151-
global,
152-
locals,
153-
inputseqs,
154-
aastream,
155-
metastream,
156-
dnastream,
157-
matches.value_of("complete").unwrap() == "1",
158-
matches.is_present("formatted"),
159-
usize::from_str_radix(matches.value_of("thread-num").unwrap(), 10)?,
160-
)?;
155+
if matches.is_present("unordered") {
156+
run(
157+
global,
158+
locals,
159+
inputseqs,
160+
aastream.map(UnbufferingBuffer::new),
161+
metastream.map(UnbufferingBuffer::new),
162+
dnastream.map(UnbufferingBuffer::new),
163+
matches.value_of("complete").unwrap() == "1",
164+
matches.is_present("formatted"),
165+
usize::from_str_radix(matches.value_of("thread-num").unwrap(), 10)?,
166+
)?;
167+
} else {
168+
run(
169+
global,
170+
locals,
171+
inputseqs,
172+
aastream.map(SortingBuffer::new),
173+
metastream.map(SortingBuffer::new),
174+
dnastream.map(SortingBuffer::new),
175+
matches.value_of("complete").unwrap() == "1",
176+
matches.is_present("formatted"),
177+
usize::from_str_radix(matches.value_of("thread-num").unwrap(), 10)?,
178+
)?;
179+
}
161180

162181
Ok(())
163182
}
164183

165-
fn run<R: Read + Send, W: Write + Send>(
184+
fn run<R: Read + Send, W: WritingBuffer + Send>(
166185
global: Box<hmm::Global>,
167186
locals: Vec<hmm::Local>,
168187
inputseqs: R,
169-
aastream: Option<W>,
170-
metastream: Option<File>,
171-
dnastream: Option<File>,
188+
aa_buffer: Option<W>,
189+
meta_buffer: Option<W>,
190+
dna_buffer: Option<W>,
172191
whole_genome: bool,
173192
formatted: bool,
174193
thread_num: usize,
@@ -177,12 +196,14 @@ fn run<R: Read + Send, W: Write + Send>(
177196
.num_threads(thread_num)
178197
.build_global()?;
179198

180-
let aastream = aastream.map(Mutex::new);
181-
let metastream = metastream.map(Mutex::new);
182-
let dnastream = dnastream.map(Mutex::new);
199+
let meta_buffer = meta_buffer.map(Mutex::new);
200+
let dna_buffer = dna_buffer.map(Mutex::new);
201+
let aa_buffer = aa_buffer.map(Mutex::new);
202+
183203
Chunked::new(100, fasta::Reader::new(inputseqs).into_records())
204+
.enumerate()
184205
.par_bridge()
185-
.map(|recordvec| {
206+
.map(|(index, recordvec)| {
186207
let mut metabuf = Vec::new();
187208
let mut dnabuf = Vec::new();
188209
let mut aabuf = Vec::new();
@@ -197,24 +218,24 @@ fn run<R: Read + Send, W: Write + Send>(
197218
nseq,
198219
whole_genome,
199220
);
200-
if metastream.is_some() {
221+
if meta_buffer.is_some() {
201222
read_prediction.meta(&mut metabuf)?;
202223
}
203-
if dnastream.is_some() {
224+
if dna_buffer.is_some() {
204225
read_prediction.dna(&mut dnabuf, formatted)?;
205226
}
206-
if aastream.is_some() {
227+
if aa_buffer.is_some() {
207228
read_prediction.protein(&mut aabuf, whole_genome)?;
208229
}
209230
}
210-
if let Some(metastream) = &metastream {
211-
metastream.lock().unwrap().write_all(&metabuf)?;
231+
if let Some(buffer) = &meta_buffer {
232+
buffer.lock().unwrap().add(index, metabuf)?;
212233
}
213-
if let Some(dnastream) = &dnastream {
214-
dnastream.lock().unwrap().write_all(&dnabuf)?;
234+
if let Some(buffer) = &dna_buffer {
235+
buffer.lock().unwrap().add(index, dnabuf)?;
215236
}
216-
if let Some(aastream) = &aastream {
217-
aastream.lock().unwrap().write_all(&aabuf)?;
237+
if let Some(buffer) = &aa_buffer {
238+
buffer.lock().unwrap().add(index, aabuf)?;
218239
}
219240
Ok(())
220241
})
@@ -251,3 +272,56 @@ impl<I: Iterator> Iterator for Chunked<I> {
251272
}
252273
}
253274
}
275+
276+
/// A sink for output chunks, each tagged with an index.
///
/// `run` passes the position of the chunk in the enumerated input as `index`,
/// so an implementation can use it to restore input order or ignore it.
trait WritingBuffer {
277+
/// Hands the bytes in `item`, produced for chunk number `index`, over to
/// the buffer. The implementations in this file return an error when
/// writing to their underlying stream fails.
fn add(&mut self, index: usize, item: Vec<u8>) -> Result<()>;
278+
}
279+
280+
struct SortingBuffer<W: Write + Send> {
281+
next: usize,
282+
queue: VecDeque<Option<Vec<u8>>>,
283+
stream: W,
284+
}
285+
286+
impl<W: Write + Send> SortingBuffer<W> {
287+
fn new(stream: W) -> Self {
288+
SortingBuffer {
289+
next: 0,
290+
queue: VecDeque::new(),
291+
stream: stream,
292+
}
293+
}
294+
}
295+
296+
impl<W: Write + Send> WritingBuffer for SortingBuffer<W> {
297+
fn add(&mut self, index: usize, item: Vec<u8>) -> Result<()> {
298+
while self.next + self.queue.len() <= index {
299+
self.queue.push_back(None);
300+
}
301+
self.queue[index - self.next] = Some(item);
302+
303+
while self.queue.front().map(Option::is_some).unwrap_or(false) {
304+
let item = self.queue.pop_front().unwrap().unwrap();
305+
self.next += 1;
306+
self.stream.write_all(&item)?;
307+
}
308+
Ok(())
309+
}
310+
}
311+
312+
struct UnbufferingBuffer<W: Write + Send> {
313+
stream: W,
314+
}
315+
316+
impl<W: Write + Send> UnbufferingBuffer<W> {
317+
fn new(stream: W) -> Self {
318+
UnbufferingBuffer { stream }
319+
}
320+
}
321+
322+
impl<W: Write + Send> WritingBuffer for UnbufferingBuffer<W> {
323+
fn add(&mut self, _: usize, item: Vec<u8>) -> Result<()> {
324+
self.stream.write_all(&item)?;
325+
Ok(())
326+
}
327+
}

src/gene.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,26 @@ impl ReadPrediction {
1717
}
1818
}
1919

20+
pub fn append_to(
21+
&self,
22+
aabuf: &mut Option<Vec<u8>>,
23+
metabuf: &mut Option<Vec<u8>>,
24+
dnabuf: &mut Option<Vec<u8>>,
25+
formatted: bool,
26+
whole_genome: bool,
27+
) -> Result<(), GeneError> {
28+
if let Some(metabuf) = metabuf {
29+
self.meta(&mut *metabuf)?;
30+
}
31+
if let Some(dnabuf) = dnabuf {
32+
self.dna(&mut *dnabuf, formatted)?;
33+
}
34+
if let Some(aabuf) = aabuf {
35+
self.protein(&mut *aabuf, whole_genome)?;
36+
}
37+
Ok(())
38+
}
39+
2040
pub fn meta(&self, buf: &mut Vec<u8>) -> Result<(), GeneError> {
2141
if !self.genes.is_empty() {
2242
buf.append(&mut format!(">{}\n", std::str::from_utf8(&self.head)?).into_bytes())

0 commit comments

Comments
 (0)