Skip to content

Commit 76e2bc3

Browse files
author
Felix Van der Jeugt
committed
Merge branch 'input-blocks'
* input-blocks: write to buffers in chunks
2 parents 9d59aac + b032ce8 commit 76e2bc3

File tree

3 files changed

+81
-53
lines changed

3 files changed

+81
-53
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "frag_gene_scan_rs"
3-
version = "0.3.1"
3+
version = "0.3.2"
44
authors = ["Felix Van der Jeugt <[email protected]>"]
55
edition = "2018"
66

src/bin/FragGeneScanRs.rs

Lines changed: 60 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -180,30 +180,74 @@ fn run<R: Read + Send, W: Write + Send>(
180180
let aastream = aastream.map(Mutex::new);
181181
let metastream = metastream.map(Mutex::new);
182182
let dnastream = dnastream.map(Mutex::new);
183-
fasta::Reader::new(inputseqs)
184-
.into_records()
183+
Chunked::new(100, fasta::Reader::new(inputseqs).into_records())
185184
.par_bridge()
186-
.map(|record| {
187-
let fasta::OwnedRecord { mut head, seq } = record?;
188-
head = head.into_iter().take_while(u8::is_ascii_graphic).collect();
189-
let nseq: Vec<Nuc> = seq.into_iter().map(Nuc::from).collect();
190-
let read_prediction = viterbi(
191-
&global,
192-
&locals[count_cg_content(&nseq)],
193-
head,
194-
nseq,
195-
whole_genome,
196-
);
185+
.map(|recordvec| {
186+
let mut metabuf = Vec::new();
187+
let mut dnabuf = Vec::new();
188+
let mut aabuf = Vec::new();
189+
for record in recordvec {
190+
let fasta::OwnedRecord { mut head, seq } = record?;
191+
head = head.into_iter().take_while(u8::is_ascii_graphic).collect();
192+
let nseq: Vec<Nuc> = seq.into_iter().map(Nuc::from).collect();
193+
let read_prediction = viterbi(
194+
&global,
195+
&locals[count_cg_content(&nseq)],
196+
head,
197+
nseq,
198+
whole_genome,
199+
);
200+
if metastream.is_some() {
201+
read_prediction.meta(&mut metabuf)?;
202+
}
203+
if dnastream.is_some() {
204+
read_prediction.dna(&mut dnabuf, formatted)?;
205+
}
206+
if aastream.is_some() {
207+
read_prediction.protein(&mut aabuf, whole_genome)?;
208+
}
209+
}
197210
if let Some(metastream) = &metastream {
198-
read_prediction.print_meta(&mut *metastream.lock().unwrap())?; // TODO lock together content
211+
metastream.lock().unwrap().write_all(&metabuf)?;
199212
}
200213
if let Some(dnastream) = &dnastream {
201-
read_prediction.print_dna(&mut *dnastream.lock().unwrap(), formatted)?;
214+
dnastream.lock().unwrap().write_all(&dnabuf)?;
202215
}
203216
if let Some(aastream) = &aastream {
204-
read_prediction.print_protein(whole_genome, &mut *aastream.lock().unwrap())?;
217+
aastream.lock().unwrap().write_all(&aabuf)?;
205218
}
206219
Ok(())
207220
})
208221
.collect()
209222
}
223+
224+
/// Iterator adaptor that groups the items of an inner iterator into `Vec`s
/// of at most `size` items each.
///
/// Every yielded chunk holds exactly `size` items, except possibly the last
/// one, which holds whatever remained. An empty chunk is never yielded.
struct Chunked<I> {
    /// Maximum number of items per chunk; always at least 1.
    size: usize,
    /// Source of the items being grouped.
    iterator: I,
}

impl<I: Iterator> Chunked<I> {
    /// Wraps `iterator` so that it yields its items in chunks of `size`.
    ///
    /// A `size` of 0 is treated as 1. A zero-sized chunk could never hold an
    /// item, so the adaptor would report end-of-input on the first call and
    /// silently discard everything the inner iterator still had to offer.
    fn new(size: usize, iterator: I) -> Self {
        Chunked {
            size: size.max(1),
            iterator,
        }
    }
}

impl<I: Iterator> Iterator for Chunked<I> {
    type Item = Vec<I::Item>;

    /// Pulls up to `size` items from the inner iterator and returns them as
    /// one chunk, or `None` once the inner iterator yields nothing more.
    fn next(&mut self) -> Option<Self::Item> {
        let mut items = Vec::with_capacity(self.size);
        // `by_ref` borrows the inner iterator so `take` does not consume it;
        // `take` stops at the first `None` or after `size` items.
        items.extend(self.iterator.by_ref().take(self.size));
        if items.is_empty() {
            None
        } else {
            Some(items)
        }
    }
}

src/gene.rs

Lines changed: 20 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
use std::fs::File;
2-
use std::io;
3-
use std::io::Write;
4-
51
extern crate thiserror;
62
use thiserror::Error;
73

@@ -21,30 +17,26 @@ impl ReadPrediction {
2117
}
2218
}
2319

24-
pub fn print_meta(&self, file: &mut File) -> Result<(), GeneError> {
20+
pub fn meta(&self, buf: &mut Vec<u8>) -> Result<(), GeneError> {
2521
if !self.genes.is_empty() {
26-
file.write_all(&format!(">{}\n", std::str::from_utf8(&self.head)?).into_bytes())?;
22+
buf.append(&mut format!(">{}\n", std::str::from_utf8(&self.head)?).into_bytes())
2723
}
2824
for gene in &self.genes {
29-
gene.print_meta(file)?;
25+
gene.meta(buf);
3026
}
3127
Ok(())
3228
}
3329

34-
pub fn print_dna(&self, file: &mut File, formatted: bool) -> Result<(), GeneError> {
30+
pub fn dna(&self, buf: &mut Vec<u8>, formatted: bool) -> Result<(), GeneError> {
3531
for gene in &self.genes {
36-
gene.print_dna(file, &self.head, formatted)?;
32+
gene.dna(buf, &self.head, formatted)?;
3733
}
3834
Ok(())
3935
}
4036

41-
pub fn print_protein<W: Write>(
42-
&self,
43-
whole_genome: bool,
44-
file: &mut W,
45-
) -> Result<(), GeneError> {
37+
pub fn protein(&self, buf: &mut Vec<u8>, whole_genome: bool) -> Result<(), GeneError> {
4638
for gene in &self.genes {
47-
gene.print_protein(file, &self.head, whole_genome)?;
39+
gene.protein(buf, &self.head, whole_genome)?;
4840
}
4941
Ok(())
5042
}
@@ -63,9 +55,9 @@ pub struct Gene {
6355
}
6456

6557
impl Gene {
66-
pub fn print_meta(&self, file: &mut File) -> Result<(), GeneError> {
67-
file.write_all(
68-
&format!(
58+
pub fn meta(&self, buf: &mut Vec<u8>) {
59+
buf.append(
60+
&mut format!(
6961
"{}\t{}\t{}\t{}\t{:.6}\tI:{}\tD:{}\n",
7062
self.metastart,
7163
self.end,
@@ -82,16 +74,10 @@ impl Gene {
8274
.collect::<String>()
8375
)
8476
.into_bytes(),
85-
)?;
86-
Ok(())
77+
);
8778
}
8879

89-
pub fn print_dna(
90-
&self,
91-
file: &mut File,
92-
head: &Vec<u8>,
93-
formatted: bool,
94-
) -> Result<(), GeneError> {
80+
pub fn dna(&self, buf: &mut Vec<u8>, head: &Vec<u8>, formatted: bool) -> Result<(), GeneError> {
9581
let dna: Vec<u8> = match (self.forward_strand, formatted) {
9682
(true, true) => self.dna.iter().map(|&n| u8::from(n)).collect(),
9783
(true, false) => self
@@ -110,8 +96,8 @@ impl Gene {
11096
.collect(),
11197
};
11298

113-
file.write_all(
114-
&format!(
99+
buf.append(
100+
&mut format!(
115101
">{}_{}_{}_{}\n{}\n",
116102
std::str::from_utf8(head)?,
117103
self.start,
@@ -120,14 +106,14 @@ impl Gene {
120106
std::str::from_utf8(&dna)?,
121107
)
122108
.into_bytes(),
123-
)?;
109+
);
124110

125111
Ok(())
126112
}
127113

128-
pub fn print_protein<W: Write>(
114+
pub fn protein(
129115
&self,
130-
file: &mut W,
116+
buf: &mut Vec<u8>,
131117
head: &Vec<u8>,
132118
whole_genome: bool,
133119
) -> Result<(), GeneError> {
@@ -167,8 +153,8 @@ impl Gene {
167153
}
168154
}
169155

170-
file.write_all(
171-
&format!(
156+
buf.append(
157+
&mut format!(
172158
">{}_{}_{}_{}\n{}\n",
173159
std::str::from_utf8(head)?,
174160
self.start,
@@ -177,15 +163,13 @@ impl Gene {
177163
std::str::from_utf8(&protein)?,
178164
)
179165
.into_bytes(),
180-
)?;
166+
);
181167
Ok(())
182168
}
183169
}
184170

185171
#[derive(Error, Debug)]
186172
pub enum GeneError {
187-
#[error("could not write to file")]
188-
IoError(#[from] io::Error),
189173
#[error("could not convert header back to UTF-8")]
190174
Utf8Error(#[from] std::str::Utf8Error),
191175
}

0 commit comments

Comments
 (0)