Skip to content

Commit 9d54bec

Browse files
committed
moving average and base-quality
1 parent fe8af74 commit 9d54bec

File tree

5 files changed

+138
-2
lines changed

5 files changed

+138
-2
lines changed

src/bin/commands/trimmer.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ pub(crate) struct TrimmerOpts {
1919
#[clap(long, short = 't', default_value = "5")]
2020
threads: usize,
2121

22+
/// Minimum base-quality to keep a base when trimming tails.
23+
#[clap(long, short = 'q', default_value = "20")]
24+
trim_tail_quality: u8,
25+
26+
/// Window size for moving average when trimming tails.
27+
#[clap(long, short = 'w', default_value = "20")]
28+
trim_tail_window: u8,
29+
2230
/// Level of compression to use to compress outputs.
2331
#[clap(long, short = 'c', default_value = "5")]
2432
compression_level: usize,

src/bin/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ pub mod commands;
55
use anyhow::Result;
66
use clap::Parser;
77
use commands::command::Command;
8-
use commands::{demux::Demux, trimmer::Trimmer};
8+
use commands::{demux::Demux, trimmer::TrimmerOpts};
99
use enum_dispatch::enum_dispatch;
1010
use env_logger::Env;
1111

@@ -23,7 +23,7 @@ struct Args {
2323
#[command(version)]
2424
enum Subcommand {
2525
Demux(Demux),
26-
Trimmer(Trimmer),
26+
Trimmer(TrimmerOpts),
2727
}
2828

2929
fn main() -> Result<()> {

src/lib/base_quality.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use super::moving_average::MovingAverage;
2+
3+
use std::ops::Range;
4+
5+
/// Placeholder for locating a stretch of oscillating base qualities.
///
/// Not implemented yet: `bqs` is ignored and the empty range `0..0` is always
/// returned.
pub(crate) fn find_oscillating_quals(bqs: &[u8]) -> Range<usize> {
    // Explicitly discard the argument so the stub builds without an
    // unused-variable warning while keeping the parameter name stable.
    let _ = bqs;
    0..0
}
8+
9+
/// Selects which end(s) of a read are scanned when looking for high-quality bases.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Tail {
    /// Scan only from the start of the base-quality slice.
    Left,
    /// Scan only from the end of the base-quality slice.
    Right,
    /// Scan from both the start and the end.
    Both,
}
14+
15+
/// Uses a moving average to return a range of high quality bases.
16+
/// If all bases are high-quality, the range is the full read.
17+
pub(crate) fn find_high_quality_bases(
18+
bqs: &[u8],
19+
min_quality: u8,
20+
window: u8,
21+
tail: Tail,
22+
) -> Range<usize> {
23+
let mut left = 0;
24+
let mut right = bqs.len();
25+
if matches!(tail, Tail::Left | Tail::Both) {
26+
let mut ma = MovingAverage::<u8>::new(window as usize);
27+
for &bq in bqs {
28+
let mean = ma.push(bq);
29+
if mean >= min_quality as f64 {
30+
break;
31+
}
32+
left += 1;
33+
}
34+
}
35+
if matches!(tail, Tail::Right | Tail::Both) {
36+
let mut ma = MovingAverage::<u8>::new(window as usize);
37+
for &bq in bqs.iter().rev() {
38+
let mean = ma.push(bq);
39+
if mean >= min_quality as f64 {
40+
break;
41+
}
42+
right -= 1;
43+
}
44+
}
45+
left..right
46+
}
47+
48+
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte used both as the "good" quality value and as the threshold.
    const HIGH: u8 = b'I';

    /// A read made only of threshold-quality bases is kept in full.
    #[test]
    fn test_find_hq_all() {
        let bqs = b"IIIIIIII";
        let expected = 0..bqs.len();
        assert_eq!(find_high_quality_bases(bqs, HIGH, 3, Tail::Both), expected);
    }

    /// One lower-quality base on each end is removed only from the requested tail(s).
    #[test]
    fn test_find_hq_ends() {
        let bqs = b"EIIIIIIE";
        let len = bqs.len();

        let both = find_high_quality_bases(bqs, HIGH, 1, Tail::Both);
        assert_eq!(both, 1..len - 1);

        let left_only = find_high_quality_bases(bqs, HIGH, 1, Tail::Left);
        assert_eq!(left_only, 1..len);

        let right_only = find_high_quality_bases(bqs, HIGH, 1, Tail::Right);
        assert_eq!(right_only, 0..len - 1);
    }
}

src/lib/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
pub mod barcode_matching;
2+
pub mod base_quality;
3+
pub mod moving_average;
24
pub mod pair_overlap;
35
pub mod samples;
46

src/lib/moving_average.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/// A simple moving average calculator.
///
/// Only requires that `T` is convertible to `f64`.
/// Uses space of `window * size_of::<T>()` bytes for the stored values.
pub(crate) struct MovingAverage<T> {
    /// Number of most recent values the mean is taken over (always at least 1).
    window: usize,
    /// Ring buffer with one slot per value in the window.
    values: Vec<T>,
    /// Running sum of the values that are currently part of the window.
    sum: f64,
    /// Slot in `values` that the next `push` overwrites.
    idx: usize,
    /// Total number of values pushed so far.
    count: usize,
}

impl<T: Copy + Default + Into<f64>> MovingAverage<T> {
    /// Create a new moving average calculator with a window of `window` values.
    ///
    /// A `window` of `0` is treated as `1`, because an empty window can hold no
    /// value and would otherwise make [`MovingAverage::push`] panic.
    pub fn new(window: usize) -> Self {
        let window = window.max(1);
        Self { window, values: vec![T::default(); window], sum: 0.0, idx: 0, count: 0 }
    }

    /// Push a new value into the moving average calculator and get the new mean.
    ///
    /// Until `window` values have been pushed, the mean is taken over the values
    /// pushed so far.
    pub fn push(&mut self, value: T) -> f64 {
        // Only a full buffer holds a real value in the slot being overwritten;
        // before that the slot is an untouched placeholder that was never added
        // to `sum`, so nothing must be subtracted for it.
        let evicted: f64 = if self.count >= self.window { self.values[self.idx].into() } else { 0.0 };
        self.values[self.idx] = value;
        self.sum = self.sum + value.into() - evicted;
        self.idx = (self.idx + 1) % self.window;
        self.count += 1;
        self.mean()
    }

    /// Get the current mean, or `0.0` if no value has been pushed yet.
    #[inline]
    pub fn mean(&self) -> f64 {
        let held = self.count.min(self.window);
        if held == 0 {
            // Avoid 0.0 / 0.0 (NaN) for a calculator that has seen no values.
            return 0.0;
        }
        self.sum / held as f64
    }
}
34+
35+
// Unit tests for the moving average calculator.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_moving_average() {
        let window_size = 3;
        let mut ma = MovingAverage::new(window_size);

        // Each entry is (value pushed, mean expected right after the push).
        // Until the window is full the mean is taken over the values pushed so
        // far, so the very first mean equals the first value.
        let steps = [
            (1, 1.0),
            (2, 1.5),
            (3, 2.0),
            (4, 3.0),
            (5, 4.0),
            (6, 5.0),
        ];

        for (value, expected_mean) in steps {
            assert_eq!(ma.push(value), expected_mean);
        }
    }
}

0 commit comments

Comments
 (0)