Skip to content

Commit

Permalink
moving average and base-quality
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Aug 18, 2023
1 parent fe8af74 commit 9d54bec
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 2 deletions.
8 changes: 8 additions & 0 deletions src/bin/commands/trimmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ pub(crate) struct TrimmerOpts {
#[clap(long, short = 't', default_value = "5")]
threads: usize,

/// Minimum base-quality to keep a base when trimming tails.
#[clap(long, short = 'q', default_value = "20")]
trim_tail_quality: u8,

/// Window size for moving average when trimming tails.
#[clap(long, short = 'w', default_value = "20")]
trim_tail_window: u8,

/// Level of compression to use to compress outputs.
#[clap(long, short = 'c', default_value = "5")]
compression_level: usize,
Expand Down
4 changes: 2 additions & 2 deletions src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub mod commands;
use anyhow::Result;
use clap::Parser;
use commands::command::Command;
use commands::{demux::Demux, trimmer::Trimmer};
use commands::{demux::Demux, trimmer::TrimmerOpts};
use enum_dispatch::enum_dispatch;
use env_logger::Env;

Expand All @@ -23,7 +23,7 @@ struct Args {
#[command(version)]
enum Subcommand {
Demux(Demux),
Trimmer(Trimmer),
Trimmer(TrimmerOpts),
}

fn main() -> Result<()> {
Expand Down
73 changes: 73 additions & 0 deletions src/lib/base_quality.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use super::moving_average::MovingAverage;

use std::ops::Range;

pub(crate) fn find_oscillating_quals(bqs: &[u8]) -> Range<usize> {
return 0..0;
}

pub(crate) enum Tail {
Left,
Right,
Both,
}

/// Uses a moving average to return a range of high quality bases.
/// If all bases are high-quality, the range is the full read.
pub(crate) fn find_high_quality_bases(
bqs: &[u8],
min_quality: u8,
window: u8,
tail: Tail,
) -> Range<usize> {
let mut left = 0;
let mut right = bqs.len();
if matches!(tail, Tail::Left | Tail::Both) {
let mut ma = MovingAverage::<u8>::new(window as usize);
for &bq in bqs {
let mean = ma.push(bq);
if mean >= min_quality as f64 {
break;
}
left += 1;
}
}
if matches!(tail, Tail::Right | Tail::Both) {
let mut ma = MovingAverage::<u8>::new(window as usize);
for &bq in bqs.iter().rev() {
let mean = ma.push(bq);
if mean >= min_quality as f64 {
break;
}
right -= 1;
}
}
left..right
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_find_hq_all() {
let bqs = b"IIIIIIII";
let range = find_high_quality_bases(bqs, 'I' as u8, 3, Tail::Both);
assert_eq!(range, 0..bqs.len());
}

#[test]
fn test_find_hq_ends() {
let bqs = b"EIIIIIIE";
let range = find_high_quality_bases(bqs, 'I' as u8, 1, Tail::Both);
assert_eq!(range, 1..bqs.len() - 1);

let bqs = b"EIIIIIIE";
let range = find_high_quality_bases(bqs, 'I' as u8, 1, Tail::Left);
assert_eq!(range, 1..bqs.len());

let bqs = b"EIIIIIIE";
let range = find_high_quality_bases(bqs, 'I' as u8, 1, Tail::Right);
assert_eq!(range, 0..bqs.len() - 1);
}
}
2 changes: 2 additions & 0 deletions src/lib/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
pub mod barcode_matching;
pub mod base_quality;
pub mod moving_average;
pub mod pair_overlap;
pub mod samples;

Expand Down
53 changes: 53 additions & 0 deletions src/lib/moving_average.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/// A simple moving average calculator.
/// Only requires that T is convertable to f64.
/// Uses space of window * size_of(T) bytes.
pub(crate) struct MovingAverage<T> {
window: usize,
values: Vec<T>,
sum: f64,
idx: usize,
count: usize,
}

impl<T: Copy + Default + std::convert::Into<f64>> MovingAverage<T> {
/// create a new moving average calculator with a window of `window` values.
pub fn new(window: usize) -> Self {
Self { window, values: vec![T::default(); window], sum: 0.0, idx: 0, count: 0 }
}

/// push a new value into the moving average calculator and get the new mean.
pub fn push(&mut self, value: T) -> f64 {
let old_value = self.values[self.idx];
self.values[self.idx] = value;
self.sum = self.sum + value.into() - old_value.into();
self.idx = (self.idx + 1) % self.window;
self.count += 1;
self.mean()
}

/// get the current mean.
#[inline]
pub fn mean(&self) -> f64 {
self.sum / (self.count.min(self.window) as f64)
}
}

// write some tests for the calculator
#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_moving_average() {
let window_size = 3;
let mut ma = MovingAverage::new(window_size);
// NOTE the first value is always the mean
// we use min of values added and window size to calculate the mean
assert_eq!(ma.push(1), 1 as f64 / 1 as f64);
assert_eq!(ma.push(2), (1 + 2) as f64 / 2 as f64);
assert_eq!(ma.push(3), (1 + 2 + 3) as f64 / window_size as f64);
assert_eq!(ma.push(4), (2 + 3 + 4) as f64 / window_size as f64);
assert_eq!(ma.push(5), (3 + 4 + 5) as f64 / window_size as f64);
assert_eq!(ma.push(6), (4 + 5 + 6) as f64 / window_size as f64);
}
}

0 comments on commit 9d54bec

Please sign in to comment.