Skip to content

Commit

Permalink
Merge branch 'ar/gh-31-32' into 'master'
Browse files Browse the repository at this point in the history
QOL fixes for pileup, fix duplex sampling bug

See merge request machine-learning/modkit!65
  • Loading branch information
ArtRand committed Jun 26, 2023
2 parents 4e6240c + 0942419 commit 732bd17
Show file tree
Hide file tree
Showing 7 changed files with 383 additions and 316 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [v0.1.10]
### Fixes
- Don't log a message whilst aggregating base modification probabilities (exposed with duplex reads as input).
### Changes
- [pileup] Fail fast when bam index doesn't contain any mapped reads.


## [v0.1.9]
### Changes
Expand Down
3 changes: 2 additions & 1 deletion src/extract_mods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ use crate::position_filter::StrandedPositionFilter;
use crate::read_ids_to_base_mod_probs::{
ModProfile, ReadBaseModProfile, ReadsBaseModProfile,
};
use crate::reads_sampler::record_sampler::{RecordSampler, SamplingSchedule};
use crate::reads_sampler::record_sampler::RecordSampler;
use crate::reads_sampler::sample_reads_from_interval;
use crate::reads_sampler::sampling_schedule::SamplingSchedule;
use crate::record_processor::WithRecords;
use crate::util::{
get_master_progress_bar, get_spinner, get_subroutine_progress_bar,
Expand Down
17 changes: 17 additions & 0 deletions src/pileup/subcommand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::writers::{
};
use anyhow::{anyhow, bail, Context};

use crate::reads_sampler::sampling_schedule::IdxStats;
use clap::{Args, ValueEnum};
use crossbeam_channel::bounded;
use indicatif::{MultiProgress, ParallelProgressIterator};
Expand Down Expand Up @@ -320,6 +321,22 @@ impl ModBamPileup {
)
})
.transpose()?;
// use the path here instead of passing the reader directly to avoid potentially
// changing mutable internal state of the reader.
IdxStats::new_from_path(
&self.in_bam,
region.as_ref(),
position_filter.as_ref(),
)
.and_then(|index_stats| {
if index_stats.mapped_read_count > 0 {
Ok(())
} else {
Err(anyhow!("did not find any mapped reads, perform alignment first or use \
modkit extract and/or modkit summary to inspect unaligned modBAMs"))
}
})?;

if self.filter_percentile > 1.0 {
bail!("filter percentile must be <= 1.0")
}
Expand Down
20 changes: 8 additions & 12 deletions src/read_ids_to_base_mod_probs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,12 @@ impl ReadIdsToBaseModProbs {
canonical_base: DnaBase,
mod_probs: Vec<BaseModProbs>,
) {
let added = self
.inner
self.inner
.entry(read_id.to_owned())
.or_insert(HashMap::new())
.insert(canonical_base, mod_probs);
if added.is_some() {
debug!(
"double added base mod calls for base {} and read {},\
potentially a logic error, please submit an issue.",
canonical_base.char(),
read_id
);
}
.entry(canonical_base)
.or_insert(Vec::new())
.extend(mod_probs)
}

#[inline]
Expand Down Expand Up @@ -270,6 +263,9 @@ impl RecordProcessor for ReadIdsToBaseModProbs {
&record,
);

// must stay such that mod_probs will not be empty if seq_pos_base_mod_probs
// is Some, otherwise added_probs_for_record should not be flipped to
// true
if let Some(seq_pos_base_mod_probs) =
seq_pos_base_mod_probs
{
Expand All @@ -289,7 +285,7 @@ impl RecordProcessor for ReadIdsToBaseModProbs {
canonical_base,
mod_probs,
);
added_probs_for_record = true
added_probs_for_record = true;
} else {
// trace!("all base mod positions were removed by filtering \
// for {record_name} and base {raw_canonical_base}");
Expand Down
3 changes: 2 additions & 1 deletion src/reads_sampler/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
pub(crate) mod record_sampler;
pub(crate) mod sampling_schedule;

use crate::interval_chunks::IntervalChunks;
use crate::mod_bam::{CollapseMethod, EdgeFilter};
use crate::monoid::Moniod;
use crate::position_filter::StrandedPositionFilter;
use crate::reads_sampler::record_sampler::SamplingSchedule;
use crate::reads_sampler::sampling_schedule::SamplingSchedule;
use crate::record_processor::{RecordProcessor, WithRecords};
use crate::util::{
get_master_progress_bar, get_subroutine_progress_bar, get_targets,
Expand Down
Loading

0 comments on commit 732bd17

Please sign in to comment.