Skip to content

Commit 732bd17

Browse files
committed
Merge branch 'ar/gh-31-32' into 'master'
QOL fixes for pileup, fix duplex sampling bug See merge request machine-learning/modkit!65
2 parents 4e6240c + 0942419 commit 732bd17

File tree

7 files changed

+383
-316
lines changed

7 files changed

+383
-316
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ All notable changes to this project will be documented in this file.
33

44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6+
## [v0.1.10]
7+
### Fixes
8+
- Don't log a message whilst aggregating base modification probabilities (exposed with duplex reads as input).
9+
### Changes
10+
- [pileup] Fail fast when the BAM index doesn't contain any mapped reads.
11+
612

713
## [v0.1.9]
814
### Changes

src/extract_mods.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ use crate::position_filter::StrandedPositionFilter;
2121
use crate::read_ids_to_base_mod_probs::{
2222
ModProfile, ReadBaseModProfile, ReadsBaseModProfile,
2323
};
24-
use crate::reads_sampler::record_sampler::{RecordSampler, SamplingSchedule};
24+
use crate::reads_sampler::record_sampler::RecordSampler;
2525
use crate::reads_sampler::sample_reads_from_interval;
26+
use crate::reads_sampler::sampling_schedule::SamplingSchedule;
2627
use crate::record_processor::WithRecords;
2728
use crate::util::{
2829
get_master_progress_bar, get_spinner, get_subroutine_progress_bar,

src/pileup/subcommand.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use crate::writers::{
1717
};
1818
use anyhow::{anyhow, bail, Context};
1919

20+
use crate::reads_sampler::sampling_schedule::IdxStats;
2021
use clap::{Args, ValueEnum};
2122
use crossbeam_channel::bounded;
2223
use indicatif::{MultiProgress, ParallelProgressIterator};
@@ -320,6 +321,22 @@ impl ModBamPileup {
320321
)
321322
})
322323
.transpose()?;
324+
// use the path here instead of passing the reader directly to avoid potentially
325+
// changing mutable internal state of the reader.
326+
IdxStats::new_from_path(
327+
&self.in_bam,
328+
region.as_ref(),
329+
position_filter.as_ref(),
330+
)
331+
.and_then(|index_stats| {
332+
if index_stats.mapped_read_count > 0 {
333+
Ok(())
334+
} else {
335+
Err(anyhow!("did not find any mapped reads, perform alignment first or use \
336+
modkit extract and/or modkit summary to inspect unaligned modBAMs"))
337+
}
338+
})?;
339+
323340
if self.filter_percentile > 1.0 {
324341
bail!("filter percentile must be <= 1.0")
325342
}

src/read_ids_to_base_mod_probs.rs

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,19 +50,12 @@ impl ReadIdsToBaseModProbs {
5050
canonical_base: DnaBase,
5151
mod_probs: Vec<BaseModProbs>,
5252
) {
53-
let added = self
54-
.inner
53+
self.inner
5554
.entry(read_id.to_owned())
5655
.or_insert(HashMap::new())
57-
.insert(canonical_base, mod_probs);
58-
if added.is_some() {
59-
debug!(
60-
"double added base mod calls for base {} and read {},\
61-
potentially a logic error, please submit an issue.",
62-
canonical_base.char(),
63-
read_id
64-
);
65-
}
56+
.entry(canonical_base)
57+
.or_insert(Vec::new())
58+
.extend(mod_probs)
6659
}
6760

6861
#[inline]
@@ -270,6 +263,9 @@ impl RecordProcessor for ReadIdsToBaseModProbs {
270263
&record,
271264
);
272265

266+
// must stay such that mod_probs will not be empty if seq_pos_base_mod_probs
267+
// is Some; otherwise added_probs_for_record should not be flipped to
268+
// true
273269
if let Some(seq_pos_base_mod_probs) =
274270
seq_pos_base_mod_probs
275271
{
@@ -289,7 +285,7 @@ impl RecordProcessor for ReadIdsToBaseModProbs {
289285
canonical_base,
290286
mod_probs,
291287
);
292-
added_probs_for_record = true
288+
added_probs_for_record = true;
293289
} else {
294290
// trace!("all base mod positions were removed by filtering \
295291
// for {record_name} and base {raw_canonical_base}");

src/reads_sampler/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
pub(crate) mod record_sampler;
2+
pub(crate) mod sampling_schedule;
23

34
use crate::interval_chunks::IntervalChunks;
45
use crate::mod_bam::{CollapseMethod, EdgeFilter};
56
use crate::monoid::Moniod;
67
use crate::position_filter::StrandedPositionFilter;
7-
use crate::reads_sampler::record_sampler::SamplingSchedule;
8+
use crate::reads_sampler::sampling_schedule::SamplingSchedule;
89
use crate::record_processor::{RecordProcessor, WithRecords};
910
use crate::util::{
1011
get_master_progress_bar, get_subroutine_progress_bar, get_targets,

0 commit comments

Comments
 (0)