Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: sum_of_base_qualities() returns zero when a record has no base qualities #212

Merged
merged 4 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,16 +849,24 @@ def from_read(cls, read: pysam.AlignedSegment) -> List["SupplementaryAlignment"]
def sum_of_base_qualities(rec: AlignedSegment, min_quality_score: int = 15) -> int:
"""Calculate the sum of base qualities score for an alignment record.

This function is useful for calculating the "mate score" as implemented in samtools fixmate.
This function is useful for calculating the "mate score" as implemented in `samtools fixmate`.
Consistently with `samtools fixmate`, this function returns 0 if the record has no base
qualities.

Args:
rec: The alignment record to calculate the sum of base qualities from.
min_quality_score: The minimum base quality score to use for summation.

Returns:
The sum of base qualities on the input record. 0 if the record has no base qualities.

See:
[`calc_sum_of_base_qualities()`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L227-L238)
[`MD_MIN_QUALITY`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L42)
"""
if rec.query_qualities is None:
return 0
msto marked this conversation as resolved.
Show resolved Hide resolved

score: int = sum(qual for qual in rec.query_qualities if qual >= min_quality_score)
return score

Expand Down
2 changes: 1 addition & 1 deletion fgpyo/util/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def attr_from(
# True, because python, so we need to check for that explicitly
if not set_value and attribute.type is not None and not attribute.type == bool:
try:
return_value = attribute.type(str_value) # type: ignore[operator]
return_value = attribute.type(str_value)
msto marked this conversation as resolved.
Show resolved Hide resolved
msto marked this conversation as resolved.
Show resolved Hide resolved
set_value = True
except (ValueError, TypeError):
pass
Expand Down
28 changes: 28 additions & 0 deletions tests/fgpyo/sam/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

import pysam
import pytest
from pysam import AlignedSegment
from pysam import AlignmentFile
from pysam import AlignmentHeader

import fgpyo.sam as sam
Expand Down Expand Up @@ -616,6 +618,32 @@ def test_sum_of_base_qualities_some_below_minimum() -> None:
assert sum_of_base_qualities(single, min_quality_score=4) == 9


def test_sum_of_base_qualities_unmapped(tmp_path: Path) -> None:
builder = SamBuilder(r1_len=5, r2_len=5)
single: AlignedSegment = builder.add_single()

# NB: assigning to `query_qualities` does not affect the cached object returned by the property,
# but it does change the underlying attribute used when constructing the string representation
single.query_qualities = None
record_str = single.to_string()

bam_path: Path = tmp_path / "no_qualities.bam"

with bam_path.open("w") as bam_file:
bam_file.write(str(builder.header))
bam_file.write(f"{record_str}\n")

with AlignmentFile(str(bam_path)) as bam:
record = next(bam)

# NB: writing to the temp file above is necessary to construct an `AlignedSegment` with no base
# qualities, as `SamBuilder` does not currently permit the construction of such records.
# TODO simplify this after `SamBuilder` is updated to support this.
# https://github.com/fulcrumgenomics/fgpyo/issues/211
assert record.query_qualities is None
assert sum_of_base_qualities(record) == 0


def test_calc_edit_info_no_edits() -> None:
chrom = "ACGCTAGACTGCTAGCAGCATCTCATAGCACTTCGCGCTATAGCGATATAAATATCGCGATCTAGCG"
builder = SamBuilder(r1_len=30)
Expand Down
Loading