Skip to content

Commit

Permalink
fix: sum_of_base_qualities() returns zero when a record has no base…
Browse files Browse the repository at this point in the history
… qualities (#212)

Closes #210
  • Loading branch information
msto authored Jan 23, 2025
1 parent c5eb469 commit f1f89e9
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 2 deletions.
20 changes: 18 additions & 2 deletions fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@
import enum
import io
import sys
from array import array
from collections.abc import Collection
from itertools import chain
from pathlib import Path
Expand All @@ -178,6 +179,7 @@
from pysam import AlignedSegment
from pysam import AlignmentFile as SamFile
from pysam import AlignmentHeader as SamHeader
from pysam import qualitystring_to_array
from typing_extensions import deprecated

import fgpyo.io
Expand All @@ -189,12 +191,18 @@
NO_REF_INDEX: int = -1
"""The reference index to use to indicate no reference in SAM/BAM."""

NO_REF_NAME: str = "*"
STRING_PLACEHOLDER: str = "*"
"""The value to use when a string field's information is unavailable."""

NO_REF_NAME: str = STRING_PLACEHOLDER
"""The reference name to use to indicate no reference in SAM/BAM."""

NO_REF_POS: int = -1
"""The reference position to use to indicate no position in SAM/BAM."""

NO_QUERY_QUALITIES: array = qualitystring_to_array(STRING_PLACEHOLDER)
"""The quality array corresponding to an unavailable query quality string ("*")."""

_IOClasses = (io.TextIOBase, io.BufferedIOBase, io.RawIOBase, io.IOBase)
"""The classes that should be treated as file-like classes"""

Expand Down Expand Up @@ -849,16 +857,24 @@ def from_read(cls, read: pysam.AlignedSegment) -> List["SupplementaryAlignment"]
def sum_of_base_qualities(rec: AlignedSegment, min_quality_score: int = 15) -> int:
"""Calculate the sum of base qualities score for an alignment record.
This function is useful for calculating the "mate score" as implemented in samtools fixmate.
This function is useful for calculating the "mate score" as implemented in `samtools fixmate`.
Consistently with `samtools fixmate`, this function returns 0 if the record has no base
qualities.
Args:
rec: The alignment record to calculate the sum of base qualities from.
min_quality_score: The minimum base quality score to use for summation.
Returns:
The sum of base qualities on the input record. 0 if the record has no base qualities.
See:
[`calc_sum_of_base_qualities()`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L227-L238)
[`MD_MIN_QUALITY`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L42)
"""
if rec.query_qualities is None or rec.query_qualities == NO_QUERY_QUALITIES:
return 0

score: int = sum(qual for qual in rec.query_qualities if qual >= min_quality_score)
return score

Expand Down
28 changes: 28 additions & 0 deletions tests/fgpyo/sam/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

import pysam
import pytest
from pysam import AlignedSegment
from pysam import AlignmentFile
from pysam import AlignmentHeader

import fgpyo.sam as sam
Expand Down Expand Up @@ -616,6 +618,32 @@ def test_sum_of_base_qualities_some_below_minimum() -> None:
assert sum_of_base_qualities(single, min_quality_score=4) == 9


def test_sum_of_base_qualities_unmapped(tmp_path: Path) -> None:
builder = SamBuilder(r1_len=5, r2_len=5)
single: AlignedSegment = builder.add_single()

# NB: assigning to `query_qualities` does not affect the cached object returned by the property,
# but it does change the underlying attribute used when constructing the string representation
single.query_qualities = None
record_str = single.to_string()

bam_path: Path = tmp_path / "no_qualities.bam"

with bam_path.open("w") as bam_file:
bam_file.write(str(builder.header))
bam_file.write(f"{record_str}\n")

with AlignmentFile(str(bam_path)) as bam:
record = next(bam)

# NB: writing to the temp file above is necessary to construct an `AlignedSegment` with no base
# qualities, as `SamBuilder` does not currently permit the construction of such records.
# TODO simplify this after `SamBuilder` is updated to support this.
# https://github.com/fulcrumgenomics/fgpyo/issues/211
assert record.query_qualities is None
assert sum_of_base_qualities(record) == 0


def test_calc_edit_info_no_edits() -> None:
chrom = "ACGCTAGACTGCTAGCAGCATCTCATAGCACTTCGCGCTATAGCGATATAAATATCGCGATCTAGCG"
builder = SamBuilder(r1_len=30)
Expand Down

0 comments on commit f1f89e9

Please sign in to comment.