Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: sum_of_base_qualities() returns zero when a record has no base qualities #212

Merged
merged 4 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@
import enum
import io
import sys
from array import array
from collections.abc import Collection
from itertools import chain
from pathlib import Path
Expand All @@ -178,6 +179,7 @@
from pysam import AlignedSegment
from pysam import AlignmentFile as SamFile
from pysam import AlignmentHeader as SamHeader
from pysam import qualitystring_to_array
from typing_extensions import deprecated

import fgpyo.io
Expand All @@ -189,12 +191,18 @@
NO_REF_INDEX: int = -1
"""The reference index to use to indicate no reference in SAM/BAM."""

NO_REF_NAME: str = "*"
STRING_PLACEHOLDER: str = "*"
"""The value to use when a string field's information is unavailable."""

NO_REF_NAME: str = STRING_PLACEHOLDER
"""The reference name to use to indicate no reference in SAM/BAM."""

NO_REF_POS: int = -1
"""The reference position to use to indicate no position in SAM/BAM."""

NO_QUERY_QUALITIES: array = qualitystring_to_array(STRING_PLACEHOLDER)
clintval marked this conversation as resolved.
Show resolved Hide resolved
"""The quality array corresponding to an unavailable query quality string ("*")."""

_IOClasses = (io.TextIOBase, io.BufferedIOBase, io.RawIOBase, io.IOBase)
"""The classes that should be treated as file-like classes"""

Expand Down Expand Up @@ -849,16 +857,24 @@ def from_read(cls, read: pysam.AlignedSegment) -> List["SupplementaryAlignment"]
def sum_of_base_qualities(rec: AlignedSegment, min_quality_score: int = 15) -> int:
"""Calculate the sum of base qualities score for an alignment record.

This function is useful for calculating the "mate score" as implemented in samtools fixmate.
This function is useful for calculating the "mate score" as implemented in `samtools fixmate`.
Consistently with `samtools fixmate`, this function returns 0 if the record has no base
qualities.

Args:
rec: The alignment record to calculate the sum of base qualities from.
min_quality_score: The minimum base quality score to use for summation.

Returns:
The sum of base qualities on the input record. 0 if the record has no base qualities.

See:
[`calc_sum_of_base_qualities()`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L227-L238)
[`MD_MIN_QUALITY`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L42)
"""
if rec.query_qualities is None or rec.query_qualities == NO_QUERY_QUALITIES:
return 0

score: int = sum(qual for qual in rec.query_qualities if qual >= min_quality_score)
return score

Expand Down
28 changes: 28 additions & 0 deletions tests/fgpyo/sam/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

import pysam
import pytest
from pysam import AlignedSegment
from pysam import AlignmentFile
from pysam import AlignmentHeader

import fgpyo.sam as sam
Expand Down Expand Up @@ -616,6 +618,32 @@ def test_sum_of_base_qualities_some_below_minimum() -> None:
assert sum_of_base_qualities(single, min_quality_score=4) == 9


def test_sum_of_base_qualities_unmapped(tmp_path: Path) -> None:
builder = SamBuilder(r1_len=5, r2_len=5)
single: AlignedSegment = builder.add_single()

# NB: assigning to `query_qualities` does not affect the cached object returned by the property,
# but it does change the underlying attribute used when constructing the string representation
single.query_qualities = None
record_str = single.to_string()

bam_path: Path = tmp_path / "no_qualities.bam"

with bam_path.open("w") as bam_file:
bam_file.write(str(builder.header))
bam_file.write(f"{record_str}\n")

with AlignmentFile(str(bam_path)) as bam:
record = next(bam)

# NB: writing to the temp file above is necessary to construct an `AlignedSegment` with no base
# qualities, as `SamBuilder` does not currently permit the construction of such records.
# TODO simplify this after `SamBuilder` is updated to support this.
# https://github.com/fulcrumgenomics/fgpyo/issues/211
assert record.query_qualities is None
assert sum_of_base_qualities(record) == 0


def test_calc_edit_info_no_edits() -> None:
chrom = "ACGCTAGACTGCTAGCAGCATCTCATAGCACTTCGCGCTATAGCGATATAAATATCGCGATCTAGCG"
builder = SamBuilder(r1_len=30)
Expand Down
Loading