diff --git a/fgpyo/sam/__init__.py b/fgpyo/sam/__init__.py index 8fa2c19..7f2ec87 100644 --- a/fgpyo/sam/__init__.py +++ b/fgpyo/sam/__init__.py @@ -158,6 +158,7 @@ import enum import io import sys +from array import array from collections.abc import Collection from itertools import chain from pathlib import Path @@ -178,6 +179,7 @@ from pysam import AlignedSegment from pysam import AlignmentFile as SamFile from pysam import AlignmentHeader as SamHeader +from pysam import qualitystring_to_array from typing_extensions import deprecated import fgpyo.io @@ -189,12 +191,18 @@ NO_REF_INDEX: int = -1 """The reference index to use to indicate no reference in SAM/BAM.""" -NO_REF_NAME: str = "*" +STRING_PLACEHOLDER: str = "*" +"""The value to use when a string field's information is unavailable.""" + +NO_REF_NAME: str = STRING_PLACEHOLDER """The reference name to use to indicate no reference in SAM/BAM.""" NO_REF_POS: int = -1 """The reference position to use to indicate no position in SAM/BAM.""" +NO_QUERY_QUALITIES: array = qualitystring_to_array(STRING_PLACEHOLDER) +"""The quality array corresponding to an unavailable query quality string ("*").""" + _IOClasses = (io.TextIOBase, io.BufferedIOBase, io.RawIOBase, io.IOBase) """The classes that should be treated as file-like classes""" @@ -849,16 +857,24 @@ def from_read(cls, read: pysam.AlignedSegment) -> List["SupplementaryAlignment"] def sum_of_base_qualities(rec: AlignedSegment, min_quality_score: int = 15) -> int: """Calculate the sum of base qualities score for an alignment record. - This function is useful for calculating the "mate score" as implemented in samtools fixmate. + This function is useful for calculating the "mate score" as implemented in `samtools fixmate`. + Consistently with `samtools fixmate`, this function returns 0 if the record has no base + qualities. Args: rec: The alignment record to calculate the sum of base qualities from. min_quality_score: The minimum base quality score to use for summation. + Returns: + The sum of base qualities on the input record. 0 if the record has no base qualities. + See: [`calc_sum_of_base_qualities()`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L227-L238) [`MD_MIN_QUALITY`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L42) """ + if rec.query_qualities is None or rec.query_qualities == NO_QUERY_QUALITIES: + return 0 + score: int = sum(qual for qual in rec.query_qualities if qual >= min_quality_score) return score diff --git a/tests/fgpyo/sam/test_sam.py b/tests/fgpyo/sam/test_sam.py index e822ee5..5f8b46e 100755 --- a/tests/fgpyo/sam/test_sam.py +++ b/tests/fgpyo/sam/test_sam.py @@ -10,6 +10,8 @@ import pysam import pytest +from pysam import AlignedSegment +from pysam import AlignmentFile from pysam import AlignmentHeader import fgpyo.sam as sam @@ -616,6 +618,32 @@ def test_sum_of_base_qualities_some_below_minimum() -> None: assert sum_of_base_qualities(single, min_quality_score=4) == 9 +def test_sum_of_base_qualities_unmapped(tmp_path: Path) -> None: + builder = SamBuilder(r1_len=5, r2_len=5) + single: AlignedSegment = builder.add_single() + + # NB: assigning to `query_qualities` does not affect the cached object returned by the property, + # but it does change the underlying attribute used when constructing the string representation + single.query_qualities = None + record_str = single.to_string() + + bam_path: Path = tmp_path / "no_qualities.bam" + + with bam_path.open("w") as bam_file: + bam_file.write(str(builder.header)) + bam_file.write(f"{record_str}\n") + + with AlignmentFile(str(bam_path)) as bam: + record = next(bam) + + # NB: writing to the temp file above is necessary to construct an `AlignedSegment` with no base + # qualities, as `SamBuilder` does not currently permit the construction of such records. + # TODO simplify this after `SamBuilder` is updated to support this. + # https://github.com/fulcrumgenomics/fgpyo/issues/211 + assert record.query_qualities is None + assert sum_of_base_qualities(record) == 0 + + def test_calc_edit_info_no_edits() -> None: chrom = "ACGCTAGACTGCTAGCAGCATCTCATAGCACTTCGCGCTATAGCGATATAAATATCGCGATCTAGCG" builder = SamBuilder(r1_len=30)