Skip to content

Commit f1f89e9

Browse files
authored
fix: sum_of_base_qualities() returns zero when a record has no base qualities (#212)
Closes #210
1 parent c5eb469 commit f1f89e9

File tree

2 files changed

+46
-2
lines changed

2 files changed

+46
-2
lines changed

fgpyo/sam/__init__.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@
158158
import enum
159159
import io
160160
import sys
161+
from array import array
161162
from collections.abc import Collection
162163
from itertools import chain
163164
from pathlib import Path
@@ -178,6 +179,7 @@
178179
from pysam import AlignedSegment
179180
from pysam import AlignmentFile as SamFile
180181
from pysam import AlignmentHeader as SamHeader
182+
from pysam import qualitystring_to_array
181183
from typing_extensions import deprecated
182184

183185
import fgpyo.io
@@ -189,12 +191,18 @@
189191
NO_REF_INDEX: int = -1
190192
"""The reference index to use to indicate no reference in SAM/BAM."""
191193

192-
NO_REF_NAME: str = "*"
194+
STRING_PLACEHOLDER: str = "*"
195+
"""The value to use when a string field's information is unavailable."""
196+
197+
NO_REF_NAME: str = STRING_PLACEHOLDER
193198
"""The reference name to use to indicate no reference in SAM/BAM."""
194199

195200
NO_REF_POS: int = -1
196201
"""The reference position to use to indicate no position in SAM/BAM."""
197202

203+
NO_QUERY_QUALITIES: array = qualitystring_to_array(STRING_PLACEHOLDER)
204+
"""The quality array corresponding to an unavailable query quality string ("*")."""
205+
198206
_IOClasses = (io.TextIOBase, io.BufferedIOBase, io.RawIOBase, io.IOBase)
199207
"""The classes that should be treated as file-like classes"""
200208

@@ -849,16 +857,24 @@ def from_read(cls, read: pysam.AlignedSegment) -> List["SupplementaryAlignment"]
849857
def sum_of_base_qualities(rec: AlignedSegment, min_quality_score: int = 15) -> int:
850858
"""Calculate the sum of base qualities score for an alignment record.
851859
852-
This function is useful for calculating the "mate score" as implemented in samtools fixmate.
860+
This function is useful for calculating the "mate score" as implemented in `samtools fixmate`.
861+
Consistently with `samtools fixmate`, this function returns 0 if the record has no base
862+
qualities.
853863
854864
Args:
855865
rec: The alignment record to calculate the sum of base qualities from.
856866
min_quality_score: The minimum base quality score to use for summation.
857867
868+
Returns:
869+
The sum of base qualities on the input record. 0 if the record has no base qualities.
870+
858871
See:
859872
[`calc_sum_of_base_qualities()`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L227-L238)
860873
[`MD_MIN_QUALITY`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L42)
861874
"""
875+
if rec.query_qualities is None or rec.query_qualities == NO_QUERY_QUALITIES:
876+
return 0
877+
862878
score: int = sum(qual for qual in rec.query_qualities if qual >= min_quality_score)
863879
return score
864880

tests/fgpyo/sam/test_sam.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
import pysam
1212
import pytest
13+
from pysam import AlignedSegment
14+
from pysam import AlignmentFile
1315
from pysam import AlignmentHeader
1416

1517
import fgpyo.sam as sam
@@ -616,6 +618,32 @@ def test_sum_of_base_qualities_some_below_minimum() -> None:
616618
assert sum_of_base_qualities(single, min_quality_score=4) == 9
617619

618620

621+
def test_sum_of_base_qualities_unmapped(tmp_path: Path) -> None:
622+
builder = SamBuilder(r1_len=5, r2_len=5)
623+
single: AlignedSegment = builder.add_single()
624+
625+
# NB: assigning to `query_qualities` does not affect the cached object returned by the property,
626+
# but it does change the underlying attribute used when constructing the string representation
627+
single.query_qualities = None
628+
record_str = single.to_string()
629+
630+
bam_path: Path = tmp_path / "no_qualities.bam"
631+
632+
with bam_path.open("w") as bam_file:
633+
bam_file.write(str(builder.header))
634+
bam_file.write(f"{record_str}\n")
635+
636+
with AlignmentFile(str(bam_path)) as bam:
637+
record = next(bam)
638+
639+
# NB: writing to the temp file above is necessary to construct an `AlignedSegment` with no base
640+
# qualities, as `SamBuilder` does not currently permit the construction of such records.
641+
# TODO simplify this after `SamBuilder` is updated to support this.
642+
# https://github.com/fulcrumgenomics/fgpyo/issues/211
643+
assert record.query_qualities is None
644+
assert sum_of_base_qualities(record) == 0
645+
646+
619647
def test_calc_edit_info_no_edits() -> None:
620648
chrom = "ACGCTAGACTGCTAGCAGCATCTCATAGCACTTCGCGCTATAGCGATATAAATATCGCGATCTAGCG"
621649
builder = SamBuilder(r1_len=30)

0 commit comments

Comments
 (0)