|
158 | 158 | import enum
|
159 | 159 | import io
|
160 | 160 | import sys
|
| 161 | +from array import array |
161 | 162 | from collections.abc import Collection
|
162 | 163 | from itertools import chain
|
163 | 164 | from pathlib import Path
|
|
178 | 179 | from pysam import AlignedSegment
|
179 | 180 | from pysam import AlignmentFile as SamFile
|
180 | 181 | from pysam import AlignmentHeader as SamHeader
|
| 182 | +from pysam import qualitystring_to_array |
181 | 183 | from typing_extensions import deprecated
|
182 | 184 |
|
183 | 185 | import fgpyo.io
|
|
189 | 191 | NO_REF_INDEX: int = -1
|
190 | 192 | """The reference index to use to indicate no reference in SAM/BAM."""
|
191 | 193 |
|
192 |
| -NO_REF_NAME: str = "*" |
| 194 | +STRING_PLACEHOLDER: str = "*" |
| 195 | +"""The value to use when a string field's information is unavailable.""" |
| 196 | + |
| 197 | +NO_REF_NAME: str = STRING_PLACEHOLDER |
193 | 198 | """The reference name to use to indicate no reference in SAM/BAM."""
|
194 | 199 |
|
195 | 200 | NO_REF_POS: int = -1
|
196 | 201 | """The reference position to use to indicate no position in SAM/BAM."""
|
197 | 202 |
|
| 203 | +NO_QUERY_QUALITIES: array = qualitystring_to_array(STRING_PLACEHOLDER) |
| 204 | +"""The quality array corresponding to an unavailable query quality string ("*").""" |
| 205 | + |
198 | 206 | _IOClasses = (io.TextIOBase, io.BufferedIOBase, io.RawIOBase, io.IOBase)
|
199 | 207 | """The classes that should be treated as file-like classes"""
|
200 | 208 |
|
@@ -849,16 +857,24 @@ def from_read(cls, read: pysam.AlignedSegment) -> List["SupplementaryAlignment"]
|
849 | 857 | def sum_of_base_qualities(rec: AlignedSegment, min_quality_score: int = 15) -> int:
|
850 | 858 | """Calculate the sum of base qualities score for an alignment record.
|
851 | 859 |
|
852 |
| - This function is useful for calculating the "mate score" as implemented in samtools fixmate. |
| 860 | + This function is useful for calculating the "mate score" as implemented in `samtools fixmate`. |
| 861 | + Consistent with `samtools fixmate`, this function returns 0 if the record has no base |
| 862 | + qualities (they are either absent or equal to the "*" placeholder quality array). |
853 | 863 |
|
854 | 864 | Args:
|
855 | 865 | rec: The alignment record to calculate the sum of base qualities from.
|
856 | 866 | min_quality_score: The minimum base quality score (inclusive) a base must have to be included in the sum.
|
857 | 867 |
|
| 868 | + Returns: |
| 869 | + The sum of the base qualities on the input record that are at least `min_quality_score`, or 0 if the record has no base qualities. |
| 870 | +
|
858 | 871 | See:
|
859 | 872 | [`calc_sum_of_base_qualities()`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L227-L238)
|
860 | 873 | [`MD_MIN_QUALITY`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L42)
|
861 | 874 | """
|
| 875 | + if rec.query_qualities is None or rec.query_qualities == NO_QUERY_QUALITIES: |
| 876 | + return 0 |
| 877 | + |
862 | 878 | score: int = sum(qual for qual in rec.query_qualities if qual >= min_quality_score)
|
863 | 879 | return score
|
864 | 880 |
|
|
0 commit comments