diff --git a/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java b/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java index 6e222f61fb..ab993aae78 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java +++ b/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java @@ -31,42 +31,15 @@ public class SAMRecordQueryNameComparator implements SAMRecordComparator, Serializable { private static final long serialVersionUID = 1L; + private static boolean isDigit(final char c) { + return Character.isDigit(c); + } + @Override public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) { - int cmp = fileOrderCompare(samRecord1, samRecord2); - if (cmp != 0) { - return cmp; - } - - final boolean r1Paired = samRecord1.getReadPairedFlag(); - final boolean r2Paired = samRecord2.getReadPairedFlag(); - - if (r1Paired || r2Paired) { - if (!r1Paired) return 1; - else if (!r2Paired) return -1; - else if (samRecord1.getFirstOfPairFlag() && samRecord2.getSecondOfPairFlag()) return -1; - else if (samRecord1.getSecondOfPairFlag() && samRecord2.getFirstOfPairFlag()) return 1; - } - - if (samRecord1.getReadNegativeStrandFlag() != samRecord2.getReadNegativeStrandFlag()) { - return (samRecord1.getReadNegativeStrandFlag()? 1: -1); - } - if (samRecord1.isSecondaryAlignment() != samRecord2.isSecondaryAlignment()) { - return samRecord2.isSecondaryAlignment()? -1: 1; - } - if (samRecord1.getSupplementaryAlignmentFlag() != samRecord2.getSupplementaryAlignmentFlag()) { - return samRecord2.getSupplementaryAlignmentFlag() ? -1 : 1; - } - final Integer hitIndex1 = samRecord1.getIntegerAttribute(SAMTag.HI); - final Integer hitIndex2 = samRecord2.getIntegerAttribute(SAMTag.HI); - if (hitIndex1 != null) { - if (hitIndex2 == null) return 1; - else { - cmp = hitIndex1.compareTo(hitIndex2); - if (cmp != 0) return cmp; - } - } else if (hitIndex2 != null) return -1; - return 0; + final int retval = compareReadNames(samRecord1.getReadName(), samRecord2.getReadName()); + if (retval == 0) return Integer.compare(samRecord1.getFlags()&0xc0, samRecord2.getFlags()&0xc0); + else return retval; } /** @@ -85,7 +58,57 @@ public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecor * Encapsulate algorithm for comparing read names in queryname-sorted file, since there have been * conversations about changing the behavior. */ - public static int compareReadNames(final String readName1, final String readName2) { - return readName1.compareTo(readName2); + public static int compareReadNames(final String name1, final String name2) { + int index1 = 0; + int index2 = 0; + final int len1 = name1.length(); + final int len2 = name2.length(); + + // keep going while we have characters left + while (index1 < len1 && index2 < len2) { + if (!isDigit(name1.charAt(index1)) || !isDigit(name2.charAt(index2))) { // no more + if (name1.charAt(index1) != name2.charAt(index2)) { + return (int) name1.charAt(index1) - (int) name2.charAt(index2); + } else { + index1++; + index2++; + } + } else { // next characters are digits + // skip over leading zeros + while (index1 < len1 && name1.charAt(index1) == '0') index1++; + while (index2 < len2 && name2.charAt(index2) == '0') index2++; + // skip over any matching digits + while (index1 < len1 && index2 < len2 && + isDigit(name1.charAt(index1)) && + isDigit(name2.charAt(index2)) && + name1.charAt(index1) == name2.charAt(index2)) { + index1++; + index2++; + } + + boolean isDigit1 = index1 < len1 && isDigit(name1.charAt(index1)); + boolean isDigit2 = index2 < len2 && isDigit(name2.charAt(index2)); + + if (isDigit1 && isDigit2) { + // skip until we hit a non-digit or the end. + int i = 0; + while (index1 + i < len1 && index2 + i < len2 && + isDigit(name1.charAt(index1 + i)) && + isDigit(name2.charAt(index2 + i))) { + i++; + } + if (index1 + i < len1 && isDigit(name1.charAt(index1 + i))) return 1; + else if (index2 + i < len2 && isDigit(name2.charAt(index2 + i))) return -1; + else return (int)name1.charAt(index1) - (int)name2.charAt(index2); + } + else if (isDigit1) return 1; + else if (isDigit2) return -1; + else if (index1 != index2) return index2 - index1; + } + } + + if (index1 < len1) return 1; + else if (index2 < len2) return -1; + else return 0; } } diff --git a/src/test/java/htsjdk/samtools/SAMRecordQueryNameComparatorTest.java b/src/test/java/htsjdk/samtools/SAMRecordQueryNameComparatorTest.java index b78fc81098..db1a0f5640 100644 --- a/src/test/java/htsjdk/samtools/SAMRecordQueryNameComparatorTest.java +++ b/src/test/java/htsjdk/samtools/SAMRecordQueryNameComparatorTest.java @@ -49,76 +49,32 @@ public void testCompareDifferentNames() throws Exception { Assert.assertTrue(COMPARATOR.compare(b, a) > 0); } - private static SAMRecord copyAndSet(final SAMRecord record, final Consumer setParams) { - final SAMRecord copy = record.deepCopy(); - setParams.accept(copy); - return copy; - } - - - // this test cases are separated for the different names comparison for re-use with SAMRecordQueryHashComparator - @DataProvider - public static Object[][] equalNameComparisonData() { - // base record with the information used in the comparator: - // - read name A - // - positive strand -> default - // - unpaired (without first/second of pair flags) -> explicitly set - // - primary alignment (and not supplementary) -> explicitly set - // - no hit index (HI tag) -> default - final SAMRecord record = new SAMRecord(null); - record.setReadName("A"); - record.setReadPairedFlag(false); - record.setFirstOfPairFlag(false); - record.setSecondOfPairFlag(false); - // primary/secondary/supplementary alignments - record.setSecondaryAlignment(false); - record.setSupplementaryAlignmentFlag(false); - - // record1, record2, comparison value - return new Object[][] { - // same record is equals after comparing all the fields - {record, record, 0}, - - // upaired vs. paired - {record, copyAndSet(record, (r) -> r.setReadPairedFlag(true)), 1}, - {copyAndSet(record, (r) -> r.setReadPairedFlag(true)), record, -1}, - // first/second of pair in natural order - {copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setFirstOfPairFlag(true);}), - copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setSecondOfPairFlag(true);}), - -1}, - {copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setSecondOfPairFlag(true);}), - copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setFirstOfPairFlag(true);}), - 1}, - - // negative strand is the last - {record, copyAndSet(record, r -> r.setReadNegativeStrandFlag(true)), -1}, - - // primary alignment is first compared to not primary - {record, copyAndSet(record, r -> r.setSecondaryAlignment(true)), -1}, - {copyAndSet(record, r -> r.setSecondaryAlignment(true)), record, 1}, - // secondary alignment is last compared to primary - {record, copyAndSet(record, r -> r.setSupplementaryAlignmentFlag(true)), -1}, - {copyAndSet(record, r -> r.setSupplementaryAlignmentFlag(true)), record, 1}, - - // the one with HI tag is first if the other is null - {record, copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), -1}, - {copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), record, 1}, - // if both have HI tag, order by it - {copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), - copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), 0}, - {copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), - copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 2)), -1}, - {copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 16)), - copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 5)), 1} + @DataProvider(name = "readNameOrderTestCases") + public Object[][] readNameOrderTestCases() { + // See: https://sourceforge.net/p/samtools/mailman/message/26674128/ + final String[] names1 = {"#01", "#2", ".1", ".09", "a001", "a01", "a01z", "a1", "a1a"}; + // See: https://github.com/samtools/samtools/blob/bdc5bb81c11f2ab25ea97351213cb87b33857c4d/test/sort/name.sort.expected.sam#L14-L28 + final String[] names2 = {"r000", "r001", "r002", "r003", "r004", "u1", "x1", "x2", "x3", "x4", "x5", "x6"}; + return new Object[][]{ + names1, names2 }; } - - - @Test(dataProvider = "equalNameComparisonData") - public void testCompareEqualNames(final SAMRecord record1, final SAMRecord record2, final int sign) throws Exception { - final int comparisonResult = COMPARATOR.compare(record1, record2); - Assert.assertEquals(Integer.signum(comparisonResult),sign); + @Test(dataProvider = "readNameOrderTestCases") + public void testReadNameOrder(final String[] names) { + final SAMRecord a = new SAMRecord(null); + final SAMRecord b = new SAMRecord(null); + int i, j; + for (i = 0; i < names.length; i++) { + for (j = 0; j < names.length; j++) { + a.setReadName(names[i]); + b.setReadName(names[j]); + final int actual = Integer.compare(COMPARATOR.compare(a, b), 0); + final int expected = Integer.compare(i, j); + Assert.assertEquals(actual, expected, names[i] + " < " + names[j]); + } + } } + }