Skip to content

Commit 7d03fb8

Browse files
committed
CRAM 3.1 read support changes and tests.
1 parent 1572af7 commit 7d03fb8

File tree

6 files changed

+843
-12
lines changed

6 files changed

+843
-12
lines changed

src/test/java/org/broadinstitute/hellbender/GATKBaseTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ public abstract class GATKBaseTest extends BaseTest {
7575
public static final String NA12878_20_21_WGS_bam = largeFileTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam";
7676
public static final String NA12878_20_21_WGS_mmp2_bam = largeFileTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.20.21.mmp2.bam";
7777
public static final String NA12878_20_21_WGS_cram = largeFileTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram";
78+
// created using samtools archive mode ("--output-fmt cram,version=3.1,archive"); contains slices that exercise all
79+
// of the cram 3.1 codecs (rANSnx16, adaptive arithmetic, fqzcomp, and name tokenization)
80+
public static final String NA12878_20_21_WGS_cram_31 = largeFileTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.1.samtools.archive.cram";
7881

7982
public static final String NA12878_20_21_covered_regions = publicTestDir + "wgs_calling_regions.v1.chr20_chr21.interval_list";
8083

src/test/java/org/broadinstitute/hellbender/PrintFileDiagnosticsIntegrationTest.java

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package org.broadinstitute.hellbender;
22

3+
import htsjdk.beta.plugin.IOUtils;
4+
import htsjdk.io.IOPath;
35
import org.apache.commons.lang3.tuple.Pair;
6+
import org.broadinstitute.hellbender.engine.GATKPath;
47
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
58
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
69
import org.testng.annotations.DataProvider;
@@ -14,11 +17,12 @@ public class PrintFileDiagnosticsIntegrationTest extends CommandLineProgramTest
1417

1518
@DataProvider(name = "fileDiagnosticsTestCases")
1619
public Object[][] getFileDiagnosticsTestCases() {
20+
// the pathnames used by the diagnostics tool wind up embedded in the diagnostics output file, so for these
21+
// tests use just a relative pathname as input (instead of the named constants, e.g., NA12878_20_21_WGS_cram,
22+
// which are full path names) in order to avoid test failures caused by the full pathname varying in
23+
// different environments, e.g., in CI
1724
return new Object[][]{
1825
{
19-
//this pathname is embedded in the diagnostics output file, so we use a relative pathname
20-
// instead of the named constant NA12878_20_21_WGS_cram in order to avoid test failures
21-
// caused by the full pathname varying in different environments
2226
"src/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram",
2327
List.of(Pair.of("count-limit", "10")),
2428
"src/test/resources/filediagnostics/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.txt"
@@ -33,6 +37,12 @@ public Object[][] getFileDiagnosticsTestCases() {
3337
null,
3438
"src/test/resources/filediagnostics/cram_with_crai_index.cram.crai.txt"
3539
},
40+
{
41+
// cram file that uses all the new 3.1 codecs (fqzcomp, name tokenization, rANSnx16, and adaptive arithmetic)
42+
"src/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.1.samtools.archive.cram",
43+
List.of(Pair.of("count-limit", "20")),
44+
"src/test/resources/filediagnostics/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.1.samtools.archive.cram.txt"
45+
}
3646
};
3747
}
3848

@@ -41,15 +51,22 @@ public void testFileDiagnostics(
4151
final String inputPath,
4252
final List<Pair<String, String>> extraArgs,
4353
final String expectedOutputPath) throws IOException {
44-
final File outFile = createTempFile("testFileDiagnostics", ".txt");
45-
ArgumentsBuilder argBuilder = new ArgumentsBuilder();
46-
argBuilder.addInput(inputPath);
47-
argBuilder.addOutput(outFile);
54+
final IOPath outFile = IOUtils.createTempPath("testFileDiagnostics", ".txt");
55+
runFileDiagnosticsTool(new GATKPath(inputPath), extraArgs, outFile);
56+
IntegrationTestSpec.assertEqualTextFiles(outFile.toPath().toFile(), new File(expectedOutputPath));
57+
}
58+
59+
private void runFileDiagnosticsTool(
60+
final IOPath inputPath,
61+
final List<Pair<String, String>> extraArgs,
62+
final IOPath outputPath) {
63+
final ArgumentsBuilder argBuilder = new ArgumentsBuilder();
64+
argBuilder.addInput(inputPath.getRawInputString());
65+
argBuilder.addOutput(outputPath.getRawInputString());
4866
if (extraArgs != null) {
4967
extraArgs.forEach(argPair -> argBuilder.add(argPair.getKey(), argPair.getValue()));
5068
}
5169
runCommandLine(argBuilder.getArgsList());
52-
53-
IntegrationTestSpec.assertEqualTextFiles(outFile, new File(expectedOutputPath));
5470
}
71+
5572
}

src/test/java/org/broadinstitute/hellbender/engine/CRAMSupportIntegrationTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ public Object[][] getRoundTripCRAMTests() {
4343
// read equality; at least some of which are because they are unmapped/unplaced, but have cigar
4444
// strings that both samtools and htsjdk drop when roundtripping
4545
{NA12878_20_21_WGS_bam, b37_reference_20_21, true, false},
46+
// roundtrip a v3.0 file
4647
// this cram is the result of converting the above bam to cram using samtools; once the file is
4748
// converted, we can use full read equality when roundtripping through cram, so we don't need to
4849
// be lenient
4950
{NA12878_20_21_WGS_cram, b37_reference_20_21, false, false},
50-
// roundtrip a v2.1 file
51-
{ largeFileTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram",
52-
b37_reference_20_21, false, false },
51+
// roundtrip a v3.1 file
52+
{NA12878_20_21_WGS_cram_31, b37_reference_20_21, false, false },
5353
};
5454
}
5555

src/test/java/org/broadinstitute/hellbender/testutils/SamtoolsTestUtilsTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ public void testSamtoolsIsAvailable() {
2626

2727
@Test
2828
public void testSamtoolsVersion() {
29+
if (isGATKDockerContainer()) {
30+
// this test confirms that we're running the specific, recent version of samtools used for cram 3.1
31+
// validation, so skip it when running on the GATK Docker container, which doesn't have the same
32+
// recent samtools version that we use to verify cram 3.1 functionality
33+
throw new SkipException("Samtools not available in GATK Docker container");
34+
}
2935
if (!SamtoolsTestUtils.isSamtoolsAvailable()) {
3036
throw new SkipException("Samtools not available on local device");
3137
}

0 commit comments

Comments
 (0)