Skip to content

Commit

Permalink
Adds option to validator to skip mate validation (#1025)
Browse files Browse the repository at this point in the history
Adds option to validator to skip mate validation (although we still validate mate cigars)
  • Loading branch information
eitanbanks authored Nov 8, 2017
1 parent 060047e commit 68189f3
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 4 deletions.
24 changes: 23 additions & 1 deletion src/main/java/htsjdk/samtools/SamFileValidator.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ public class SamFileValidator {
private SAMSortOrderChecker orderChecker;
private Set<Type> errorsToIgnore;
private boolean ignoreWarnings;
private boolean skipMateValidation;
private boolean bisulfiteSequenced;
private IndexValidationStringency indexValidationStringency;
private boolean sequenceDictionaryEmptyAndNoWarningEmitted;
Expand All @@ -114,6 +115,7 @@ public SamFileValidator(final PrintWriter out, final int maxTempFiles) {
this.errorsToIgnore = EnumSet.noneOf(Type.class);
this.verbose = false;
this.ignoreWarnings = false;
this.skipMateValidation = false;
this.bisulfiteSequenced = false;
this.sequenceDictionaryEmptyAndNoWarningEmitted = false;
this.numWarnings = 0;
Expand All @@ -137,6 +139,23 @@ public void setIgnoreWarnings(final boolean ignoreWarnings) {
this.ignoreWarnings = ignoreWarnings;
}

/**
 * Controls whether mate validation beyond the mate CIGAR check is skipped.
 * <p>
 * When set to {@code true}, the validator still validates mate CIGARs but performs no
 * further mate checks. This is useful in extreme edge cases where full mate validation
 * would require a lot of memory. The validator is constructed with this option off.
 *
 * @param skipMateValidation {@code true} to skip mate validation (other than the mate
 *                           CIGAR check), {@code false} to run it
 */
public void setSkipMateValidation(final boolean skipMateValidation) {
    this.skipMateValidation = skipMateValidation;
}

/**
 * Reports whether mate validation (other than the mate CIGAR check) is being skipped.
 *
 * @return {@code true} if the validator will skip mate validation, {@code false} otherwise
 */
public boolean getSkipMateValidation() {
    return this.skipMateValidation;
}

/**
* Outputs validation summary report to out.
*
Expand Down Expand Up @@ -242,7 +261,6 @@ private void validateSamFile(final SamReader samReader, final PrintWriter out) {
}
}


/**
* Report on reads marked as paired, for which the mate was not found.
*/
Expand Down Expand Up @@ -508,6 +526,10 @@ private void validateMateFields(final SAMRecord record, final long recordNumber)
}
validateMateCigar(record, recordNumber);

if (skipMateValidation) {
return;
}

final PairEndInfo pairEndInfo = pairEndInfoByName.remove(record.getReferenceIndex(), record.getReadName());
if (pairEndInfo == null) {
pairEndInfoByName.put(record.getMateReferenceIndex(), record.getReadName(), new PairEndInfo(record, recordNumber));
Expand Down
33 changes: 30 additions & 3 deletions src/test/java/htsjdk/samtools/ValidateSamFileTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,29 @@ public void testPairedRecords() throws IOException {
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_UNALIGNED_MATE_START.getHistogramString()).getValue(), 1.0);
}

/**
 * Checks that none of the mate-mismatch error types are reported when validation is run
 * with mate validation skipped, even though the mate fields of the records have been
 * deliberately altered.
 */
@Test
public void testSkipMateValidation() throws IOException {
    final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();

    for (int i = 0; i < 5; i++) {
        samBuilder.addPair(String.valueOf(i), i, i, i + 100);
    }

    // Alter mate fields record by record. Statements that call records.next() twice are
    // intentional: the first call yields the record being modified and the second yields
    // the following record, whose value is used to derive the (wrong) mate value.
    final Iterator<SAMRecord> records = samBuilder.iterator();
    records.next().setMateReferenceName("*");
    records.next().setMateAlignmentStart(Integer.MAX_VALUE);
    records.next().setMateAlignmentStart(records.next().getAlignmentStart() + 1);
    records.next().setMateNegativeStrandFlag(!records.next().getReadNegativeStrandFlag());
    records.next().setMateReferenceIndex(records.next().getReferenceIndex() + 1);
    records.next().setMateUnmappedFlag(!records.next().getReadUnmappedFlag());

    // Typed empty list instead of the raw Collections.EMPTY_LIST avoids an unchecked conversion.
    final Histogram<String> results = executeValidationWithErrorIgnoring(
            samBuilder.getSamReader(),
            null,
            IndexValidationStringency.EXHAUSTIVE,
            Collections.<SAMValidationError.Type>emptyList(),
            true);

    // With mate validation skipped, none of the mate-mismatch errors may appear in the summary.
    Assert.assertNull(results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_NEG_STRAND.getHistogramString()));
    Assert.assertNull(results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_UNMAPPED.getHistogramString()));
    Assert.assertNull(results.get(SAMValidationError.Type.MISMATCH_MATE_ALIGNMENT_START.getHistogramString()));
    Assert.assertNull(results.get(SAMValidationError.Type.MISMATCH_MATE_REF_INDEX.getHistogramString()));
}

@Test(dataProvider = "missingMateTestCases")
public void testMissingMate(final SAMFileHeader.SortOrder sortOrder) throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder(true, sortOrder);
Expand Down Expand Up @@ -584,17 +607,21 @@ public void validateBamFileTerminationTest(final File file, final SAMValidationE

/**
 * Runs validation with no error types ignored and without skipping mate validation.
 * Delegates to {@code executeValidationWithErrorIgnoring} and returns its result.
 */
private Histogram<String> executeValidation(final SamReader samReader, final ReferenceSequenceFile reference,
                                            final IndexValidationStringency stringency) throws IOException {
    // Single delegating call: the stale four-argument call was removed because it no longer
    // matches the helper's signature and made the following return unreachable.
    return executeValidationWithErrorIgnoring(samReader, reference, stringency,
            Collections.<SAMValidationError.Type>emptyList(), false);
}

private Histogram<String> executeValidationWithErrorIgnoring(final SamReader samReader, final ReferenceSequenceFile reference,
final IndexValidationStringency stringency, Collection<SAMValidationError.Type> ignoringError) throws IOException {
private Histogram<String> executeValidationWithErrorIgnoring(final SamReader samReader,
final ReferenceSequenceFile reference,
final IndexValidationStringency stringency,
final Collection<SAMValidationError.Type> ignoringError,
final boolean skipMateValidation) throws IOException {
final File outFile = File.createTempFile("validation", ".txt");
outFile.deleteOnExit();

final PrintWriter out = new PrintWriter(outFile);
final SamFileValidator samFileValidator = new SamFileValidator(out, 8000);
samFileValidator.setIndexValidationStringency(stringency).setErrorsToIgnore(ignoringError);
samFileValidator.setSkipMateValidation(skipMateValidation);
samFileValidator.validateSamFileSummary(samReader, reference);

final LineNumberReader reader = new LineNumberReader(new FileReader(outFile));
Expand Down

0 comments on commit 68189f3

Please sign in to comment.