118 changes: 118 additions & 0 deletions .github/workflows/gatk-tests-ci.yml
@@ -0,0 +1,118 @@
name: Test GATK 4 only tests, no WDL
concurrency:
  group: gatk-tests-ci-${{ github.ref }}
  cancel-in-progress: true
on:
  push:
    branches:
      - 'master'
  pull_request:
  workflow_dispatch:
    inputs:
      branch:
        description: 'Branch to run workflow on'
        required: true
        default: 'master'

env:
  TERM: dumb
  GRADLE_OPTS: "-Xmx2048m -Dorg.gradle.daemon=false"
  HELLBENDER_TEST_INPUTS: gs://hellbender/test/resources/
  HELLBENDER_TEST_STAGING: gs://hellbender-test-logs/staging/
  HELLBENDER_TEST_LOGS: /hellbender-test-logs/build_reports/
  HELLBENDER_TEST_PROJECT: broad-dsde-dev
  TEST_VERBOSITY: minimal
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  ## This workaround is necessary because there is no equivalent to the old TRAVIS_SECURE_ENVIRONMENT variable, which indicated
  ## whether a run was privileged and had access to secrets. Since the GCP credentials are needed by all tests in order to upload
  ## their results, they are a reasonable proxy for whether this entire execution has credentials. https://github.com/actions/runner/issues/520
  check-secrets:
    name: check if the environment has privileges
    outputs:
      google-credentials: ${{ steps.google-credentials.outputs.defined }}
    runs-on: ubuntu-latest
    steps:
      - id: google-credentials
        env:
          GCP_CREDENTIALS: ${{ secrets.GCP_CREDENTIALS }}
        if: "${{ env.GCP_CREDENTIALS != '' }}"
        run: echo defined=true >> $GITHUB_OUTPUT


  # Run our non-docker tests
  test:
    runs-on: ubuntu-latest
    needs: check-secrets
    strategy:
      matrix:
        java: [ 17.0.6+10 ]
        experimental: [ false ]
        scalaVersion: [ 2.13 ]
        testType: [ integration, unit, variantcalling ]
      fail-fast: false
    continue-on-error: ${{ matrix.experimental }}
    env:
      TEST_TYPE: ${{ matrix.testType }}
      SCALA_VERSION: ${{ matrix.scalaVersion }}
    name: Java ${{ matrix.java }} build and test ${{ matrix.testType }}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.branch || github.ref }}
      - uses: ./.github/actions/purge-runner-disk
      - name: 'Set up java ${{ matrix.java }}'
        uses: actions/setup-java@v3
        with:
          java-version: ${{ matrix.java }}
          distribution: 'temurin'
          cache: gradle

      - name: 'Compile with Gradle'
        run: |
          ./gradlew compileJava ; ./gradlew installDist

      # Google Cloud setup
      - id: 'gcloud-auth'
        if: needs.check-secrets.outputs.google-credentials == 'true'
        uses: google-github-actions/auth@v0
        with:
          credentials_json: ${{ secrets.GCP_CREDENTIALS }}
          project_id: ${{ env.HELLBENDER_TEST_PROJECT }}
          create_credentials_file: true

      - name: "export the credentials for GATK tests"
        if: needs.check-secrets.outputs.google-credentials == 'true'
        run: echo "HELLBENDER_JSON_SERVICE_ACCOUNT_KEY=${{ steps.gcloud-auth.outputs.credentials_file_path }}" >> $GITHUB_ENV

      - name: 'Set up Cloud SDK'
        if: needs.check-secrets.outputs.google-credentials == 'true'
        uses: google-github-actions/setup-gcloud@v2

      - name: pull lfs files
        run: git lfs pull

      - name: compile test code
        run: ./gradlew compileTestJava

      - name: run-tests
        if: ${{ needs.check-secrets.outputs.google-credentials == 'true' || matrix.testType != 'cloud' }}
        id: jacoco-tests
        run: |
          ./gradlew --daemon -Dscala.version=${{ env.SCALA_VERSION }} jacocoTestReport

      - uses: ./.github/actions/upload-gatk-test-results
        if: always()
        with:
          warnPR: ${{ github.event_name == 'pull_request' && steps.jacoco-tests.outcome != 'success' }}
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          job-matrix-id: ${{ github.run_id }}.1${{ strategy.job-index }}
          repo-path: ${{ github.ref_name }}_${{ github.run_id }}.1${{ strategy.job-index }}
          bot-comment-key: ${{ secrets.GATK_BOT_COMMENT_KEY }}
          identifier: Java ${{ matrix.java }} build and test ${{ matrix.testType }}
          only-artifact: ${{ needs.check-secrets.outputs.google-credentials != 'true' }}


@@ -0,0 +1,102 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.samtools.CigarOperator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCompoundHeaderLine;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.clipping.ReadClipper;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupBasedAlleles;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Counts the number of soft-clipped reads that support each allele.
 *
 * <h3> Caveats </h3>
 * This annotation can only be calculated by Mutect2 and HaplotypeCaller. Reads whose soft clipping
 * was reverted during processing are still counted, using the original clipping recorded in the
 * ORIGINAL_SOFTCLIP_TAG read attribute (see ReadClipper).
 */
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of soft clipped reads per allele")
public class ClippedReadsCount implements GenotypeAnnotation {
    private final static Logger logger = LogManager.getLogger(ClippedReadsCount.class);

    @Override
    public List<String> getKeyNames() {
        return Arrays.asList(GATKVCFConstants.SOFT_CLIP_LEFT_COUNT_KEY, GATKVCFConstants.SOFT_CLIP_RIGHT_COUNT_KEY);
    }

    @Override
    public VCFCompoundHeaderLine.SupportedHeaderLineType annotationType() {
        return GenotypeAnnotation.super.annotationType();
    }

    @Override
    public List<VCFCompoundHeaderLine> getDescriptions() {
        return GenotypeAnnotation.super.getDescriptions();
    }

    @Override
    public void annotate(ReferenceContext ref, VariantContext vc, Genotype g, GenotypeBuilder gb, AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        // Gather both the informative and the filtered evidence for this sample.
        List<GATKRead> allReads = likelihoods.sampleEvidence(likelihoods.indexOfSample(g.getSampleName())).stream().collect(Collectors.toList());
        allReads.addAll(likelihoods.filteredSampleEvidence(likelihoods.indexOfSample(g.getSampleName())).stream().collect(Collectors.toList()));
        List<GATKRead> leftClippedReads = allReads.stream().filter(rd -> (rd.getStart() <= rd.getEnd()) && rd.overlaps(vc) && wasReadClipped(rd, false)).collect(Collectors.toList());
        List<GATKRead> rightClippedReads = allReads.stream().filter(rd -> (rd.getStart() <= rd.getEnd()) && rd.overlaps(vc) && wasReadClipped(rd, true)).collect(Collectors.toList());

        ReadPileup leftClippedPileup = new ReadPileup(ref.getInterval(), leftClippedReads);
        Map<Allele, Integer> leftClippedCounts = PileupBasedAlleles.getPileupAlleleCounts(vc, leftClippedPileup);
        final int[] counts = new int[vc.getNAlleles()];
        counts[0] = leftClippedCounts.get(vc.getReference()); // the first entry, as in AD, is always the ref allele
        for (int i = 0; i < vc.getNAlleles() - 1; i++) {
            counts[i + 1] = leftClippedCounts.get(vc.getAlternateAllele(i));
        }
        gb.attribute(getKeyNames().get(0), counts.clone());

        ReadPileup rightClippedPileup = new ReadPileup(ref.getInterval(), rightClippedReads);
        Map<Allele, Integer> rightClippedCounts = PileupBasedAlleles.getPileupAlleleCounts(vc, rightClippedPileup);
        // Reuse the array for the right-clipped counts.
        for (int i = 0; i < counts.length; i++) {
            counts[i] = 0;
        }
        counts[0] = rightClippedCounts.get(vc.getReference()); // the first entry, as in AD, is always the ref allele
        for (int i = 0; i < vc.getNAlleles() - 1; i++) {
            counts[i + 1] = rightClippedCounts.get(vc.getAlternateAllele(i));
        }

        gb.attribute(getKeyNames().get(1), counts);
    }

    // A read counts as clipped on the requested side if it is currently soft-clipped there, or if it was
    // soft-clipped before the soft clipping was reverted or hard-clipped, as recorded in the
    // ORIGINAL_SOFTCLIP_TAG read attribute.
    private boolean wasReadClipped(final GATKRead read, final boolean rightClipping) {
        if ((!rightClipping) &&
                read.hasAttribute(ReadClipper.ORIGINAL_SOFTCLIP_TAG) &&
                read.getAttributeAsString(ReadClipper.ORIGINAL_SOFTCLIP_TAG).contains(ReadClipper.LEFT_SOFTCLIPPING_MARK)) {
            return true;
        }
        if ((!rightClipping) && (!read.isUnmapped()) && (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.SOFT_CLIP)) {
            return true;
        }

        if ((rightClipping) &&
                read.hasAttribute(ReadClipper.ORIGINAL_SOFTCLIP_TAG) &&
                read.getAttributeAsString(ReadClipper.ORIGINAL_SOFTCLIP_TAG).contains(ReadClipper.RIGHT_SOFTCLIPPING_MARK)) {
            return true;
        }
        if ((rightClipping) && (!read.isUnmapped()) && (read.getCigar().getLastCigarElement().getOperator() == CigarOperator.SOFT_CLIP)) {
            return true;
        }
        return false;
    }
}
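
Each of the new annotations packs its per-allele counts into a ref-first array that mirrors the AD convention: the reference allele at index 0, then the alternate alleles in order. The sketch below is a minimal, hypothetical restatement of that packing, not part of this change; the class and method names are illustrative only, and getOrDefault is used to make the missing-allele case explicit (the PR code calls get directly).

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Map;

// Hypothetical helper illustrating the ref-first count layout used by these annotations.
final class RefFirstCounts {
    // Index 0 holds the reference-allele count; indices 1..n hold the alternate alleles in VariantContext order.
    static int[] pack(final VariantContext vc, final Map<Allele, Integer> countsByAllele) {
        final int[] counts = new int[vc.getNAlleles()];
        counts[0] = countsByAllele.getOrDefault(vc.getReference(), 0); // first entry is always the ref allele
        for (int i = 0; i < vc.getNAlleles() - 1; i++) {
            counts[i + 1] = countsByAllele.getOrDefault(vc.getAlternateAllele(i), 0);
        }
        return counts;
    }
}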
@@ -0,0 +1,57 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCompoundHeaderLine;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupBasedAlleles;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Counts the number of reads with mapping quality zero (MQ0) that support each allele.
 */
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of MQ0 reads per allele")
public class MappingQuality0Count implements GenotypeAnnotation {
    private final static Logger logger = LogManager.getLogger(MappingQuality0Count.class);

    @Override
    public List<String> getKeyNames() {
        return Arrays.asList(GATKVCFConstants.MQ0_COUNT_KEY);
    }

    @Override
    public VCFCompoundHeaderLine.SupportedHeaderLineType annotationType() {
        return GenotypeAnnotation.super.annotationType();
    }

    @Override
    public List<VCFCompoundHeaderLine> getDescriptions() {
        return GenotypeAnnotation.super.getDescriptions();
    }

    @Override
    public void annotate(ReferenceContext ref, VariantContext vc, Genotype g, GenotypeBuilder gb, AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        // Gather both the informative and the filtered evidence for this sample, then keep only the MQ0 reads.
        List<GATKRead> allReads = likelihoods.sampleEvidence(likelihoods.indexOfSample(g.getSampleName())).stream().collect(Collectors.toList());
        allReads.addAll(likelihoods.filteredSampleEvidence(likelihoods.indexOfSample(g.getSampleName())).stream().collect(Collectors.toList()));
        List<GATKRead> mq0Reads = allReads.stream().filter(rd -> (rd.getStart() <= rd.getEnd()) && rd.overlaps(vc) && (rd.getMappingQuality() == 0)).collect(Collectors.toList());

        ReadPileup mq0Pileup = new ReadPileup(ref.getInterval(), mq0Reads);
        Map<Allele, Integer> mq0Counts = PileupBasedAlleles.getPileupAlleleCounts(vc, mq0Pileup);
        final int[] counts = new int[vc.getNAlleles()];
        counts[0] = mq0Counts.get(vc.getReference()); // the first entry, as in AD, is always the ref allele
        for (int i = 0; i < vc.getNAlleles() - 1; i++) {
            counts[i + 1] = mq0Counts.get(vc.getAlternateAllele(i));
        }
        gb.attribute(getKeyNames().get(0), counts);
    }
}
@@ -0,0 +1,111 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCompoundHeaderLine;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.*;

/**
 * Reports the median number of mismatches (NM tag) among the reads supporting each allele.
 */
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Mean counts of mismatches per allele")
public class MismatchCount implements GenotypeAnnotation {
    private final static OneShotLogger logger = new OneShotLogger(MismatchCount.class);

    @Override
    public List<String> getKeyNames() {
        return Arrays.asList(GATKVCFConstants.NM_COUNT_KEY);
    }

    @Override
    public VCFCompoundHeaderLine.SupportedHeaderLineType annotationType() {
        return GenotypeAnnotation.super.annotationType();
    }

    @Override
    public List<VCFCompoundHeaderLine> getDescriptions() {
        return GenotypeAnnotation.super.getDescriptions();
    }

    @Override
    public void annotate(ReferenceContext ref, VariantContext vc, Genotype g, GenotypeBuilder gb, AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        Map<Allele, List<Integer>> mismatchCounts = new HashMap<>();
        for (Allele al : vc.getAlleles()) {
            mismatchCounts.put(al, new ArrayList<>());
        }

        if (!validateInputs(likelihoods)) {
            logger.warn(String.format("The %s tag is missing, so the MismatchCount annotation was not calculated. Was the tool run without the %s argument?",
                    ReadUtils.NUM_MISMATCH_TAG,
                    AssemblyBasedCallerArgumentCollection.ADD_MISMATCH_COUNT_ANNOTATION_LONG_NAME));
            return;
        }
        fillMDFromLikelihoods(vc, ref, likelihoods, mismatchCounts);
        final int[] counts = new int[vc.getNAlleles()];

        // The first entry, as in AD, is always the ref allele; alleles with no informative reads report 0.
        if (mismatchCounts.get(vc.getReference()).isEmpty()) {
            counts[0] = 0;
        } else {
            counts[0] = (int) MathUtils.median(mismatchCounts.get(vc.getReference()));
        }
        for (int i = 0; i < vc.getNAlleles() - 1; i++) {
            if (mismatchCounts.get(vc.getAlternateAllele(i)).isEmpty()) {
                counts[i + 1] = 0;
            } else {
                counts[i + 1] = (int) MathUtils.median(mismatchCounts.get(vc.getAlternateAllele(i)));
            }
        }
        gb.attribute(getKeyNames().get(0), counts);
    }

    protected void fillMDFromLikelihoods(VariantContext vc, ReferenceContext ref, AlleleLikelihoods<GATKRead, Allele> likelihoods, Map<Allele, List<Integer>> mismatchCounts) {
        for (final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele : likelihoods.bestAllelesBreakingTies()) {
            final GATKRead read = bestAllele.evidence;
            final Allele allele = bestAllele.allele;
            if (bestAllele.isInformative() && isUsableRead(read, vc) && vc.hasAllele(allele)) {
                final Integer value = getElementForRead(read, vc, ref);
                mismatchCounts.get(allele).add(value);
            }
        }
    }

    private boolean validateInputs(final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        for (int sampleId = 0; sampleId < likelihoods.numberOfSamples(); sampleId++) {
            for (GATKRead read : likelihoods.sampleEvidence(sampleId)) {
                if (!read.hasAttribute(ReadUtils.NUM_MISMATCH_TAG)) {
                    return false;
                }
            }
        }
        return true;
    }

    private Integer getElementForRead(final GATKRead read, final VariantContext vc, final ReferenceContext ref) {
        return read.getAttributeAsInteger(ReadUtils.NUM_MISMATCH_TAG);
    }

    /**
     * Can the read be used in comparative tests between ref / alt bases?
     *
     * @param read the read to consider
     * @param vc   the variant to be annotated
     * @return true if this read is meaningful for comparison, false otherwise
     */
    private boolean isUsableRead(final GATKRead read, final VariantContext vc) {
        Utils.nonNull(read);
        return read.getMappingQuality() != 0 && read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
    }
}
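
The per-allele value reported by MismatchCount is the median of the NM-tag values of the reads assigned to that allele, cast to an int (alleles with no informative reads report 0). Below is a small, self-contained worked example of that aggregation; it is illustrative only and uses a plain sort-based median in place of MathUtils.median, which agrees with it for odd-length lists like the one shown.

import java.util.Arrays;
import java.util.List;

// Hypothetical worked example of the per-allele aggregation in MismatchCount.
public class MismatchMedianExample {
    public static void main(String[] args) {
        // Suppose three reads assigned to an ALT allele carry NM values 0, 1 and 3.
        final List<Integer> nmForAlt = Arrays.asList(0, 1, 3);
        final int[] sorted = nmForAlt.stream().mapToInt(Integer::intValue).sorted().toArray();
        final int median = sorted[sorted.length / 2]; // stand-in for (int) MathUtils.median(...)
        System.out.println(median); // prints 1, the value stored in the counts array for that allele
    }
}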