From 1ce218bfc0e5213d47965721ae40e7614462ef9b Mon Sep 17 00:00:00 2001 From: Frank Austin Nothaft Date: Tue, 24 Oct 2017 00:27:22 -0700 Subject: [PATCH] [AVOCADO-274] Add flag to emit all genotyped variants, regardless of quality. Resolves #274. --- .../avocado/cli/BiallelicGenotyper.scala | 7 ++++++- .../avocado/util/HardFilterGenotypes.scala | 19 ++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala index 5ac75a77..a47eb688 100644 --- a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala +++ b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala @@ -186,6 +186,10 @@ class BiallelicGenotyperArgs extends Args4jBase with ADAMSaveAnyArgs with Parque name = "-score_all_sites", usage = "If provided, scores all sites, even non-variant sites. Emits a gVCF styled output.") var scoreAllSites = false + @Args4jOption(required = false, + name = "-emit_all_genotypes", + usage = "Emits genotypes for all sites that were discovered.") + var emitAllGenotypes = false // required by HardFilterGenotypesArgs var maxSnpPhredStrandBias: Float = -1.0f @@ -254,7 +258,8 @@ class BiallelicGenotyper( // hard filter the genotypes val filteredGenotypes = HardFilterGenotypes(genotypes, args, - filterRefGenotypes = !args.scoreAllSites) + filterRefGenotypes = !args.scoreAllSites, + emitAllGenotypes = (args.scoreAllSites || args.emitAllGenotypes)) // save the variant calls filteredGenotypes.saveAsParquet(args) diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala index 6a7dd0aa..890ae867 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala @@ -171,11 +171,13 @@ private[avocado] object HardFilterGenotypes extends Serializable { * @param grdd GenotypeRDD to filter. * @param args The hard filter configuration to apply. * @param filterRefGenotypes If true, discards homozygous ref calls. + * @param emitAllGenotypes If true, emits all genotypes. * @return A new GenotypeRDD of hard filtered genotypes. */ def apply(grdd: GenotypeRDD, args: HardFilterGenotypesArgs, - filterRefGenotypes: Boolean = true): GenotypeRDD = { + filterRefGenotypes: Boolean = true, + emitAllGenotypes: Boolean = true): GenotypeRDD = { // make snp and indel filters val snpFilters = buildSnpHardFilters(args) @@ -246,7 +248,8 @@ private[avocado] object HardFilterGenotypes extends Serializable { minQuality, snpFilters, indelFilters, - filterRefGenotypes)) + filterRefGenotypes, + emitAllGenotypes = emitAllGenotypes)) }).addHeaderLines(filterHeaders) } @@ -624,6 +627,7 @@ private[avocado] object HardFilterGenotypes extends Serializable { * @param snpFilters Collection of filters to apply to emitted SNPs. * @param indelFilters Collection of filters to apply to emitted INDELs. * @param filterRefGenotypes If true, discards hom-ref calls. + * @param emitAllGenotypes If true, emits all genotypes. * @return If genotype is high enough quality to emit, a hard filtered * genotype. */ @@ -632,11 +636,16 @@ private[avocado] object HardFilterGenotypes extends Serializable { minQuality: Int, snpFilters: Iterable[Genotype => Option[String]], indelFilters: Iterable[Genotype => Option[String]], - filterRefGenotypes: Boolean): Option[Genotype] = { + filterRefGenotypes: Boolean, + emitAllGenotypes: Boolean = false): Option[Genotype] = { // first, apply emission filters - val optGenotype = Some(genotype) - .filter(emitGenotypeFilter(_, minQuality, filterRefGenotypes)) + val optGenotype = if (emitAllGenotypes) { + Some(genotype) + } else { + Some(genotype) + .filter(emitGenotypeFilter(_, minQuality, filterRefGenotypes)) + } // then, check whether we are a snp or indel and apply hard filters if (genotype.getVariant.getAlternateAllele != null) {