Skip to content

Commit f5fc68c

Browse files
authored
Change how per-base errors are counted when calling consensuses. (#166)
* Change how per-base errors are counted when calling consensuses. When the final consensus call is a no-call (N), we instead count the number of errors relative to the most likely consensus call, whereas we previously used the total depth.
1 parent 1052b2f commit f5fc68c

File tree

3 files changed

+23
-11
lines changed

3 files changed

+23
-11
lines changed

src/main/scala/com/fulcrumgenomics/umi/ConsensusTags.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ object ConsensusTags {
5050
object PerBase {
5151
/** The per-base number of raw-reads contributing to the consensus (stored as a short[]). */
5252
val RawReadCount = "cd" // consensus depth
53-
/** The number of bases at each position that disagreed with the final consensus call (stored as a short[]). */
53+
/** The number of bases at each position that disagreed with the final consensus call (stored as a short[]). If the
54+
* final consensus call is a no call (N), then we use the most likely consensus base instead of the final call. */
5455
val RawReadErrors = "ce" // consensus errors
5556

5657
// Duplex versions of the above tags for the two single strand consensus reads

src/main/scala/com/fulcrumgenomics/umi/VanillaUmiConsensusCaller.scala

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -187,23 +187,19 @@ class VanillaUmiConsensusCaller(override val readNamePrefix: String,
187187
}
188188

189189
// Call the consensus and do any additional filtering
190+
val (rawBase, rawQual) = builder.call()
190191
val (base, qual) = {
191-
if (builder.contributions < this.options.minReads) {
192-
(NoCall, NotEnoughReadsQual)
193-
}
194-
else {
195-
val (b,q) = builder.call()
196-
if (q < this.options.minConsensusBaseQuality) (NoCall, TooLowQualityQual) else (b,q)
197-
198-
}
192+
if (builder.contributions < this.options.minReads) (NoCall, NotEnoughReadsQual)
193+
else if (rawQual < this.options.minConsensusBaseQuality) (NoCall, TooLowQualityQual)
194+
else (rawBase, rawQual)
199195
}
200196

201197
consensusBases(positionInRead) = base
202198
consensusQuals(positionInRead) = qual
203199

204200
// Generate the values for depth and count of errors
205201
val depth = builder.contributions
206-
val errors = if (base == NoCall) depth else depth - builder.observations(base)
202+
val errors = if (rawBase == NoCall) depth else depth - builder.observations(rawBase)
207203
consensusDepths(positionInRead) = if (depth > Short.MaxValue) Short.MaxValue else depth.toShort
208204
consensusErrors(positionInRead) = if (errors > Short.MaxValue) Short.MaxValue else errors.toShort
209205

src/test/scala/com/fulcrumgenomics/umi/VanillaUmiConsensusCallerTest.scala

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,12 @@ import com.fulcrumgenomics.util.NumericTypes._
3333
import htsjdk.samtools.util.CloserUtil
3434
import htsjdk.samtools.{SAMRecordSetBuilder, SAMUtils}
3535
import net.jafama.FastMath._
36+
import org.scalatest.OptionValues
3637

3738
/**
3839
* Tests for ConsensusCaller.
3940
*/
40-
class VanillaUmiConsensusCallerTest extends UnitSpec {
41+
class VanillaUmiConsensusCallerTest extends UnitSpec with OptionValues {
4142
/** Helper function to make a set of consensus caller options. */
4243
def cco = VanillaUmiConsensusCallerOptions
4344

@@ -423,4 +424,18 @@ class VanillaUmiConsensusCallerTest extends UnitSpec {
423424
recs should have size 11
424425
recs.map(_.getCigarString).distinct.sorted shouldBe Seq("25M1D25M", "5S20M1D25M", "5S20M1D20M5H", "25M1D20M5S").sorted
425426
}
427+
428+
it should "calculate the # of errors relative to the most likely consensus call, even when the final call is an N" in {
429+
// NB: missing last base on the first read, which causes an N no-call, but errors should still be 1/3
430+
val call = cc(cco(minReads=4)).consensusCall(Seq(
431+
src("GATTACAN", Array(20,20,20,20,20,20,20,20)),
432+
src("GATTACAG", Array(20,20,20,20,20,20,20,20)),
433+
src("GATTACAG", Array(20,20,20,20,20,20,20,20)),
434+
src("GATTACAT", Array(20,20,20,20,20,20,20,20))
435+
)).value
436+
437+
call.baseString shouldBe "GATTACAN"
438+
call.depths should contain theSameElementsInOrderAs Seq(4,4,4,4,4,4,4,3)
439+
call.errors should contain theSameElementsInOrderAs Seq(0,0,0,0,0,0,0,1)
440+
}
426441
}

0 commit comments

Comments
 (0)