@@ -203,7 +203,7 @@ int wfa_calc_prec_recall_aln(
203
203
offs[0 ][0 ][s][graph->qseqs [0 ].length ()-1 ] = -1 ; // main diag
204
204
ptrs[0 ][0 ][s][graph->qseqs [0 ].length ()-1 ] = PTR_MAT;
205
205
206
- while (true ) {
206
+ while (s <= g. max_dist ) {
207
207
208
208
// EXTEND WAVEFRONT
209
209
// NOTE: because the graph is a topologically sorted DAG, we can safely assume that all
@@ -620,22 +620,19 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
620
620
std::vector< std::vector< std::vector< std::vector<uint32_t > > > > wfa_ptrs;
621
621
std::vector< std::vector< std::vector< std::vector<int > > > > wfa_offs;
622
622
623
- std::unordered_map<idx4, idx4> ptrs;
624
- int aln_score = calc_prec_recall_aln (graph, ptrs, false );
625
623
int wfa_score = wfa_calc_prec_recall_aln (graph, wfa_ptrs, wfa_offs, print);
626
- assert (aln_score == wfa_score);
627
- std::vector<idx4> path = parse_wfa_path (graph, wfa_score, wfa_ptrs, wfa_offs, false );
628
624
bool aligned = wfa_score <= g.max_dist ;
629
625
if (aligned) { // alignment succeeded
630
- calc_prec_recall (graph, path, truth_hi, false );
626
+ std::vector<idx4> path = parse_wfa_path (graph, wfa_score, wfa_ptrs, wfa_offs, print);
627
+ calc_prec_recall (graph, path, truth_hi, print);
631
628
done = true ;
632
629
}
633
630
634
631
// NOTE: if alignment failed because it was too expensive, this can only be caused by a FN
635
632
// truth variant, since query variants can be skipped and the reference sections match.
636
633
// Try alignments without some of the largest truth variants
637
634
638
- // NOTE: if we did align successfully and found a FN variant , re-evaluate all FP and FN variants
635
+ // NOTE: if we did align successfully and found a FN INDEL, re-evaluate all FP and FN variants
639
636
// besides the largest FN. The motivation for this is that large FN truth SVs will have a sync
640
637
// group that extends pretty far left and right, "swallowing" other correct variant calls.
641
638
// Since correctness is determined per sync group, many TP SNP calls can't be identified unless
@@ -644,7 +641,9 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
644
641
for (int tni = 0 ; tni < graph->tnodes ; tni++) {
645
642
if (graph->ttypes [tni] != TYPE_REF) {
646
643
int tvar_idx = graph->tidxs [tni];
647
- if (not aligned || tvars->errtypes [truth_hi][tvar_idx] == ERRTYPE_FN) {
644
+ if (not aligned or (tvars->errtypes [truth_hi][tvar_idx] == ERRTYPE_FN
645
+ and std::max (tvars->refs [tvar_idx].size (),
646
+ tvars->alts [tvar_idx].size ()) > 1 )){
648
647
done = false ;
649
648
int tvar_size = std::max (tvars->alts [tvar_idx].size (), tvars->refs [tvar_idx].size ());
650
649
exclude_sizes.push_back (std::make_pair (tvar_size, tvar_idx));
@@ -686,8 +685,9 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
686
685
// retry alignment (with one large variant excluded, as well as all TPs)
687
686
std::shared_ptr<Graph> retry_graph (new Graph (scs, sc_idx, ref, ctg, truth_hi));
688
687
if (print) retry_graph->print ();
689
- std::unordered_map<idx4, idx4> retry_ptrs;
690
- int dist = calc_prec_recall_aln (retry_graph, retry_ptrs, false );
688
+ std::vector< std::vector< std::vector< std::vector<uint32_t > > > > retry_ptrs;
689
+ std::vector< std::vector< std::vector< std::vector<int > > > > retry_offs;
690
+ int dist = wfa_calc_prec_recall_aln (retry_graph, retry_ptrs, retry_offs, print);
691
691
exclude_dists.push_back (std::make_pair (dist, exclude_sizes[retry].second ));
692
692
}
693
693
0 commit comments