Skip to content

Commit 099c255

Browse files
committed
feat: WFA only precision/recall alignment!
- fix: end WFA early if max score is reached - change: only consider redoing alignment if FN is an INDEL (not SUB) - fix: don't parse WFA path if alignment didn't finish - feat: removed backup ordinary alignment
1 parent e50a05c commit 099c255

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

src/dist.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ int wfa_calc_prec_recall_aln(
203203
offs[0][0][s][graph->qseqs[0].length()-1] = -1; // main diag
204204
ptrs[0][0][s][graph->qseqs[0].length()-1] = PTR_MAT;
205205

206-
while (true) {
206+
while (s <= g.max_dist) {
207207

208208
// EXTEND WAVEFRONT
209209
// NOTE: because the graph is a topologically sorted DAG, we can safely assume that all
@@ -620,22 +620,19 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
620620
std::vector< std::vector< std::vector< std::vector<uint32_t> > > > wfa_ptrs;
621621
std::vector< std::vector< std::vector< std::vector<int> > > > wfa_offs;
622622

623-
std::unordered_map<idx4, idx4> ptrs;
624-
int aln_score = calc_prec_recall_aln(graph, ptrs, false);
625623
int wfa_score = wfa_calc_prec_recall_aln(graph, wfa_ptrs, wfa_offs, print);
626-
assert(aln_score == wfa_score);
627-
std::vector<idx4> path = parse_wfa_path(graph, wfa_score, wfa_ptrs, wfa_offs, false);
628624
bool aligned = wfa_score <= g.max_dist;
629625
if (aligned) { // alignment succeeded
630-
calc_prec_recall(graph, path, truth_hi, false);
626+
std::vector<idx4> path = parse_wfa_path(graph, wfa_score, wfa_ptrs, wfa_offs, print);
627+
calc_prec_recall(graph, path, truth_hi, print);
631628
done = true;
632629
}
633630

634631
// NOTE: if alignment failed because it was too expensive, this can only be caused by a FN
635632
// truth variant, since query variants can be skipped and the reference sections match.
636633
// Try alignments without some of the largest truth variants
637634

638-
// NOTE: if we did align successfully and found a FN variant, re-evaluate all FP and FN variants
635+
// NOTE: if we did align successfully and found a FN INDEL, re-evaluate all FP and FN variants
639636
// besides the largest FN. The motivation for this is that large FN truth SVs will have a sync
640637
// group that extends pretty far left and right, "swallowing" other correct variant calls.
641638
// Since correctness is determined per sync group, many TP SNP calls can't be identified unless
@@ -644,7 +641,9 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
644641
for (int tni = 0; tni < graph->tnodes; tni++) {
645642
if (graph->ttypes[tni] != TYPE_REF) {
646643
int tvar_idx = graph->tidxs[tni];
647-
if (not aligned || tvars->errtypes[truth_hi][tvar_idx] == ERRTYPE_FN) {
644+
if (not aligned or (tvars->errtypes[truth_hi][tvar_idx] == ERRTYPE_FN
645+
and std::max(tvars->refs[tvar_idx].size(),
646+
tvars->alts[tvar_idx].size()) > 1)){
648647
done = false;
649648
int tvar_size = std::max(tvars->alts[tvar_idx].size(), tvars->refs[tvar_idx].size());
650649
exclude_sizes.push_back(std::make_pair(tvar_size, tvar_idx));
@@ -686,8 +685,9 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
686685
// retry alignment (with one large variant excluded, as well as all TPs)
687686
std::shared_ptr<Graph> retry_graph(new Graph(scs, sc_idx, ref, ctg, truth_hi));
688687
if (print) retry_graph->print();
689-
std::unordered_map<idx4, idx4> retry_ptrs;
690-
int dist = calc_prec_recall_aln(retry_graph, retry_ptrs, false);
688+
std::vector< std::vector< std::vector< std::vector<uint32_t> > > > retry_ptrs;
689+
std::vector< std::vector< std::vector< std::vector<int> > > > retry_offs;
690+
int dist = wfa_calc_prec_recall_aln(retry_graph, retry_ptrs, retry_offs, print);
691691
exclude_dists.push_back(std::make_pair(dist, exclude_sizes[retry].second));
692692
}
693693

0 commit comments

Comments
 (0)