Skip to content

Commit 7ce91eb

Browse files
committed
merge adjacent intervals from different threads
1 parent 89af9b6 commit 7ce91eb

File tree

2 files changed

+55
-24
lines changed

2 files changed

+55
-24
lines changed

src/rgfa.cpp

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ void RGFACover::compute(const PathHandleGraph* graph,
128128
this->num_ref_intervals = this->rgfa_intervals.size();
129129

130130
#ifdef debug
131+
#pragma omp critical(cerr)
131132
cerr << "[rgfa] Selected " << rgfa_intervals.size() << " rank=0 reference paths" << endl;
132133
#endif
133134

@@ -148,8 +149,6 @@ void RGFACover::compute(const PathHandleGraph* graph,
148149

149150
vector<const Snarl*> queue = {snarl};
150151

151-
cerr << "top level snarl " << pb2json(*snarl) << endl;
152-
153152
while(!queue.empty()) {
154153
const Snarl* cur_snarl = queue.back();
155154
queue.pop_back();
@@ -168,24 +167,23 @@ void RGFACover::compute(const PathHandleGraph* graph,
168167
}
169168
});
170169

171-
// now we need to fold up the thread covers
170+
// now we need to fold up the thread covers
172171
for (int64_t t = 0; t < thread_count; ++t) {
173-
int64_t offset = this->rgfa_intervals.size();
174-
175-
// todo: figure out one-shot stl invocation to move
176-
for (int64_t j = 0; j < rgfa_intervals_vector[t].size(); ++j) {
177-
this->rgfa_intervals.push_back(rgfa_intervals_vector[t][j]);
178-
}
179172
#ifdef debug
173+
#pragma omp critical(cerr)
180174
cerr << "Adding " << rgfa_intervals_vector[t].size() << " rgfa intervals from thread " << t << endl;
181175
#endif
182-
rgfa_intervals_vector[t].clear();
183-
184-
for (const auto& node_interval : node_to_interval_vector[t]) {
185-
this->node_to_interval[node_interval.first] = node_interval.second + offset;
176+
// important to go through function rather than do a raw copy since
177+
// inter-top-level snarl merging may need to happen
178+
for (int64_t j = 0; j < rgfa_intervals_vector[t].size(); ++j) {
179+
// the true flag at the end disables the overlap check. since they were computed
180+
// in separate threads, snarls can overlap by a single node
181+
add_interval(this->rgfa_intervals, this->node_to_interval, rgfa_intervals_vector[t][j], true);
186182
}
183+
rgfa_intervals_vector[t].clear();
187184
node_to_interval_vector[t].clear();
188185
}
186+
189187

190188
// todo: we could optionally go through uncovered nodes and try to greedily search
191189
// for rgfa intervals at this point, since it seems for some graphs there are
@@ -345,7 +343,10 @@ void RGFACover::load(const PathHandleGraph* graph,
345343

346344
void RGFACover::apply(MutablePathMutableHandleGraph* mutable_graph) {
347345
assert(this->graph == static_cast<PathHandleGraph*>(mutable_graph));
348-
346+
#ifdef debug
347+
cerr << "applying rGFA cover with " << this->num_ref_intervals << " ref intervals "
348+
<< " and " << this->rgfa_intervals.size() << " total intervals" << endl;
349+
#endif
349350
// index paths isued in rgfa cover
350351
unordered_map<step_handle_t, int64_t> step_to_offset;
351352
unordered_set<path_handle_t> source_path_set;
@@ -484,6 +485,7 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
484485
}
485486
}
486487
#ifdef debug
488+
#pragma omp critical(cerr)
487489
cerr << "doing snarl " << pb2json(snarl.start()) << "-" << pb2json(snarl.end()) << " with " << travs.size() << " travs" << endl;
488490
#endif
489491

@@ -537,9 +539,12 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
537539
const pair<int64_t, int64_t>& uncovered_interval = best_stats_fragment.fragment;
538540

539541
#ifdef debug
542+
#pragma omp critical(cerr)
543+
{
540544
cerr << "best trav: ";
541545
for (auto xx : trav) cerr << " " << graph->get_id(graph->get_handle_of_step(xx));
542546
cerr << endl << "uncovered interval [" << uncovered_interval.first << "," << uncovered_interval.second << "]" <<endl;
547+
}
543548
#endif
544549

545550

@@ -582,6 +587,7 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
582587
graph->get_next_step(trav.at(uncovered_interval.second - 1)));
583588
#ifdef debug
584589
int64_t interval_length = uncovered_interval.second - uncovered_interval.first;
590+
#pragma omp critical(cerr)
585591
cerr << "adding interval with length " << interval_length << endl;
586592
#endif
587593
add_interval(thread_rgfa_intervals, thread_node_to_interval, new_interval);
@@ -619,9 +625,17 @@ vector<pair<int64_t, int64_t>> RGFACover::get_uncovered_intervals(const vector<s
619625

620626
bool RGFACover::add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals,
621627
unordered_map<nid_t, int64_t>& thread_node_to_interval,
622-
const pair<step_handle_t, step_handle_t>& new_interval) {
628+
const pair<step_handle_t, step_handle_t>& new_interval,
629+
bool global) {
623630

631+
#ifdef debug
632+
#pragma omp critical(cerr)
633+
cerr << "adding interval " << graph->get_path_name(graph->get_path_handle_of_step(new_interval.first))
634+
<< ":" << graph->get_id(graph->get_handle_of_step(new_interval.first))
635+
<< "-" << graph->get_id(graph->get_handle_of_step(new_interval.second)) << endl;
636+
#endif
624637
bool merged = false;
638+
int64_t merged_interval_idx = -1;
625639
path_handle_t path_handle = graph->get_path_handle_of_step(new_interval.first);
626640

627641
// check the before-first step. if it's in an interval then it must be immediately
@@ -630,11 +644,19 @@ bool RGFACover::add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_
630644
if (before_first_step != graph->path_front_end(graph->get_path_handle_of_step(before_first_step))) {
631645
nid_t prev_node_id = graph->get_id(graph->get_handle_of_step(before_first_step));
632646
if (thread_node_to_interval.count(prev_node_id)) {
633-
pair<step_handle_t, step_handle_t>& prev_interval = thread_rgfa_intervals[thread_node_to_interval[prev_node_id]];
647+
int64_t prev_idx = thread_node_to_interval[prev_node_id];
648+
pair<step_handle_t, step_handle_t>& prev_interval = thread_rgfa_intervals[prev_idx];
634649
if (graph->get_path_handle_of_step(prev_interval.first) == path_handle) {
635-
assert(prev_interval.second == new_interval.first);
650+
#ifdef debug
651+
#pragma omp critical(cerr)
652+
cerr << "prev interval found" << graph->get_path_name(graph->get_path_handle_of_step(prev_interval.first))
653+
<< ":" << graph->get_id(graph->get_handle_of_step(prev_interval.first))
654+
<< "-" << graph->get_id(graph->get_handle_of_step(prev_interval.second)) << endl;
655+
#endif
656+
assert(global || prev_interval.second == new_interval.first);
636657
prev_interval.second = new_interval.second;
637658
merged = true;
659+
merged_interval_idx = prev_idx;
638660
}
639661
}
640662
}
@@ -644,24 +666,32 @@ bool RGFACover::add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_
644666
if (new_interval.second != graph->path_end(graph->get_path_handle_of_step(new_interval.second))) {
645667
nid_t next_node_id = graph->get_id(graph->get_handle_of_step(new_interval.second));
646668
if (thread_node_to_interval.count(next_node_id)) {
647-
pair<step_handle_t, step_handle_t>& next_interval = thread_rgfa_intervals[thread_node_to_interval[next_node_id]];
669+
int64_t next_idx = thread_node_to_interval[next_node_id];
670+
pair<step_handle_t, step_handle_t>& next_interval = thread_rgfa_intervals[next_idx];
648671
path_handle_t next_path = graph->get_path_handle_of_step(next_interval.first);
649672
if (graph->get_path_handle_of_step(next_interval.first) == path_handle) {
650-
assert(next_interval.first == new_interval.second);
673+
#ifdef debug
674+
#pragma omp critical(cerr)
675+
cerr << "next interval found" << graph->get_path_name(graph->get_path_handle_of_step(next_interval.first))
676+
<< ":" << graph->get_id(graph->get_handle_of_step(next_interval.first))
677+
<< "-" << graph->get_id(graph->get_handle_of_step(next_interval.second)) << endl;
678+
#endif
679+
assert(global || next_interval.first == new_interval.second);
651680
next_interval.first = new_interval.first;
652681
merged = true;
682+
merged_interval_idx = next_idx;
653683
}
654684
}
655685
}
656686

657687
// add the interval to the local (thread safe) structures
658688
if (!merged) {
659-
for (step_handle_t step = new_interval.first; step != new_interval.second; step = graph->get_next_step(step)) {
660-
thread_node_to_interval[graph->get_id(graph->get_handle_of_step(step))] = thread_rgfa_intervals.size();
661-
}
689+
merged_interval_idx = thread_rgfa_intervals.size();
662690
thread_rgfa_intervals.push_back(new_interval);
663691
}
664-
692+
for (step_handle_t step = new_interval.first; step != new_interval.second; step = graph->get_next_step(step)) {
693+
thread_node_to_interval[graph->get_id(graph->get_handle_of_step(step))] = merged_interval_idx;
694+
}
665695
return !merged;
666696
}
667697

src/rgfa.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ class RGFACover {
106106
// returns true if a new interval was added, false if an existing interval was updated
107107
bool add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals,
108108
unordered_map<nid_t, int64_t>& thread_node_to_interval,
109-
const pair<step_handle_t, step_handle_t>& new_interval);
109+
const pair<step_handle_t, step_handle_t>& new_interval,
110+
bool global = false);
110111

111112
// get the total coverage of a traversal (sum of step lengths)
112113
int64_t get_coverage(const vector<step_handle_t>& trav, const pair<int64_t, int64_t>& uncovered_interval);

0 commit comments

Comments
 (0)