@@ -128,6 +128,7 @@ void RGFACover::compute(const PathHandleGraph* graph,
128128 this ->num_ref_intervals = this ->rgfa_intervals .size ();
129129
130130#ifdef debug
131+ #pragma omp critical(cerr)
131132 cerr << " [rgfa] Selected " << rgfa_intervals.size () << " rank=0 reference paths" << endl;
132133#endif
133134
@@ -148,8 +149,6 @@ void RGFACover::compute(const PathHandleGraph* graph,
148149
149150 vector<const Snarl*> queue = {snarl};
150151
151- cerr << " top level snarl " << pb2json (*snarl) << endl;
152-
153152 while (!queue.empty ()) {
154153 const Snarl* cur_snarl = queue.back ();
155154 queue.pop_back ();
@@ -168,24 +167,23 @@ void RGFACover::compute(const PathHandleGraph* graph,
168167 }
169168 });
170169
171- // now we need to fold up the thread covers
170+ // now we need to fold up the thread covers
172171 for (int64_t t = 0 ; t < thread_count; ++t) {
173- int64_t offset = this ->rgfa_intervals .size ();
174-
175- // todo: figure out one-shot stl invocation to move
176- for (int64_t j = 0 ; j < rgfa_intervals_vector[t].size (); ++j) {
177- this ->rgfa_intervals .push_back (rgfa_intervals_vector[t][j]);
178- }
179172#ifdef debug
173+ #pragma omp critical(cerr)
180174 cerr << " Adding " << rgfa_intervals_vector[t].size () << " rgfa intervals from thread " << t << endl;
181175#endif
182- rgfa_intervals_vector[t].clear ();
183-
184- for (const auto & node_interval : node_to_interval_vector[t]) {
185- this ->node_to_interval [node_interval.first ] = node_interval.second + offset;
176+ // important to go through function rather than do a raw copy since
177+ // inter-top-level snarl merging may need to happen
178+ for (int64_t j = 0 ; j < rgfa_intervals_vector[t].size (); ++j) {
179+ // the true flag at the end disables the overlap check. since they were computed
180+ // in separate threads, snarls can overlap by a single node
181+ add_interval (this ->rgfa_intervals , this ->node_to_interval , rgfa_intervals_vector[t][j], true );
186182 }
183+ rgfa_intervals_vector[t].clear ();
187184 node_to_interval_vector[t].clear ();
188185 }
186+
189187
190188 // todo: we could optionally go through uncovered nodes and try to greedily search
191189 // for rgfa intervals at this point, since it seems for some graphs there are
@@ -345,7 +343,10 @@ void RGFACover::load(const PathHandleGraph* graph,
345343
346344void RGFACover::apply (MutablePathMutableHandleGraph* mutable_graph) {
347345 assert (this ->graph == static_cast <PathHandleGraph*>(mutable_graph));
348-
346+ #ifdef debug
347+ cerr << " applying rGFA cover with " << this ->num_ref_intervals << " ref intervals "
348+ << " and " << this ->rgfa_intervals .size () << " total intervals" << endl;
349+ #endif
349350 // index paths used in rgfa cover
350351 unordered_map<step_handle_t , int64_t > step_to_offset;
351352 unordered_set<path_handle_t > source_path_set;
@@ -484,6 +485,7 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
484485 }
485486 }
486487#ifdef debug
488+ #pragma omp critical(cerr)
487489 cerr << " doing snarl " << pb2json (snarl.start ()) << " -" << pb2json (snarl.end ()) << " with " << travs.size () << " travs" << endl;
488490#endif
489491
@@ -537,9 +539,12 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
537539 const pair<int64_t , int64_t >& uncovered_interval = best_stats_fragment.fragment ;
538540
539541#ifdef debug
542+ #pragma omp critical(cerr)
543+ {
540544 cerr << " best trav: " ;
541545 for (auto xx : trav) cerr << " " << graph->get_id (graph->get_handle_of_step (xx));
542546 cerr << endl << " uncovered interval [" << uncovered_interval.first << " ," << uncovered_interval.second << " ]" <<endl;
547+ }
543548#endif
544549
545550
@@ -582,6 +587,7 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
582587 graph->get_next_step (trav.at (uncovered_interval.second - 1 )));
583588#ifdef debug
584589 int64_t interval_length = uncovered_interval.second - uncovered_interval.first ;
590+ #pragma omp critical(cerr)
585591 cerr << " adding interval with length " << interval_length << endl;
586592#endif
587593 add_interval (thread_rgfa_intervals, thread_node_to_interval, new_interval);
@@ -619,9 +625,17 @@ vector<pair<int64_t, int64_t>> RGFACover::get_uncovered_intervals(const vector<s
619625
620626bool RGFACover::add_interval (vector<pair<step_handle_t , step_handle_t >>& thread_rgfa_intervals,
621627 unordered_map<nid_t , int64_t >& thread_node_to_interval,
622- const pair<step_handle_t , step_handle_t >& new_interval) {
628+ const pair<step_handle_t , step_handle_t >& new_interval,
629+ bool global) {
623630
631+ #ifdef debug
632+ #pragma omp critical(cerr)
633+ cerr << " adding interval " << graph->get_path_name (graph->get_path_handle_of_step (new_interval.first ))
634+ << " :" << graph->get_id (graph->get_handle_of_step (new_interval.first ))
635+ << " -" << graph->get_id (graph->get_handle_of_step (new_interval.second )) << endl;
636+ #endif
624637 bool merged = false ;
638+ int64_t merged_interval_idx = -1 ;
625639 path_handle_t path_handle = graph->get_path_handle_of_step (new_interval.first );
626640
627641 // check the before-first step. if it's in an interval then it must be immediately
@@ -630,11 +644,19 @@ bool RGFACover::add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_
630644 if (before_first_step != graph->path_front_end (graph->get_path_handle_of_step (before_first_step))) {
631645 nid_t prev_node_id = graph->get_id (graph->get_handle_of_step (before_first_step));
632646 if (thread_node_to_interval.count (prev_node_id)) {
633- pair<step_handle_t , step_handle_t >& prev_interval = thread_rgfa_intervals[thread_node_to_interval[prev_node_id]];
647+ int64_t prev_idx = thread_node_to_interval[prev_node_id];
648+ pair<step_handle_t , step_handle_t >& prev_interval = thread_rgfa_intervals[prev_idx];
634649 if (graph->get_path_handle_of_step (prev_interval.first ) == path_handle) {
635- assert (prev_interval.second == new_interval.first );
650+ #ifdef debug
651+ #pragma omp critical(cerr)
652+ cerr << " prev interval found" << graph->get_path_name (graph->get_path_handle_of_step (prev_interval.first ))
653+ << " :" << graph->get_id (graph->get_handle_of_step (prev_interval.first ))
654+ << " -" << graph->get_id (graph->get_handle_of_step (prev_interval.second )) << endl;
655+ #endif
656+ assert (global || prev_interval.second == new_interval.first );
636657 prev_interval.second = new_interval.second ;
637658 merged = true ;
659+ merged_interval_idx = prev_idx;
638660 }
639661 }
640662 }
@@ -644,24 +666,32 @@ bool RGFACover::add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_
644666 if (new_interval.second != graph->path_end (graph->get_path_handle_of_step (new_interval.second ))) {
645667 nid_t next_node_id = graph->get_id (graph->get_handle_of_step (new_interval.second ));
646668 if (thread_node_to_interval.count (next_node_id)) {
647- pair<step_handle_t , step_handle_t >& next_interval = thread_rgfa_intervals[thread_node_to_interval[next_node_id]];
669+ int64_t next_idx = thread_node_to_interval[next_node_id];
670+ pair<step_handle_t , step_handle_t >& next_interval = thread_rgfa_intervals[next_idx];
648671 path_handle_t next_path = graph->get_path_handle_of_step (next_interval.first );
649672 if (graph->get_path_handle_of_step (next_interval.first ) == path_handle) {
650- assert (next_interval.first == new_interval.second );
673+ #ifdef debug
674+ #pragma omp critical(cerr)
675+ cerr << " next interval found" << graph->get_path_name (graph->get_path_handle_of_step (next_interval.first ))
676+ << " :" << graph->get_id (graph->get_handle_of_step (next_interval.first ))
677+ << " -" << graph->get_id (graph->get_handle_of_step (next_interval.second )) << endl;
678+ #endif
679+ assert (global || next_interval.first == new_interval.second );
651680 next_interval.first = new_interval.first ;
652681 merged = true ;
682+ merged_interval_idx = next_idx;
653683 }
654684 }
655685 }
656686
657687 // add the interval to the local (thread safe) structures
658688 if (!merged) {
659- for (step_handle_t step = new_interval.first ; step != new_interval.second ; step = graph->get_next_step (step)) {
660- thread_node_to_interval[graph->get_id (graph->get_handle_of_step (step))] = thread_rgfa_intervals.size ();
661- }
689+ merged_interval_idx = thread_rgfa_intervals.size ();
662690 thread_rgfa_intervals.push_back (new_interval);
663691 }
664-
692+ for (step_handle_t step = new_interval.first ; step != new_interval.second ; step = graph->get_next_step (step)) {
693+ thread_node_to_interval[graph->get_id (graph->get_handle_of_step (step))] = merged_interval_idx;
694+ }
665695 return !merged;
666696}
667697
0 commit comments