Skip to content

Commit 6ebd95f

Browse files
committed
merge contiguous rgfa fragments
1 parent f23e65a commit 6ebd95f

File tree

2 files changed

+65
-12
lines changed

2 files changed

+65
-12
lines changed

src/rgfa.cpp

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,9 @@ void RGFACover::compute(const PathHandleGraph* graph,
146146
vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals = rgfa_intervals_vector[omp_get_thread_num()];
147147
unordered_map<nid_t, int64_t>& thread_node_to_interval = node_to_interval_vector[omp_get_thread_num()];
148148

149-
vector<const Snarl*> queue = {snarl};
149+
vector<const Snarl*> queue = {snarl};
150+
151+
cerr << "top level snarl " << pb2json(*snarl) << endl;
150152

151153
while(!queue.empty()) {
152154
const Snarl* cur_snarl = queue.back();
@@ -576,19 +578,13 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
576578
}
577579
continue;
578580
}
579-
580-
// add the interval to the local (thread safe) structures
581-
step_handle_t step = trav[uncovered_interval.first];
582-
int64_t interval_length = uncovered_interval.second - uncovered_interval.first;
581+
pair<step_handle_t, step_handle_t> new_interval = make_pair(trav.at(uncovered_interval.first),
582+
graph->get_next_step(trav.at(uncovered_interval.second - 1)));
583583
#ifdef debug
584+
int64_t interval_length = uncovered_interval.second - uncovered_interval.first;
584585
cerr << "adding interval with length " << interval_length << endl;
585586
#endif
586-
for (int64_t i = 0; i < interval_length; ++i) {
587-
thread_node_to_interval[graph->get_id(graph->get_handle_of_step(step))] = thread_rgfa_intervals.size();
588-
step = graph->get_next_step(step);
589-
}
590-
thread_rgfa_intervals.push_back(make_pair(trav.at(uncovered_interval.first),
591-
graph->get_next_step(trav.at(uncovered_interval.second - 1))));
587+
add_interval(thread_rgfa_intervals, thread_node_to_interval, new_interval);
592588
}
593589
}
594590

@@ -621,6 +617,54 @@ vector<pair<int64_t, int64_t>> RGFACover::get_uncovered_intervals(const vector<s
621617
return intervals;
622618
}
623619

620+
bool RGFACover::add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals,
621+
unordered_map<nid_t, int64_t>& thread_node_to_interval,
622+
const pair<step_handle_t, step_handle_t>& new_interval) {
623+
624+
bool merged = false;
625+
path_handle_t path_handle = graph->get_path_handle_of_step(new_interval.first);
626+
627+
// check the before-first step. if it's in an interval then it must be immediately
628+
// preceeding so we merge the new interval to the end of the found interval
629+
step_handle_t before_first_step = graph->get_previous_step(new_interval.first);
630+
if (before_first_step != graph->path_front_end(graph->get_path_handle_of_step(before_first_step))) {
631+
nid_t prev_node_id = graph->get_id(graph->get_handle_of_step(before_first_step));
632+
if (thread_node_to_interval.count(prev_node_id)) {
633+
pair<step_handle_t, step_handle_t>& prev_interval = thread_rgfa_intervals[thread_node_to_interval[prev_node_id]];
634+
if (graph->get_path_handle_of_step(prev_interval.first) == path_handle) {
635+
assert(prev_interval.second == new_interval.first);
636+
prev_interval.second = new_interval.second;
637+
merged = true;
638+
}
639+
}
640+
}
641+
642+
// check the end step. if it's in an interval then it must be immediately
643+
// following we merge the new interval to the front of the found interval
644+
if (new_interval.second != graph->path_end(graph->get_path_handle_of_step(new_interval.second))) {
645+
nid_t next_node_id = graph->get_id(graph->get_handle_of_step(new_interval.second));
646+
if (thread_node_to_interval.count(next_node_id)) {
647+
pair<step_handle_t, step_handle_t>& next_interval = thread_rgfa_intervals[thread_node_to_interval[next_node_id]];
648+
path_handle_t next_path = graph->get_path_handle_of_step(next_interval.first);
649+
if (graph->get_path_handle_of_step(next_interval.first) == path_handle) {
650+
assert(next_interval.first == new_interval.second);
651+
next_interval.first = new_interval.first;
652+
merged = true;
653+
}
654+
}
655+
}
656+
657+
// add the interval to the local (thread safe) structures
658+
if (!merged) {
659+
for (step_handle_t step = new_interval.first; step != new_interval.second; step = graph->get_next_step(step)) {
660+
thread_node_to_interval[graph->get_id(graph->get_handle_of_step(step))] = thread_rgfa_intervals.size();
661+
}
662+
thread_rgfa_intervals.push_back(new_interval);
663+
}
664+
665+
return !merged;
666+
}
667+
624668
int64_t RGFACover::get_coverage(const vector<step_handle_t>& trav, const pair<int64_t, int64_t>& uncovered_interval) {
625669
path_handle_t path_handle = graph->get_path_handle_of_step(trav.front());
626670
int64_t coverage = 0;
@@ -635,7 +679,9 @@ int64_t RGFACover::get_coverage(const vector<step_handle_t>& trav, const pair<in
635679

636680
return coverage;
637681
}
638-
682+
683+
684+
639685
// copied pretty much verbatem from
640686
// https://github.com/ComparativeGenomicsToolkit/hal2vg/blob/v1.1.2/clip-vg.cpp#L809-L880
641687
void RGFACover::forwardize_rgfa_paths(MutablePathMutableHandleGraph* mutable_graph) {

src/rgfa.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ class RGFACover {
101101
vector<pair<int64_t, int64_t>> get_uncovered_intervals(const vector<step_handle_t>& trav,
102102
const unordered_map<nid_t, int64_t>& thread_node_to_interval);
103103

104+
// add a new interval into the rgfa_intervals veector and update the node_to_interval map
105+
// if the interval can be merged into an existing, contiguous interval, do that instead
106+
// returns true if a new interval was added, false if an existing interval was updated
107+
bool add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals,
108+
unordered_map<nid_t, int64_t>& thread_node_to_interval,
109+
const pair<step_handle_t, step_handle_t>& new_interval);
110+
104111
// get the total coverage of a traversal (sum of step lengths)
105112
int64_t get_coverage(const vector<step_handle_t>& trav, const pair<int64_t, int64_t>& uncovered_interval);
106113

0 commit comments

Comments
 (0)