Skip to content

Commit 7ccb000

Browse files
committed
simplify / refine path cover table (but not finalized)
1 parent 1771d0f commit 7ccb000

File tree

4 files changed

+86
-5
lines changed

4 files changed

+86
-5
lines changed

src/deconstructor.cpp

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,10 +1053,14 @@ bool Deconstructor::deconstruct_site(const handle_t& snarl_start, const handle_t
10531053
ref_info.parent_allele = 0;
10541054
ref_info.parent_len = v.alleles[0].length();
10551055
ref_info.parent_ref_len = v.alleles[0].length();
1056+
ref_info.parent_path_interval = trav_steps[ref_trav_idx];
1057+
ref_info.parent_ref_interval = trav_steps[ref_trav_idx];
1058+
ref_info.lv0_ref_name = v.sequenceName;
10561059
ref_info.lv0_ref_name = v.sequenceName;
10571060
ref_info.lv0_ref_start = v.position;
10581061
ref_info.lv0_ref_len = v.alleles[0].length();
10591062
ref_info.lv0_alt_len = v.alleles[ref_info.parent_allele].length();
1063+
ref_info.lv = 0;
10601064
}
10611065
v.info["PA"].push_back(std::to_string(ref_info.parent_allele));
10621066
v.info["PL"].push_back(std::to_string(ref_info.parent_len));
@@ -1101,14 +1105,20 @@ bool Deconstructor::deconstruct_site(const handle_t& snarl_start, const handle_t
11011105
}
11021106
}
11031107
}
1108+
// update the path cover
1109+
#pragma omp critical (off_ref_info)
1110+
{
1111+
this->update_path_cover(trav_steps, trav_clusters, in_nesting_info->has_ref ? *in_nesting_info : ref_info);
1112+
}
1113+
11041114
// remember the reference path of this variant site
11051115
// for fasta output, along with information about its parent
11061116
int64_t ref_trav = ref_travs[i];
11071117
const PathInterval& ref_path_interval = trav_steps[ref_trav];
11081118
if (!this->ref_paths.count(graph->get_path_name(graph->get_path_handle_of_step(ref_path_interval.first)))) {
11091119
#pragma omp critical (off_ref_info)
11101120
{
1111-
this->off_ref_sequences[ref_path_interval] = *in_nesting_info;
1121+
//this->off_ref_sequences[ref_path_interval] = *in_nesting_info;
11121122
}
11131123
}
11141124
}
@@ -1480,6 +1490,62 @@ void Deconstructor::deconstruct(vector<string> ref_paths, const PathPositionHand
14801490
write_variants(cout, snarl_manager);
14811491
}
14821492

1493+
void Deconstructor::update_path_cover(const vector<pair<step_handle_t, step_handle_t>>& trav_steps,
1494+
const vector<vector<int>>& traversal_clusters,
1495+
const NestingInfo& nesting_info) const {
1496+
// for every cluster, add off-reference sequences
1497+
// todo: are these in the best order?
1498+
for (const vector<int>& trav_cluster : traversal_clusters) {
1499+
if (trav_cluster[0] >= trav_steps.size()) {
1500+
assert(this->star_allele);
1501+
continue;
1502+
}
1503+
const PathInterval& path_interval = trav_steps.at(trav_cluster[0]);
1504+
int64_t start = graph->get_position_of_step(path_interval.first);
1505+
int64_t end = graph->get_position_of_step(path_interval.second);
1506+
bool reversed = start > end;
1507+
1508+
// scan the interval storing any uncovered sub-intervals
1509+
vector<PathInterval> sub_intervals;
1510+
bool open_interval = false;
1511+
step_handle_t prev_step;
1512+
string prev_name;
1513+
for (step_handle_t step = path_interval.first; step != path_interval.second;
1514+
step = (reversed ? graph->get_previous_step(step) : graph->get_next_step(step))) {
1515+
if (this->node_cover.count(graph->get_id(graph->get_handle_of_step(step)))) {
1516+
if (open_interval) {
1517+
if (!this->ref_paths.count(prev_name)) {
1518+
// expects inclusive interval
1519+
step_handle_t end_step = reversed ? graph->get_next_step(step) : graph->get_previous_step(step);
1520+
sub_intervals.push_back(make_pair(prev_step, end_step));
1521+
}
1522+
open_interval = false;
1523+
}
1524+
} else {
1525+
if (open_interval == false) {
1526+
prev_step = step;
1527+
prev_name = graph->get_path_name(graph->get_path_handle_of_step(prev_step));
1528+
open_interval = true;
1529+
}
1530+
this->node_cover.insert(graph->get_id(graph->get_handle_of_step(step)));
1531+
}
1532+
}
1533+
1534+
if (open_interval && !this->ref_paths.count(prev_name)) {
1535+
// expects inclusive interval
1536+
step_handle_t end_step = reversed ? graph->get_next_step(path_interval.second) :
1537+
graph->get_previous_step(path_interval.second);
1538+
sub_intervals.push_back(make_pair(prev_step, end_step));
1539+
}
1540+
1541+
// update the path cover
1542+
for (const PathInterval& interval : sub_intervals) {
1543+
// todo: store something
1544+
this->off_ref_sequences[interval] = nesting_info;
1545+
}
1546+
}
1547+
}
1548+
14831549
static string resolve_path_name(const PathPositionHandleGraph* graph,
14841550
const PathInterval& path_interval,
14851551
int64_t& out_start,
@@ -1615,11 +1681,10 @@ void Deconstructor::save_off_ref_sequences(const string& out_fasta_filename) con
16151681
graph->get_handle_of_step(nesting_info.parent_ref_interval.second));
16161682

16171683

1618-
out_metadata_file << path_name << "\t"
1684+
out_metadata_file << Paths::strip_subrange(path_name) << "\t"
1685+
<< pos1 << "\t"
1686+
<< pos2 << "\t"
16191687
<< snarl_name << "\t"
1620-
<< par_path_name << "\t"
1621-
<< par_snarl_name << "\t"
1622-
<< nesting_info.lv << "\t"
16231688
<< nesting_info.lv0_ref_name << "\t"
16241689
<< nesting_info.lv0_ref_start << "\t"
16251690
<< (nesting_info.lv0_ref_start + nesting_info.lv0_ref_len) << "\t"

src/deconstructor.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ class Deconstructor : public VCFOutputCaller {
142142
const vector<string>& trav_to_name,
143143
const vector<int>& gbwt_phases) const;
144144

145+
// update off-ref path cover
146+
void update_path_cover(const vector<pair<step_handle_t, step_handle_t>>& trav_steps,
147+
const vector<vector<int>>& traversal_clusters,
148+
const NestingInfo& nesting_info) const;
149+
150+
145151
// the underlying context-getter
146152
vector<nid_t> get_context(
147153
step_handle_t start_step,
@@ -217,6 +223,9 @@ class Deconstructor : public VCFOutputCaller {
217223

218224
// keep track of off-reference reference sequences to print to fasta at the end
219225
mutable unordered_map<PathInterval, NestingInfo> off_ref_sequences;
226+
227+
// keep track of path cover for computing off_ref_sequences
228+
mutable unordered_set<nid_t> node_cover;
220229
};
221230

222231

src/traversal_finder.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ string graph_interval_to_string(const HandleGraph* graph, const handle_t& start_
4040
return handle_to_string(start_handle) + handle_to_string(end_handle);
4141
}
4242

43+
string path_interval_to_string(const PathHandleGraph* graph, const PathInterval& path_interval) {
44+
string s = graph->get_path_name(graph->get_path_handle_of_step(path_interval.first));
45+
return s + ":" + graph_interval_to_string(graph, graph->get_handle_of_step(path_interval.first),
46+
graph->get_handle_of_step(path_interval.second));
47+
}
48+
4349
PathBasedTraversalFinder::PathBasedTraversalFinder(const PathHandleGraph& g, SnarlManager& sm) : graph(g), snarlmanager(sm){
4450
}
4551

src/traversal_finder.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ using PathInterval = pair<step_handle_t, step_handle_t>;
4242
string traversal_to_string(const PathHandleGraph* graph, const Traversal& traversal, int64_t max_steps = 10);
4343
// replaces pb2json(snarl)
4444
string graph_interval_to_string(const HandleGraph* graph, const handle_t& start_handle, const handle_t& end_handle);
45+
string path_interval_to_string(const PathHandleGraph* graph, const PathInterval& path_interval);
4546

4647
/**
4748
* Represents a strategy for finding traversals of (nested) sites. Polymorphic

0 commit comments

Comments
 (0)