Skip to content

Commit a33c73a

Browse files
authored
Merge pull request #204 from vgteam/why-unchop-why
Why unchop why
2 parents 706c372 + 1ec3767 commit a33c73a

File tree

6 files changed

+133
-36
lines changed

6 files changed

+133
-36
lines changed

src/algorithms/topological_sort.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ std::vector<handle_t> topological_order(const HandleGraph* g, bool use_heads, bo
102102
max_handle_rank = std::max(max_handle_rank,
103103
number_bool_packing::unpack_number(found));
104104
});
105-
for (uint64_t i = 0; i <= max_handle_rank; ++i) {
105+
for (uint64_t i = 0; i <= max_handle_rank+1; ++i) {
106106
s.push_back(0);
107107
masked_edges_bv.push_back(1);
108108
masked_edges_bv.push_back(1);

src/algorithms/unchop.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,6 @@
66

77
#include "unchop.hpp"
88

9-
#include <unordered_set>
10-
#include <list>
11-
#include <set>
12-
#include <iostream>
13-
#include <sstream>
14-
#include <atomic>
15-
#include "ips4o.hpp"
16-
179
namespace odgi {
1810
namespace algorithms {
1911

@@ -388,4 +380,4 @@ namespace odgi {
388380
return ok.load();
389381
}
390382
}
391-
}
383+
}

src/algorithms/unchop.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,14 @@
44
#include <handlegraph/util.hpp>
55
#include <handlegraph/mutable_path_deletable_handle_graph.hpp>
66
#include <vector>
7-
7+
#include <unordered_set>
8+
#include <list>
9+
#include <set>
10+
#include <iostream>
11+
#include <sstream>
12+
#include <atomic>
13+
14+
#include "ips4o.hpp"
815
#include "simple_components.hpp"
916

1017
namespace odgi {

src/node.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,8 +354,9 @@ void node_t::apply_ordering(
354354
uint64_t j = 0;
355355
for (uint64_t i = 0; i < decoding.size(); ++i) {
356356
uint64_t old_id = decode(i);
357-
if (old_id) {
358-
dec_v.push_back(get_new_id(old_id));
357+
uint64_t new_id = old_id ? get_new_id(old_id) : 0;
358+
if (new_id) {
359+
dec_v.push_back(new_id);
359360
encoding_map.push_back(j++);
360361
} else {
361362
// this means that the node referred to by this entry has been deleted

src/odgi.cpp

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ step_handle_t graph_t::get_next_step(const step_handle_t& step_handle) const {
390390
auto step_rank = as_integers(step_handle)[1];
391391
if (node.step_is_end(step_rank)) {
392392
node.clear_lock();
393-
return path_front_end(get_path_handle_of_step(step_handle));
393+
return path_end(get_path_handle_of_step(step_handle));
394394
}
395395
nid_t next_id = node.step_next_id(step_rank);
396396
auto next_rank = node.step_next_rank(step_rank);
@@ -762,30 +762,19 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact
762762
}
763763

764764
// establish id mapping
765-
uint64_t max_handle_rank = 0;
766-
uint64_t min_handle_rank = std::numeric_limits<uint64_t>::max();
767-
{
768-
uint64_t tmp;
769-
for_each_handle([&](const handle_t& handle) {
770-
tmp = number_bool_packing::unpack_number(handle);
771-
max_handle_rank = std::max(max_handle_rank, tmp);
772-
min_handle_rank = std::min(min_handle_rank, tmp);
773-
});
774-
}
775-
776765
std::vector<std::pair<nid_t, bool>> ids;
777-
// fill even for deleted nodes
766+
// fill even for deleted nodes, which we map to 0
778767
ids.resize(node_v.size(), std::make_pair(0, false));
779768

780769
if (compact_ids) {
781770
for (uint64_t i = 0; i < order->size(); ++i) {
782-
ids[number_bool_packing::unpack_number(order->at(i)) - min_handle_rank] =
771+
ids[number_bool_packing::unpack_number(order->at(i))] =
783772
std::make_pair(i+1,
784773
get_is_reverse(order->at(i)));
785774
}
786775
} else {
787776
for (auto handle : *order) {
788-
ids[number_bool_packing::unpack_number(handle) - min_handle_rank] =
777+
ids[number_bool_packing::unpack_number(handle)] =
789778
std::make_pair(get_id(handle),
790779
get_is_reverse(handle));
791780
}
@@ -794,11 +783,11 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact
794783
// helpers to map from current to new id and orientation
795784
auto get_new_id =
796785
[&](uint64_t id) {
797-
return ids[id - 1 - min_handle_rank].first;
786+
return ids[id - 1].first;
798787
};
799788
auto to_flip =
800789
[&](uint64_t id) {
801-
return ids[id - 1 - min_handle_rank].second;
790+
return ids[id - 1].second;
802791
};
803792

804793
// nodes, edges, and path steps
@@ -845,13 +834,27 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact
845834
}
846835

847836
// now we actually apply the ordering to our node_v, while removing deleted slots
848-
std::vector<node_t*> new_node_v(order->size());
849-
uint64_t j = 0;
850-
for (uint64_t i = 0; i < node_v.size(); ++i) {
851-
if (node_v[i] != nullptr) {
852-
auto h = (*order)[j];
853-
new_node_v[j++] = &get_node_ref(h);
837+
std::vector<node_t*> new_node_v; //(order->size());
838+
if (compact_ids) {
839+
uint64_t j = 0;
840+
for (uint64_t i = 0; i < node_v.size(); ++i) {
841+
if (node_v[i] != nullptr) {
842+
auto h = (*order)[j++];
843+
new_node_v.push_back(&get_node_ref(h));
844+
}
845+
}
846+
_max_node_id = new_node_v.size();
847+
} else {
848+
uint64_t j = 0;
849+
for (uint64_t i = 0; i < node_v.size(); ++i) {
850+
if (node_v[i] != nullptr) {
851+
auto h = (*order)[j++];
852+
new_node_v.push_back(&get_node_ref(h));
853+
} else {
854+
new_node_v.push_back(nullptr);
855+
}
854856
}
857+
_max_node_id = new_node_v.size();
855858
}
856859
node_v = new_node_v;
857860
deleted_nodes.clear();

src/unittest/simplify.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,5 +185,99 @@ TEST_CASE("Graph simplification reduces a graph with a self inverting -/+ loop",
185185
}
186186
}
187187

188+
TEST_CASE("Graph simplification reduces a graph with a self inverting -/+ loop with paths", "[simplify]") {
189+
graph_t graph;
190+
handle_t n1 = graph.create_handle("CAAATAAG");
191+
handle_t n2 = graph.create_handle("A");
192+
handle_t n3 = graph.create_handle("G");
193+
handle_t n4 = graph.create_handle("T");
194+
handle_t n5 = graph.create_handle("C");
195+
handle_t n6 = graph.create_handle("TTG");
196+
graph.create_edge(n1, n2);
197+
graph.create_edge(n2, n3);
198+
graph.create_edge(graph.flip(n3), n3);
199+
graph.create_edge(n3, n4);
200+
graph.create_edge(n4, n5);
201+
graph.create_edge(n5, n6);
202+
path_handle_t p_x = graph.create_path_handle("x");
203+
path_handle_t p_y = graph.create_path_handle("y");
204+
for (auto& p : { p_x, p_y }) {
205+
for (auto& h : { n1, n2, n3, n4, n5, n6 }) {
206+
graph.append_step(p, h);
207+
}
208+
}
209+
path_handle_t q_y = graph.create_path_handle("q");
210+
for (auto& p : { q_y }) {
211+
for (auto& h : { n6, n5, n4, n3, n2, n1 }) {
212+
graph.append_step(p, graph.flip(h));
213+
}
214+
}
215+
216+
algorithms::unchop(graph);
217+
218+
uint64_t seen_steps = 0;
219+
for (auto& p : { p_x, p_y, q_y }) {
220+
step_handle_t begin = graph.path_begin(p);
221+
step_handle_t end = graph.path_end(p);
222+
for (step_handle_t step = begin;
223+
step != end;
224+
step = graph.get_next_step(step)) {
225+
handle_t h = graph.get_handle_of_step(step);
226+
++seen_steps;
227+
}
228+
}
229+
230+
// sort the graph
231+
graph.apply_ordering(algorithms::topological_order(&graph), true);
232+
233+
// check that iteration still works
234+
uint64_t seen_steps_after_sort = 0;
235+
for (auto& p : { p_x, p_y, q_y }) {
236+
step_handle_t begin = graph.path_begin(p);
237+
step_handle_t end = graph.path_end(p);
238+
for (step_handle_t step = begin;
239+
step != end;
240+
step = graph.get_next_step(step)) {
241+
handle_t h = graph.get_handle_of_step(step);
242+
++seen_steps_after_sort;
243+
}
244+
}
245+
246+
SECTION("The graph is as expected") {
247+
REQUIRE(seen_steps == 6);
248+
REQUIRE(seen_steps == seen_steps_after_sort);
249+
REQUIRE(graph.get_sequence(graph.get_handle(1)) == "CAAATAAGA");
250+
REQUIRE(graph.get_sequence(graph.get_handle(2)) == "GTCTTG");
251+
REQUIRE(graph.has_edge(graph.get_handle(1), graph.get_handle(2)));
252+
REQUIRE(graph.has_edge(graph.flip(graph.get_handle(2)), graph.get_handle(2)));
253+
}
254+
255+
// sort the graph
256+
auto order = algorithms::topological_order(&graph);
257+
std::reverse(order.begin(), order.end());
258+
graph.apply_ordering(order, true);
259+
//graph.optimize(); // breaks!!
260+
261+
// check that iteration still works
262+
uint64_t seen_steps_after_rev = 0;
263+
for (auto& p : { p_x, p_y, q_y }) {
264+
step_handle_t begin = graph.path_begin(p);
265+
step_handle_t end = graph.path_end(p);
266+
for (step_handle_t step = begin;
267+
step != end;
268+
step = graph.get_next_step(step)) {
269+
handle_t h = graph.get_handle_of_step(step);
270+
++seen_steps_after_rev;
271+
}
272+
}
273+
274+
SECTION("The graph is as expected after reverse sorting") {
275+
REQUIRE(seen_steps == seen_steps_after_rev);
276+
REQUIRE(graph.get_sequence(graph.get_handle(2)) == "CAAATAAGA");
277+
REQUIRE(graph.get_sequence(graph.get_handle(1)) == "GTCTTG");
278+
}
279+
280+
}
281+
188282
}
189283
}

0 commit comments

Comments
 (0)