Skip to content

Commit e6c702c

Browse files
committed
Revert "trade space for time and store edges in their own hacked vector"
This reverts commit 52dcdf3.
1 parent 965174b commit e6c702c

File tree

3 files changed: 82 additions and 52 deletions

src/node.cpp

Lines changed: 62 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,55 @@
55
namespace odgi {
66

77
/// Length of the node's sequence in bytes. The removed line asked the
/// std::string member for its size; the added line returns the seq_bytes()
/// counter instead.
uint64_t node_t::sequence_size(void) const {
8-
return sequence.size();
8+
return seq_bytes();
99
}
1010

11-
/// Sequence accessor, renamed by this commit from get_sequence() to sequence().
/// The new body copies seq_bytes() bytes, starting at offset seq_start() of the
/// `bytes` buffer, into a fresh std::string and returns it by value.
/// NOTE(review): `res` is const, so if copy elision is not applied the return
/// copies rather than moves.
const std::string node_t::get_sequence(void) const {
12-
return sequence;
11+
const std::string node_t::sequence(void) const {
12+
const std::string res((char*)bytes.data()+seq_start(), seq_bytes());
13+
return res;
1314
}
1415

1516
/// Replaces the node's sequence with `seq`.
/// The new body resizes the sequence region that starts at seq_start() in
/// `bytes`: it inserts zero bytes when `seq` is longer than the stored
/// sequence, erases the surplus when it is shorter, records the new length via
/// set_seq_bytes(), and finally copies `seq` over the region with memcpy.
/// NOTE(review): set_seq_bytes() stores into the uint32_t _seq_bytes declared
/// in node.hpp, so seq.size() is narrowed — confirm sequences stay below 2^32.
/// NOTE(review): the erase line ends in `;;` (a harmless empty statement).
void node_t::set_sequence(const std::string& seq) {
16-
sequence = seq;
17+
if (seq.size() > seq_bytes()) {
18+
bytes.reserve(bytes.size()+seq.size()-seq_bytes());
19+
bytes.insert(bytes.begin()+seq_start(), seq.size() - seq_bytes(), 0);
20+
set_seq_bytes(seq.size());
21+
} else if (seq.size() < seq_bytes()) {
22+
bytes.erase(bytes.begin()+seq_start(), bytes.begin()+seq_start()+(seq_bytes()-seq.size()));;
23+
set_seq_bytes(seq.size());
24+
}
// the region now holds exactly seq.size() bytes; overwrite it with the new sequence
25+
memcpy(bytes.data()+seq_start(), seq.c_str(), seq.size());
1726
}
1827

19-
/// Edge accessor, renamed by this commit from get_edges() to edges().
/// The old version returned a reference to the `edges` member; the new one
/// returns a freshly built vector by value. When the node has edges, the
/// vector is sized to edge_count()*EDGE_RECORD_LENGTH and filled by
/// sqvarint::decode reading from `bytes` at offset edge_start(); otherwise it
/// is empty.
/// NOTE(review): presumably each record is (relative id, edge type), matching
/// what add_edge encodes — confirm against sqvarint.
const dyn::hacked_vector& node_t::get_edges(void) const {
20-
return edges;
28+
std::vector<uint64_t> node_t::edges(void) const {
29+
std::vector<uint64_t> res;
30+
if (edge_count()) {
31+
res.resize(edge_count()*EDGE_RECORD_LENGTH);
32+
sqvarint::decode(res.data(),
33+
(uint8_t*)bytes.data()+edge_start(),
34+
edge_count()*EDGE_RECORD_LENGTH);
35+
}
36+
return res;
2137
}
2238

2339
/// Adds one edge record made of `relative_id` and `edge_type`.
/// The old version appended both values to the `edges` vector. The new version
/// asks sqvarint::length for the encoded size of the pair, opens a zero-filled
/// gap of that size at edge_start() (i.e. in front of any existing edge bytes),
/// encodes the pair into the gap, and then increases edge_bytes and edge_count.
/// NOTE(review): because the gap is opened at edge_start(), the new record
/// presumably becomes the first one returned by edges() — confirm callers do
/// not rely on append order.
void node_t::add_edge(const uint64_t& relative_id, const uint64_t& edge_type) {
2440
//std::cerr << "add edge " << "relative_id " << relative_id << " edge_type " << edge_type << std::endl;
25-
edges.push_back(relative_id);
26-
edges.push_back(edge_type);
41+
uint64_t add_edge_bytes = sqvarint::length({relative_id, edge_type});
42+
bytes.reserve(bytes.size()+add_edge_bytes);
43+
bytes.insert(bytes.begin()+edge_start(), add_edge_bytes, 0);
44+
sqvarint::encode({relative_id, edge_type}, bytes.data()+edge_start());
45+
set_edge_bytes(edge_bytes() + add_edge_bytes);
46+
set_edge_count(edge_count() + 1);
2747
}
2848

2949
/// Removes the edge record at position `rank`.
/// `rank` is only checked by the assert, so the bound is not enforced when
/// asserts are compiled out.
/// The old version removed EDGE_RECORD_LENGTH entries from the `edges` vector
/// at offset EDGE_RECORD_LENGTH*rank. The new version locates the record inside
/// `bytes`, erases its bytes, and decreases edge_count and edge_bytes.
/// NOTE(review): presumably sqvarint::bytes(p, n) returns the encoded byte
/// length of the first n values at p — TODO confirm.
void node_t::remove_edge(const uint64_t& rank) {
3050
assert(rank < edge_count());
31-
uint64_t offset = EDGE_RECORD_LENGTH*rank;
32-
for (uint8_t i = 0; i < EDGE_RECORD_LENGTH; ++i) {
33-
edges.remove(offset);
34-
}
51+
uint64_t edge_offset = edge_start() + sqvarint::bytes(bytes.data()+edge_start(), EDGE_RECORD_LENGTH*rank);
52+
// second scan: measure the byte length of the record being removed
// (a separate pass from the one above that skipped the preceding records)
53+
uint64_t j = sqvarint::bytes(bytes.data()+edge_offset, EDGE_RECORD_LENGTH);
54+
bytes.erase(bytes.begin()+edge_offset, bytes.begin()+edge_offset+j);
55+
set_edge_count(edge_count()-1);
56+
set_edge_bytes(edge_bytes()-j);
3557
}
3658

3759
void node_t::add_path_step(const uint64_t& path_id, const bool& is_rev,
@@ -117,54 +139,56 @@ void node_t::remove_path_step(const uint64_t& rank) {
117139
}
118140

119141
/// Empties the node. The old version cleared the `sequence` string and called
/// clear_edges(); the new version zeroes the sequence/edge counters and clears
/// the `bytes` buffer. Both versions finish by calling clear_path_steps().
void node_t::clear(void) {
120-
sequence.clear();
121-
clear_edges();
142+
set_seq_bytes(0);
143+
set_edge_bytes(0);
144+
set_edge_count(0);
145+
bytes.clear();
122146
clear_path_steps();
123147
}
124148

125-
// Removed by this commit: the `edges` member it reset is also removed, and
// clear() now zeroes the edge counters and clears `bytes` itself.
void node_t::clear_edges(void) {
126-
dyn::hacked_vector null_iv;
127-
edges = null_iv;
128-
}
129-
130149
/// Resets path_steps by assigning a default-constructed dyn::hacked_vector
/// over it. Unchanged by this commit.
void node_t::clear_path_steps(void) {
131150
dyn::hacked_vector null_iv;
132151
path_steps = null_iv;
133152
}
134153

135154
/// Writes this node to `out` and returns the number of bytes written.
/// Layout produced by the new version, in order: the three uint32_t counters
/// (_seq_bytes, _edge_bytes, _edge_count), a uint64_t holding bytes.size(), the
/// raw contents of `bytes`, then whatever path_steps.serialize() emits.
/// The fields are written as their raw in-memory bytes, so the format follows
/// the host's byte order.
/// @param out destination stream
/// @return total bytes written, including those reported by path_steps.serialize()
uint64_t node_t::serialize(std::ostream& out) const {
136155
uint64_t written = 0;
137-
size_t seq_size = sequence.size();
138-
out.write((char*)&seq_size, sizeof(size_t));
139-
written += sizeof(size_t);
140-
out << sequence;
141-
written += sequence.size();
142-
written += edges.serialize(out);
156+
out.write((char*)&_seq_bytes, sizeof(uint32_t));
157+
out.write((char*)&_edge_bytes, sizeof(uint32_t));
158+
out.write((char*)&_edge_count, sizeof(uint32_t));
// exactly three uint32_t fields were written above; tallying
// sizeof(uint32_t)*4 + sizeof(uint8_t) here overstated the result by 5 bytes
159+
written += sizeof(uint32_t)*3;
160+
uint64_t node_size = bytes.size();
161+
out.write((char*)&node_size, sizeof(node_size));
162+
written += sizeof(uint64_t);
163+
out.write((char*)bytes.data(), node_size*sizeof(uint8_t));
164+
written += sizeof(uint8_t)*node_size;
143165
written += path_steps.serialize(out);
144166
return written;
145167
}
146168

147169
/// Reads a node from `in`.
/// The old version read a size_t length, the sequence characters, and then the
/// edges vector. The new version reads the three uint32_t counters
/// (_seq_bytes, _edge_bytes, _edge_count), a uint64_t byte count, resizes
/// `bytes` to that count and fills it from the stream. Both versions finish by
/// loading path_steps.
/// NOTE(review): the stream state is not checked after any read, and the byte
/// count taken from the stream drives the resize — confirm inputs are trusted.
void node_t::load(std::istream& in) {
148-
size_t seq_size;
149-
in.read((char*)&seq_size, sizeof(size_t));
150-
sequence.resize(seq_size);
151-
in.read((char*)sequence.c_str(), seq_size);
152-
edges.load(in);
170+
in.read((char*)&_seq_bytes, sizeof(uint32_t));
171+
in.read((char*)&_edge_bytes, sizeof(uint32_t));
172+
in.read((char*)&_edge_count, sizeof(uint32_t));
173+
uint64_t node_size = 0;
174+
in.read((char*)&node_size, sizeof(node_size));
175+
bytes.resize(node_size);
176+
in.read((char*)bytes.data(), node_size*sizeof(uint8_t));
153177
path_steps.load(in);
154178
}
155179

156180
void node_t::display(void) const {
157-
std::cerr << "seq " << sequence << " "
181+
std::cerr << "self_bytes " << bytes.size() << " "
182+
<< "seq_bytes " << seq_bytes() << " "
183+
<< "seq " << sequence() << " "
184+
<< "edge_start " << edge_start() << " "
158185
<< "edge_count " << edge_count() << " "
186+
<< "edge_bytes " << edge_bytes() << " "
159187
<< "path_count " << path_count() << " | ";
160-
if (edge_count()) {
161-
for (uint64_t i = 0; i < edge_count(); ++i) {
162-
std::cerr
163-
<< edges.at(i) << ":"
164-
<< edges.at(i+1) << " ";
165-
}
188+
for (auto i : bytes) {
189+
std::cerr << (int) i << " ";
166190
}
167-
std::cerr << "| ";
191+
std::cerr << " | ";
168192
if (path_count()) {
169193
for (uint64_t i = 0; i < path_count(); ++i) {
170194
std::cerr

src/node.hpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,21 @@ const uint8_t PATH_RECORD_LENGTH = 5;
2020

2121
/// A node object with the sequence, its edge lists, and paths
2222
class node_t {
23-
std::string sequence;
24-
dyn::hacked_vector edges;
23+
std::vector<uint8_t> bytes;
2524
dyn::hacked_vector path_steps;
25+
uint32_t _seq_bytes = 0;
26+
uint32_t _edge_bytes = 0;
27+
uint32_t _edge_count = 0;
2628
public:
2729
inline const uint64_t seq_start(void) const { return 0; }
28-
inline const uint64_t seq_bytes(void) const { return sequence.size(); }
29-
inline const uint64_t edge_count(void) const { return edges.size()/EDGE_RECORD_LENGTH; }
30+
inline const uint64_t seq_bytes(void) const { return _seq_bytes; }
31+
inline const uint64_t edge_start(void) const { return _seq_bytes; }
32+
inline const uint64_t edge_count(void) const { return _edge_count; }
33+
inline const uint64_t edge_bytes(void) const { return _edge_bytes; }
3034
inline const uint64_t path_count(void) const { return path_steps.size()/PATH_RECORD_LENGTH; }
35+
inline void set_seq_bytes(const uint64_t& i) { _seq_bytes = i; }
36+
inline void set_edge_count(const uint64_t& i) { _edge_count = i; }
37+
inline void set_edge_bytes(const uint64_t& i) { _edge_bytes = i; }
3138
struct step_t {
3239
uint64_t data[5] = { 0, 0, 0, 0, 0 }; // PATH_RECORD_LENGTH
3340
step_t(void) { }
@@ -58,9 +65,9 @@ class node_t {
5865
inline void set_next_rank(const uint64_t& i) { data[4] = i; }
5966
};
6067
uint64_t sequence_size(void) const;
61-
const std::string get_sequence(void) const;
68+
const std::string sequence(void) const;
6269
void set_sequence(const std::string& seq);
63-
const dyn::hacked_vector& get_edges(void) const;
70+
std::vector<uint64_t> edges(void) const;
6471
void add_edge(const uint64_t& relative_id, const uint64_t& edge_type);
6572
void remove_edge(const uint64_t& rank);
6673
void add_path_step(const uint64_t& path_id, const bool& is_rev,
@@ -79,7 +86,6 @@ class node_t {
7986
void remove_path_step(const uint64_t& rank);
8087
void update_path_last_bytes(void);
8188
void clear(void);
82-
void clear_edges(void);
8389
void clear_path_steps(void);
8490
uint64_t serialize(std::ostream& out) const;
8591
void load(std::istream& in);

src/odgi.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ size_t graph_t::get_length(const handle_t& handle) const {
4949

5050
/// Get the sequence of a node, presented in the handle's local forward orientation.
/// Looks the node up in node_v by the number unpacked from `handle` and returns
/// its sequence, passed through reverse_complement() when the handle is reversed.
/// The only change in this commit is the accessor name (get_sequence -> sequence).
// node_t::sequence() returns a const std::string by value, so `auto&` deduces
// const std::string& and binds to that temporary; its lifetime is extended to
// the end of this function body.
5151
std::string graph_t::get_sequence(const handle_t& handle) const {
52-
auto& seq = node_v.at(number_bool_packing::unpack_number(handle)).get_sequence();
52+
auto& seq = node_v.at(number_bool_packing::unpack_number(handle)).sequence();
5353
return (get_is_reverse(handle) ? reverse_complement(seq) : seq);
5454
}
5555

@@ -60,7 +60,7 @@ bool graph_t::follow_edges_impl(const handle_t& handle, bool go_left, const std:
6060
const node_t& node = node_v.at(number_bool_packing::unpack_number(handle));
6161
bool is_rev = get_is_reverse(handle);
6262
nid_t node_id = get_id(handle);
63-
auto& node_edges = node.get_edges();
63+
const std::vector<uint64_t> node_edges = node.edges();
6464
if (node_edges.size() == 0) return true;
6565
for (uint64_t i = 0; i < node_edges.size(); i+=2) {
6666
// unpack the edge
@@ -603,7 +603,7 @@ void graph_t::destroy_edge(const handle_t& left_h, const handle_t& right_h) {
603603
nid_t right_node_id = get_id(right_h);
604604
nid_t left_node_id = get_id(left_h);
605605

606-
auto& left_node_edges = left_node.get_edges();
606+
std::vector<uint64_t> left_node_edges = left_node.edges();
607607
bool found_edge = false;
608608
for (uint64_t i = 0; i < left_node_edges.size(); ) {
609609
uint64_t other_id = edge_delta_to_id(left_node_id, left_node_edges.at(i++));
@@ -622,7 +622,7 @@ void graph_t::destroy_edge(const handle_t& left_h, const handle_t& right_h) {
622622
}
623623
}
624624

625-
auto& right_node_edges = right_node.get_edges();
625+
std::vector<uint64_t> right_node_edges = right_node.edges();
626626
for (uint64_t i = 0; i < right_node_edges.size(); ) {
627627
uint64_t other_id = edge_delta_to_id(right_node_id, right_node_edges.at(i++));
628628
uint8_t packed_edge = right_node_edges.at(i++);
@@ -1332,8 +1332,8 @@ void graph_t::display(void) const {
13321332
for (uint64_t i = 0; i < node_v.size(); ++i) {
13331333
auto& node = node_v.at(i);
13341334
nid_t node_id = i+1;
1335-
std::cerr << node_id << ":" << node.get_sequence() << " ";
1336-
auto& node_edges = node.get_edges();
1335+
std::cerr << node_id << ":" << node.sequence() << " ";
1336+
const std::vector<uint64_t> node_edges = node.edges();
13371337
for (uint64_t j = 0; j < node_edges.size(); ++j) {
13381338
std::cerr << node_edges.at(j) << ",";
13391339
}
@@ -1381,7 +1381,7 @@ void graph_t::to_gfa(std::ostream& out) const {
13811381
const node_t& node = node_v.at(number_bool_packing::unpack_number(h));
13821382
bool is_rev = get_is_reverse(h);
13831383
nid_t node_id = get_id(h);
1384-
auto& node_edges = node.get_edges();
1384+
const std::vector<uint64_t> node_edges = node.edges();
13851385
for (uint64_t i = 0; i < node_edges.size(); i+=2) {
13861386
// unpack the edge
13871387
uint64_t other_id = edge_delta_to_id(node_id, node_edges.at(i));

0 commit comments

Comments
 (0)