|
5 | 5 | namespace odgi { |
6 | 6 |
|
7 | 7 | uint64_t node_t::sequence_size(void) const { |
8 | | - return sequence.size(); |
| 8 | + return seq_bytes(); |
9 | 9 | } |
10 | 10 |
|
11 | | -const std::string node_t::get_sequence(void) const { |
12 | | - return sequence; |
| 11 | +const std::string node_t::sequence(void) const { |
| 12 | + const std::string res((char*)bytes.data()+seq_start(), seq_bytes()); |
| 13 | + return res; |
13 | 14 | } |
14 | 15 |
|
15 | 16 | void node_t::set_sequence(const std::string& seq) { |
16 | | - sequence = seq; |
| 17 | + if (seq.size() > seq_bytes()) { |
| 18 | + bytes.reserve(bytes.size()+seq.size()-seq_bytes()); |
| 19 | + bytes.insert(bytes.begin()+seq_start(), seq.size() - seq_bytes(), 0); |
| 20 | + set_seq_bytes(seq.size()); |
| 21 | + } else if (seq.size() < seq_bytes()) { |
| 22 | + bytes.erase(bytes.begin()+seq_start(), bytes.begin()+seq_start()+(seq_bytes()-seq.size()));; |
| 23 | + set_seq_bytes(seq.size()); |
| 24 | + } |
| 25 | + memcpy(bytes.data()+seq_start(), seq.c_str(), seq.size()); |
17 | 26 | } |
18 | 27 |
|
19 | | -const dyn::hacked_vector& node_t::get_edges(void) const { |
20 | | - return edges; |
| 28 | +std::vector<uint64_t> node_t::edges(void) const { |
| 29 | + std::vector<uint64_t> res; |
| 30 | + if (edge_count()) { |
| 31 | + res.resize(edge_count()*EDGE_RECORD_LENGTH); |
| 32 | + sqvarint::decode(res.data(), |
| 33 | + (uint8_t*)bytes.data()+edge_start(), |
| 34 | + edge_count()*EDGE_RECORD_LENGTH); |
| 35 | + } |
| 36 | + return res; |
21 | 37 | } |
22 | 38 |
|
23 | 39 | void node_t::add_edge(const uint64_t& relative_id, const uint64_t& edge_type) { |
24 | 40 | //std::cerr << "add edge " << "relative_id " << relative_id << " edge_type " << edge_type << std::endl; |
25 | | - edges.push_back(relative_id); |
26 | | - edges.push_back(edge_type); |
| 41 | + uint64_t add_edge_bytes = sqvarint::length({relative_id, edge_type}); |
| 42 | + bytes.reserve(bytes.size()+add_edge_bytes); |
| 43 | + bytes.insert(bytes.begin()+edge_start(), add_edge_bytes, 0); |
| 44 | + sqvarint::encode({relative_id, edge_type}, bytes.data()+edge_start()); |
| 45 | + set_edge_bytes(edge_bytes() + add_edge_bytes); |
| 46 | + set_edge_count(edge_count() + 1); |
27 | 47 | } |
28 | 48 |
|
29 | 49 | void node_t::remove_edge(const uint64_t& rank) { |
30 | 50 | assert(rank < edge_count()); |
31 | | - uint64_t offset = EDGE_RECORD_LENGTH*rank; |
32 | | - for (uint8_t i = 0; i < EDGE_RECORD_LENGTH; ++i) { |
33 | | - edges.remove(offset); |
34 | | - } |
| 51 | + uint64_t edge_offset = edge_start() + sqvarint::bytes(bytes.data()+edge_start(), EDGE_RECORD_LENGTH*rank); |
| 52 | + // a bit redundant |
| 53 | + uint64_t j = sqvarint::bytes(bytes.data()+edge_offset, EDGE_RECORD_LENGTH); |
| 54 | + bytes.erase(bytes.begin()+edge_offset, bytes.begin()+edge_offset+j); |
| 55 | + set_edge_count(edge_count()-1); |
| 56 | + set_edge_bytes(edge_bytes()-j); |
35 | 57 | } |
36 | 58 |
|
37 | 59 | void node_t::add_path_step(const uint64_t& path_id, const bool& is_rev, |
@@ -117,54 +139,56 @@ void node_t::remove_path_step(const uint64_t& rank) { |
117 | 139 | } |
118 | 140 |
|
119 | 141 | void node_t::clear(void) { |
120 | | - sequence.clear(); |
121 | | - clear_edges(); |
| 142 | + set_seq_bytes(0); |
| 143 | + set_edge_bytes(0); |
| 144 | + set_edge_count(0); |
| 145 | + bytes.clear(); |
122 | 146 | clear_path_steps(); |
123 | 147 | } |
124 | 148 |
|
125 | | -void node_t::clear_edges(void) { |
126 | | - dyn::hacked_vector null_iv; |
127 | | - edges = null_iv; |
128 | | -} |
129 | | - |
130 | 149 | void node_t::clear_path_steps(void) { |
131 | 150 | dyn::hacked_vector null_iv; |
132 | 151 | path_steps = null_iv; |
133 | 152 | } |
134 | 153 |
|
135 | 154 | uint64_t node_t::serialize(std::ostream& out) const { |
136 | 155 | uint64_t written = 0; |
137 | | - size_t seq_size = sequence.size(); |
138 | | - out.write((char*)&seq_size, sizeof(size_t)); |
139 | | - written += sizeof(size_t); |
140 | | - out << sequence; |
141 | | - written += sequence.size(); |
142 | | - written += edges.serialize(out); |
| 156 | + out.write((char*)&_seq_bytes, sizeof(uint32_t)); |
| 157 | + out.write((char*)&_edge_bytes, sizeof(uint32_t)); |
| 158 | + out.write((char*)&_edge_count, sizeof(uint32_t)); |
| 159 | + written += sizeof(uint32_t)*4 + sizeof(uint8_t); |
| 160 | + uint64_t node_size = bytes.size(); |
| 161 | + out.write((char*)&node_size, sizeof(node_size)); |
| 162 | + written += sizeof(uint64_t); |
| 163 | + out.write((char*)bytes.data(), node_size*sizeof(uint8_t)); |
| 164 | + written += sizeof(uint8_t)*node_size; |
143 | 165 | written += path_steps.serialize(out); |
144 | 166 | return written; |
145 | 167 | } |
146 | 168 |
|
147 | 169 | void node_t::load(std::istream& in) { |
148 | | - size_t seq_size; |
149 | | - in.read((char*)&seq_size, sizeof(size_t)); |
150 | | - sequence.resize(seq_size); |
151 | | - in.read((char*)sequence.c_str(), seq_size); |
152 | | - edges.load(in); |
| 170 | + in.read((char*)&_seq_bytes, sizeof(uint32_t)); |
| 171 | + in.read((char*)&_edge_bytes, sizeof(uint32_t)); |
| 172 | + in.read((char*)&_edge_count, sizeof(uint32_t)); |
| 173 | + uint64_t node_size = 0; |
| 174 | + in.read((char*)&node_size, sizeof(node_size)); |
| 175 | + bytes.resize(node_size); |
| 176 | + in.read((char*)bytes.data(), node_size*sizeof(uint8_t)); |
153 | 177 | path_steps.load(in); |
154 | 178 | } |
155 | 179 |
|
156 | 180 | void node_t::display(void) const { |
157 | | - std::cerr << "seq " << sequence << " " |
| 181 | + std::cerr << "self_bytes " << bytes.size() << " " |
| 182 | + << "seq_bytes " << seq_bytes() << " " |
| 183 | + << "seq " << sequence() << " " |
| 184 | + << "edge_start " << edge_start() << " " |
158 | 185 | << "edge_count " << edge_count() << " " |
| 186 | + << "edge_bytes " << edge_bytes() << " " |
159 | 187 | << "path_count " << path_count() << " | "; |
160 | | - if (edge_count()) { |
161 | | - for (uint64_t i = 0; i < edge_count(); ++i) { |
162 | | - std::cerr |
163 | | - << edges.at(i) << ":" |
164 | | - << edges.at(i+1) << " "; |
165 | | - } |
| 188 | + for (auto i : bytes) { |
| 189 | + std::cerr << (int) i << " "; |
166 | 190 | } |
167 | | - std::cerr << "| "; |
| 191 | + std::cerr << " | "; |
168 | 192 | if (path_count()) { |
169 | 193 | for (uint64_t i = 0; i < path_count(); ++i) { |
170 | 194 | std::cerr |
|
0 commit comments