Skip to content

Commit 4c67bab

Browse files
committed
Change sfs_solution_t to use maps.
1 parent d95ac15 commit 4c67bab

File tree

3 files changed

+11
-19
lines changed

3 files changed

+11
-19
lines changed

aggregator.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ void Aggregator::find_high_abundance_sequences() {
3030
if (_sequences[j].find(hash) == _sequences[j].end()) {
3131
_sequences[j][hash] = 0 ;
3232
}
33-
_sequences[j][hash] += std::stoi(tokens[4]) ;
33+
_sequences[j][hash] += 1 ; // std::stoi(tokens[4]) ; it's always 1
3434
}
3535
txt_file.close() ;
3636
}
@@ -78,16 +78,16 @@ void Aggregator::load_sequences() {
7878
while (std::getline(txt_file, line)) {
7979
istringstream iss(line) ;
8080
vector<string> tokens{istream_iterator<string>{iss}, istream_iterator<string>{}} ;
81-
string canon = canonicalize(tokens[1]) ;
8281
if (read == "*") {
8382
read = tokens[0] ;
8483
}
84+
string canon = canonicalize(tokens[1]) ;
8585
int hash = std::hash<std::string>()(canon) ;
8686
if (sequence_index.find(hash) != sequence_index.end()) {
8787
if (_sequences[j].find(canon) == _sequences[j].end()) {
8888
_sequences[j][canon] = 0 ;
8989
}
90-
_sequences[j][canon] += std::stoi(tokens[4]) ;
90+
_sequences[j][canon] += 1 ; //std::stoi(tokens[4]) ;
9191
_read_ids[j][canon][read] = 1 ;
9292
}
9393
}
@@ -102,6 +102,7 @@ void Aggregator::load_sequences() {
102102
}
103103
sequences[it->first] += it->second ;
104104
read_ids[it->first].insert(_read_ids[j][it->first].begin(), _read_ids[j][it->first].end()) ;
105+
assert(sequences[it->first] == read_ids[it->first].size()) ;
105106
}
106107
_sequences[j].clear() ;
107108
}

ping_pong.cpp

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ bool PingPong::backward_search(rld_t *index, const uint8_t *P, int p2) {
5353
return sai.x[2] != 0 ;
5454
}
5555

56-
void PingPong::ping_pong_search(rld_t *index, fastq_entry_t fqe, std::vector<sfs_solution_t>& solutions) {
56+
void PingPong::ping_pong_search(rld_t *index, const fastq_entry_t& fqe, std::vector<sfs_solution_t>& solutions) {
5757
int l = fqe.seq.size() ;
5858
if (l <= 10) {
5959
return ;
@@ -101,18 +101,9 @@ void PingPong::ping_pong_search(rld_t *index, fastq_entry_t fqe, std::vector<sfs
101101
DEBUG(cerr << "Mismatch " << int2char[P[end]] << " (" << end << "). fmatches: " << fmatches << endl ;)
102102
// add solution
103103
DEBUG(cerr << "Adding [" << begin << ", " << end << "]." << endl ;)
104-
int acc_len = end - begin + 1 ;
105104
int sfs_len = end - begin + 1 ;
106-
//if (config->min_string_length > 0) {
107-
// sfs_len = acc_len > config->min_string_length ? acc_len : config->min_string_length ;
108-
// if (begin + sfs_len >= l - 1) {
109-
// sfs_len = acc_len ;
110-
// }
111-
// assert(sfs_len == config->min_string_length || sfs_len == acc_len) ;
112-
// DEBUG(cerr << "Adjusted length to " << sfs_len << "." << endl ;)
113-
//}
105+
int acc_len = end - begin + 1 ;
114106
DEBUG(cerr << "Adjusted length from " << acc_len << " to " << sfs_len << "." << endl ;)
115-
116107
// CHECKMERGE
117108
solutions.push_back(sfs_solution_t{begin, sfs_len, fqe.seq.substr(begin, sfs_len)});
118109
// if (!check_solution(index, fqe.seq.substr(begin, sfs_len))) {
@@ -200,10 +191,10 @@ bool PingPong::load_batch_fastq(int threads, int batch_size, int p) {
200191
return n != 0 ? true : false ;
201192
}
202193

203-
batch_type_t PingPong::process_batch(rld_t* index, vector<fastq_entry_t> fastq_entries) {
194+
batch_type_t PingPong::process_batch(rld_t* index, const vector<fastq_entry_t>& fastq_entries) {
204195
batch_type_t solutions ;
205196
// store read id once for all strings to save space, is it worth it?
206-
for (const auto fastq_entry: fastq_entries) {
197+
for (const auto &fastq_entry: fastq_entries) {
207198
ping_pong_search(index, fastq_entry, solutions[fastq_entry.head]) ;
208199
}
209200
return solutions ;

ping_pong.hpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ static unsigned char seq_nt6_table[128] = {
6565
5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
6666
} ;
6767

68-
typedef std::unordered_map<std::string, std::vector<sfs_solution_t>> batch_type_t;
68+
typedef std::map<std::string, std::vector<sfs_solution_t>> batch_type_t;
6969

7070
static const std::vector<std::string> int2char ({"$", "A", "C", "G", "T", "N"}) ;
7171

@@ -99,13 +99,13 @@ class PingPong {
9999
bool load_batch_fastq(int threads, int batch_size, int p) ;
100100

101101
//batch_type_t search_solutions ;
102-
batch_type_t process_batch(rld_t* index, std::vector<fastq_entry_t> fastq_entries) ;
102+
batch_type_t process_batch(rld_t* index, const std::vector<fastq_entry_t>& fastq_entries) ;
103103
std::vector<std::vector<batch_type_t>> batches ;
104104
void output_batch(int) ;
105105

106106
bool check_solution(rld_t* index, std::string S) ;
107107
bool backward_search(rld_t *index, const uint8_t *P, int p2) ;
108-
void ping_pong_search(rld_t *index, fastq_entry_t fqe, std::vector<sfs_solution_t>&) ;
108+
void ping_pong_search(rld_t *index, const fastq_entry_t &fqe, std::vector<sfs_solution_t>&) ;
109109
fastq_entry_t get_solution(fastq_entry_t fqe, int s, int l) ;
110110

111111
Configuration* config ;

0 commit comments

Comments
 (0)