-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathassembler.cpp
55 lines (52 loc) · 2.02 KB
/
assembler.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#include "assembler.hpp"
void Assembler::run() {
auto c = Configuration::getInstance();
int num_batches = c->aggregate_batches;
int tau = c->cutoff;
lprint({"Assembling high-abundance strings from", to_string(num_batches), "batches.."});
#pragma omp parallel for num_threads(c->threads)
for (int j = 0; j < num_batches; j++) {
lprint({"Loading batch", to_string(j) + ".."}) ;
string s_j = std::to_string(j);
string inpath = c->workdir + "/solution_batch_" + s_j + ".sfs";
string outpath = c->workdir + "/solution_batch_" + s_j + ".assembled.sfs";
ofstream outf(outpath);
map<string, vector<SFS>> SFSs = parse_sfsfile(inpath, tau);
//cout << SFSs.size() << "SFS in total." << endl ;
for (map<string, vector<SFS>>::iterator it = SFSs.begin(); it != SFSs.end(); ++it) {
string ridx = it->first;
vector<SFS> sfs = it->second;
vector<SFS> assembled_sfs = assemble(sfs);
bool is_first = true;
for (const SFS &sfs : assembled_sfs) {
outf << (is_first ? ridx : "*") << "\t"
<< "\t" << sfs.s << "\t" << sfs.l << "\t" << sfs.c << endl;
is_first = false;
}
}
outf.close();
}
}
vector<SFS> Assembler::assemble(vector<SFS> &sfs) {
vector<SFS> assembled_sfs;
sort(sfs.begin(), sfs.end());
int i = 0;
while (i < sfs.size()) {
uint j;
for (j = i + 1; j < sfs.size(); ++j) {
if (sfs[j - 1].s + sfs[j - 1].l <= sfs[j].s) {
// non-overlapping
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
i = j;
break;
}
}
if (j == sfs.size()) {
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
i = j;
}
}
return assembled_sfs;
}