Skip to content

Commit 7e0a613

Browse files
committed
clang-format
1 parent ceabcc3 commit 7e0a613

31 files changed

+3553
-3314
lines changed

assembler.cpp

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,57 @@
11
#include "assembler.hpp"
22

33
void Assembler::run() {
4-
auto c = Configuration::getInstance();
5-
int num_batches = c->aggregate_batches;
6-
int tau = -1; // c->cutoff;
4+
auto c = Configuration::getInstance();
5+
int num_batches = c->aggregate_batches;
6+
int tau = -1; // c->cutoff;
77

8-
lprint({"Assembling high-abundance strings from", to_string(num_batches), "batches.."});
9-
#pragma omp parallel for num_threads(c->threads)
10-
for (int j = 0; j < num_batches; j++) {
11-
lprint({"Loading batch", to_string(j) + ".."}) ;
12-
string s_j = std::to_string(j);
13-
string inpath = c->workdir + "/solution_batch_" + s_j + ".sfs";
14-
string outpath = c->workdir + "/solution_batch_" + s_j + ".assembled.sfs";
15-
ofstream outf(outpath);
16-
map<string, vector<SFS>> SFSs = parse_sfsfile(inpath, tau);
17-
//cout << SFSs.size() << "SFS in total." << endl ;
18-
for (map<string, vector<SFS>>::iterator it = SFSs.begin(); it != SFSs.end(); ++it) {
19-
string ridx = it->first;
20-
vector<SFS> sfs = it->second;
21-
vector<SFS> assembled_sfs = assemble(sfs);
22-
bool is_first = true;
23-
for (const SFS &sfs : assembled_sfs) {
24-
outf << (is_first ? ridx : "*") << "\t"
25-
<< "\t" << sfs.s << "\t" << sfs.l << "\t" << sfs.c << endl;
26-
is_first = false;
27-
}
28-
}
29-
outf.close();
8+
lprint({"Assembling high-abundance strings from", to_string(num_batches),
9+
"batches.."});
10+
#pragma omp parallel for num_threads(c->threads)
11+
for (int j = 0; j < num_batches; j++) {
12+
lprint({"Loading batch", to_string(j) + ".."});
13+
string s_j = std::to_string(j);
14+
string inpath = c->workdir + "/solution_batch_" + s_j + ".sfs";
15+
string outpath = c->workdir + "/solution_batch_" + s_j + ".assembled.sfs";
16+
ofstream outf(outpath);
17+
map<string, vector<SFS>> SFSs = parse_sfsfile(inpath, tau);
18+
// cout << SFSs.size() << "SFS in total." << endl ;
19+
for (map<string, vector<SFS>>::iterator it = SFSs.begin(); it != SFSs.end();
20+
++it) {
21+
string ridx = it->first;
22+
vector<SFS> sfs = it->second;
23+
vector<SFS> assembled_sfs = assemble(sfs);
24+
bool is_first = true;
25+
for (const SFS &sfs : assembled_sfs) {
26+
outf << (is_first ? ridx : "*") << "\t"
27+
<< "\t" << sfs.s << "\t" << sfs.l << "\t" << sfs.c << endl;
28+
is_first = false;
29+
}
3030
}
31+
outf.close();
32+
}
3133
}
3234

3335
vector<SFS> Assembler::assemble(vector<SFS> &sfs) {
34-
vector<SFS> assembled_sfs;
35-
sort(sfs.begin(), sfs.end());
36-
int i = 0;
37-
while (i < sfs.size()) {
38-
uint j;
39-
for (j = i + 1; j < sfs.size(); ++j) {
40-
if (sfs[j - 1].s + sfs[j - 1].l <= sfs[j].s) {
41-
// non-overlapping
42-
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
43-
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
44-
i = j;
45-
break;
46-
}
47-
}
48-
if (j == sfs.size()) {
49-
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
50-
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
51-
i = j;
52-
}
36+
vector<SFS> assembled_sfs;
37+
sort(sfs.begin(), sfs.end());
38+
int i = 0;
39+
while (i < sfs.size()) {
40+
uint j;
41+
for (j = i + 1; j < sfs.size(); ++j) {
42+
if (sfs[j - 1].s + sfs[j - 1].l <= sfs[j].s) {
43+
// non-overlapping
44+
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
45+
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
46+
i = j;
47+
break;
48+
}
5349
}
54-
return assembled_sfs;
50+
if (j == sfs.size()) {
51+
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
52+
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
53+
i = j;
54+
}
55+
}
56+
return assembled_sfs;
5557
}

assembler.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
#ifndef ASSEMBLER_HPP
22
#define ASSEMBLER_HPP
33

4-
#include <list>
54
#include <fstream>
5+
#include <list>
66

7-
#include "sfs.hpp"
87
#include "config.hpp"
8+
#include "sfs.hpp"
99

1010
using namespace std;
1111

bam.cpp

Lines changed: 117 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1,134 +1,137 @@
11
#include "bam.hpp"
22

3-
using namespace std ;
3+
using namespace std;
44

5-
uint32_t cigar_len_mask = 0xFFFFFFF0 ;
6-
uint32_t cigar_type_mask = 0xF ;
5+
uint32_t cigar_len_mask = 0xFFFFFFF0;
6+
uint32_t cigar_type_mask = 0xF;
77

88
string print_cigar_symbol(int type) {
9-
if (type == BAM_CMATCH) {
10-
return "M" ;
11-
}
12-
if (type == BAM_CINS) {
13-
return "I" ;
14-
}
15-
if (type == BAM_CDEL) {
16-
return "D" ;
17-
}
18-
if (type == BAM_CSOFT_CLIP) {
19-
return "S" ;
20-
}
21-
if (type == BAM_CHARD_CLIP) {
22-
return "H" ;
23-
}
24-
return "X" ;
9+
if (type == BAM_CMATCH) {
10+
return "M";
11+
}
12+
if (type == BAM_CINS) {
13+
return "I";
14+
}
15+
if (type == BAM_CDEL) {
16+
return "D";
17+
}
18+
if (type == BAM_CSOFT_CLIP) {
19+
return "S";
20+
}
21+
if (type == BAM_CHARD_CLIP) {
22+
return "H";
23+
}
24+
return "X";
2525
}
2626

27-
vector<pair<uint32_t, uint32_t>> decode_cigar(bam1_t* read) {
28-
// get CIGAR
29-
vector<pair<uint32_t, uint32_t>> cigar_offsets ;
30-
uint32_t* cigar = bam_get_cigar(read) ;
31-
int offset = 0 ;
32-
for (int i = 0; i < read->core.n_cigar; i++) {
33-
uint32_t type = cigar[i] & cigar_type_mask ;
34-
uint32_t length = cigar[i] >> 4 ;
35-
cigar_offsets.push_back(make_pair(length, type)) ;
36-
}
37-
return cigar_offsets ;
27+
// Decodes the CIGAR array of a BAM record into a list of pairs.
//
// Each returned pair is (length, type): `length` is the packed CIGAR word
// shifted right by 4 bits and `type` is the word masked with
// `cigar_type_mask` (the low 4 bits). Pairs are returned in the same order
// as they are stored in the record.
//
// read: the alignment record whose `core.n_cigar` CIGAR words are decoded.
vector<pair<uint32_t, uint32_t>> decode_cigar(bam1_t *read) {
  const uint32_t *cigar = bam_get_cigar(read);
  const uint32_t n_ops = read->core.n_cigar;

  vector<pair<uint32_t, uint32_t>> cigar_offsets;
  // The number of operations is known up front, so allocate once.
  cigar_offsets.reserve(n_ops);
  for (uint32_t i = 0; i < n_ops; ++i) {
    const uint32_t type = cigar[i] & cigar_type_mask;
    const uint32_t length = cigar[i] >> 4;
    cigar_offsets.emplace_back(length, type);
  }
  return cigar_offsets;
}
3939

40-
uint8_t* encode_cigar(vector<pair<uint32_t, uint32_t>> cigar) {
41-
uint32_t* cigar_bytes = (uint32_t*) malloc(sizeof(uint32_t) * cigar.size()) ;
42-
for (int i = 0; i < cigar.size(); i++) {
43-
cigar_bytes[i] = (cigar[i].first << 4) | (cigar[i].second & cigar_type_mask) ;
44-
}
45-
return (uint8_t*) cigar_bytes ;
40+
// Packs a list of (length, type) pairs into an array of 32-bit CIGAR words.
//
// Each word is `(length << 4) | (type & cigar_type_mask)`, i.e. the type
// occupies the low 4 bits and the length the remaining upper bits.
//
// cigar: the operations to encode, in order.
// Returns a buffer of `cigar.size()` uint32_t words obtained from malloc(),
// exposed as a byte pointer. Returns nullptr if the allocation fails (which
// malloc() may also report for an empty `cigar`).
// NOTE(review): the buffer is never freed here, so presumably the caller
// takes ownership and must release it with free() - confirm at call sites.
uint8_t *encode_cigar(vector<pair<uint32_t, uint32_t>> cigar) {
  const size_t n_ops = cigar.size();
  uint32_t *cigar_bytes =
      static_cast<uint32_t *>(malloc(sizeof(uint32_t) * n_ops));
  if (cigar_bytes == nullptr) {
    // Allocation failed (or zero-sized request): nothing can be written.
    return nullptr;
  }
  for (size_t i = 0; i < n_ops; ++i) {
    cigar_bytes[i] =
        (cigar[i].first << 4) | (cigar[i].second & cigar_type_mask);
  }
  return reinterpret_cast<uint8_t *>(cigar_bytes);
}
4748

48-
uint8_t* encode_bam_seq(char* seq) {
49-
int n = (strlen(seq) + 1) >> 1 ;
50-
int l_seq = strlen(seq) ;
51-
uint8_t* seq_bytes = (uint8_t*) malloc(sizeof(uint8_t) * n) ;
52-
int i = 0 ;
53-
n = 0 ;
54-
for (i = 0; i + 1 < l_seq; i += 2) {
55-
seq_bytes[n] = (seq_nt16_table[(unsigned char)seq[i]] << 4) | seq_nt16_table[(unsigned char)seq[i + 1]];
56-
n += 1 ;
57-
}
58-
for (; i < l_seq; i++) {
59-
seq_bytes[n] = seq_nt16_table[(unsigned char)seq[i]] << 4;
60-
n += 1 ;
61-
}
62-
return seq_bytes ;
49+
uint8_t *encode_bam_seq(char *seq) {
50+
int n = (strlen(seq) + 1) >> 1;
51+
int l_seq = strlen(seq);
52+
uint8_t *seq_bytes = (uint8_t *)malloc(sizeof(uint8_t) * n);
53+
int i = 0;
54+
n = 0;
55+
for (i = 0; i + 1 < l_seq; i += 2) {
56+
seq_bytes[n] = (seq_nt16_table[(unsigned char)seq[i]] << 4) |
57+
seq_nt16_table[(unsigned char)seq[i + 1]];
58+
n += 1;
59+
}
60+
for (; i < l_seq; i++) {
61+
seq_bytes[n] = seq_nt16_table[(unsigned char)seq[i]] << 4;
62+
n += 1;
63+
}
64+
return seq_bytes;
6365
}
6466

6567
char reverse_complement_base(char base) {
66-
if (base == 'C' || base == 'c') {
67-
return 'G' ;
68-
}
69-
if (base == 'A' || base == 'a') {
70-
return 'T' ;
71-
}
72-
if (base == 'G' || base == 'g') {
73-
return 'C' ;
74-
}
75-
if (base == 'T' || base == 't') {
76-
return 'A' ;
77-
}
78-
else {
79-
return 'N' ;
80-
}
68+
if (base == 'C' || base == 'c') {
69+
return 'G';
70+
}
71+
if (base == 'A' || base == 'a') {
72+
return 'T';
73+
}
74+
if (base == 'G' || base == 'g') {
75+
return 'C';
76+
}
77+
if (base == 'T' || base == 't') {
78+
return 'A';
79+
} else {
80+
return 'N';
81+
}
8182
}
8283

83-
void reverse_complement_read(char* seq) {
84-
int l = strlen(seq) ;
85-
int i = 0 ;
86-
while (i < l / 2) {
87-
auto t = reverse_complement_base(seq[l - i]) ;
88-
seq[l - 1 - i] = reverse_complement_base(seq[i]) ;
89-
seq[i] = t ;
90-
i += 1 ;
91-
}
84+
void reverse_complement_read(char *seq) {
85+
int l = strlen(seq);
86+
int i = 0;
87+
while (i < l / 2) {
88+
auto t = reverse_complement_base(seq[l - i]);
89+
seq[l - 1 - i] = reverse_complement_base(seq[i]);
90+
seq[i] = t;
91+
i += 1;
92+
}
9293
}
9394

9495
vector<pair<int, int>> get_aligned_pairs(bam1_t *alignment) {
95-
vector<pair<int, int>> result ;
96-
uint ref_pos = alignment->core.pos ;
97-
uint read_pos = 0 ;
98-
auto cigar_offsets = decode_cigar(alignment) ;
99-
int m = 0 ;
100-
while (true) {
101-
if (m == cigar_offsets.size()) {
102-
break ;
103-
}
104-
if (cigar_offsets[m].second == BAM_CMATCH or cigar_offsets[m].second == BAM_CEQUAL or cigar_offsets[m].second == BAM_CDIFF) {
105-
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
106-
result.push_back(make_pair(read_pos, i));
107-
read_pos++;
108-
}
109-
ref_pos += cigar_offsets[m].first;
110-
} else if (cigar_offsets[m].second == BAM_CINS or cigar_offsets[m].second == BAM_CSOFT_CLIP) {
111-
for (uint i = 0; i < cigar_offsets[m].first; ++i) {
112-
result.push_back(make_pair(read_pos, -1));
113-
read_pos++;
114-
}
115-
} else if (cigar_offsets[m].second == BAM_CDEL) {
116-
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
117-
result.push_back(make_pair(-1, i));
118-
}
119-
ref_pos += cigar_offsets[m].first;
120-
} else if (cigar_offsets[m].second == BAM_CHARD_CLIP) {
121-
// advances neither
122-
} else if (cigar_offsets[m].second == BAM_CREF_SKIP) {
123-
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
124-
result.push_back(make_pair(-1, i));
125-
}
126-
ref_pos += cigar_offsets[m].first;
127-
} else { //if (cigar_offsets[m].second == BAM_CPAD) {
128-
//TODO
129-
}
130-
m++ ;
131-
}
132-
return result;
96+
vector<pair<int, int>> result;
97+
uint ref_pos = alignment->core.pos;
98+
uint read_pos = 0;
99+
auto cigar_offsets = decode_cigar(alignment);
100+
int m = 0;
101+
while (true) {
102+
if (m == cigar_offsets.size()) {
103+
break;
104+
}
105+
if (cigar_offsets[m].second == BAM_CMATCH or
106+
cigar_offsets[m].second == BAM_CEQUAL or
107+
cigar_offsets[m].second == BAM_CDIFF) {
108+
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
109+
result.push_back(make_pair(read_pos, i));
110+
read_pos++;
111+
}
112+
ref_pos += cigar_offsets[m].first;
113+
} else if (cigar_offsets[m].second == BAM_CINS or
114+
cigar_offsets[m].second == BAM_CSOFT_CLIP) {
115+
for (uint i = 0; i < cigar_offsets[m].first; ++i) {
116+
result.push_back(make_pair(read_pos, -1));
117+
read_pos++;
118+
}
119+
} else if (cigar_offsets[m].second == BAM_CDEL) {
120+
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
121+
result.push_back(make_pair(-1, i));
122+
}
123+
ref_pos += cigar_offsets[m].first;
124+
} else if (cigar_offsets[m].second == BAM_CHARD_CLIP) {
125+
// advances neither
126+
} else if (cigar_offsets[m].second == BAM_CREF_SKIP) {
127+
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
128+
result.push_back(make_pair(-1, i));
129+
}
130+
ref_pos += cigar_offsets[m].first;
131+
} else { // if (cigar_offsets[m].second == BAM_CPAD) {
132+
// TODO
133+
}
134+
m++;
135+
}
136+
return result;
133137
}
134-

0 commit comments

Comments
 (0)