@@ -52,6 +52,8 @@ void Caller::run() {
52
52
for (const SV &sv : clipped_svs)
53
53
cout << sv << endl;
54
54
}
55
+
56
+ destroy_chromosomes ();
55
57
}
56
58
57
59
void Caller::write_vcf () {
@@ -94,7 +96,7 @@ vector<Cluster> Caller::split_cluster_by_len(const Cluster &cluster) {
94
96
return subclusters;
95
97
}
96
98
97
- /* Split cluster in subclusters */
99
+ // Split cluster into subclusters
98
100
vector<Cluster> Caller::split_cluster (const Cluster &cluster) {
99
101
// Step 1: split cluster by haplotype tag
100
102
Cluster cluster_0 = cluster;
@@ -256,13 +258,16 @@ string Caller::run_poa(const vector<string> &seqs) {
256
258
uint n_seqs = seqs.size ();
257
259
abpoa_t *ab = abpoa_init ();
258
260
abpoa_para_t *abpt = abpoa_init_para ();
259
- abpt->disable_seeding = 1 ;
260
261
abpt->align_mode = 0 ; // global
262
+ abpt->disable_seeding = 1 ;
263
+ abpt->progressive_poa = 0 ;
264
+ abpt->amb_strand = 0 ;
261
265
abpt->out_msa = 0 ;
262
266
abpt->out_cons = 1 ;
263
267
abpt->out_gfa = 0 ;
264
268
// abpt->is_diploid = 1; // TODO: maybe this works now
265
- abpt->progressive_poa = 0 ;
269
+ // abpt->max_n_cons = 2;
270
+ // abpt->min_freq = 0.25;
266
271
abpoa_post_set_para (abpt);
267
272
268
273
// abpt->match = 2; // match score
@@ -285,7 +290,8 @@ string Caller::run_poa(const vector<string> &seqs) {
285
290
286
291
abpoa_msa (ab, abpt, n_seqs, NULL , seq_lens, bseqs, NULL , NULL );
287
292
abpoa_cons_t *abc = ab->abc ;
288
- string cons = " " ;
293
+ string cons = " " ; // XXX: we may avoid converting to ACGT here since we need
294
+ // to convert back for ksw2
289
295
if (abc->n_cons > 0 )
290
296
for (int j = 0 ; j < abc->cons_len [0 ]; ++j)
291
297
cons += " ACGTN" [abc->cons_base [0 ][j]];
@@ -301,10 +307,8 @@ string Caller::run_poa(const vector<string> &seqs) {
301
307
return cons;
302
308
}
303
309
304
- /* Call SVs by POA+realignment */
310
+ // Call SVs by POA+realignment
305
311
void Caller::pcall (const vector<Cluster> &clusters) {
306
- // vector<Genotyper> genotypers(config->threads);
307
- vector<string> gt_strings = {" 0/0" , " 0/1" , " 1/0" , " 1/1" };
308
312
#pragma omp parallel for num_threads(config->threads) schedule(static, 1)
309
313
for (size_t i = 0 ; i < clusters.size (); i++) {
310
314
int t = omp_get_thread_num ();
@@ -313,24 +317,6 @@ void Caller::pcall(const vector<Cluster> &clusters) {
313
317
continue ;
314
318
string chrom = cluster.chrom ;
315
319
316
- // genotypers.at(t).posterior_sv_genotype_give_reads(cluster.reads);
317
- // vector<double> gts = genotypers.at(t).get_posterior_sv_genotype();
318
- Genotyper gtyper;
319
- gtyper.posterior_sv_genotype_give_reads (cluster.reads );
320
- vector<double > gts = gtyper.get_posterior_sv_genotype ();
321
- auto max_gt = max_element (gts.begin (), gts.end ());
322
- double gtq = *max_gt;
323
- if (gtq < 0 || gtq > 1 ) {
324
- cerr << chrom << " :" << cluster.s << " -" << cluster.e << endl;
325
- for (const auto &tpl : cluster.reads )
326
- cerr << get<0 >(tpl) << " :" << get<1 >(tpl) << " " ;
327
- cerr << endl;
328
- for (int i = 0 ; i < 4 ; ++i)
329
- cerr << gt_strings[i] << " - " << gts[i] << endl;
330
- }
331
- string gt = gt_strings[distance (gts.begin (), max_gt)];
332
- if (config->noref && gt.compare (" 0/0" ) == 0 )
333
- continue ;
334
320
const vector<Cluster> &subclusters = split_cluster (cluster);
335
321
336
322
// Calling from one or two clusters
@@ -342,26 +328,41 @@ void Caller::pcall(const vector<Cluster> &clusters) {
342
328
343
329
string ref = string (chromosome_seqs[chrom] + cl.s , cl.e - cl.s + 1 );
344
330
string consensus = run_poa (cl.get_seqs ());
345
- parasail_result_t *result = NULL ;
346
- result = parasail_nw_trace_striped_16 (consensus.c_str (), consensus.size (),
347
- ref.c_str (), ref.size (), 10 , 1 ,
348
- ¶sail_nuc44);
349
- parasail_cigar_t *pcigar =
350
- parasail_result_get_cigar (result, consensus.c_str (), consensus.size (),
351
- ref.c_str (), ref.size (), NULL );
352
- char *cigar_str = parasail_cigar_decode (pcigar);
353
- int score = result->score ;
354
- parasail_cigar_free (pcigar);
355
- parasail_result_free (result);
331
+
332
+ // ksw2 stuff - TODO: move to a separate function
333
+ int sc_mch = 1 , sc_mis = -9 , gapo = 16 , gape = 2 , gapo2 = 41 , gape2 = 1 ;
334
+ int8_t a = (int8_t )sc_mch,
335
+ b = sc_mis < 0 ? (int8_t )sc_mis : -(int8_t )sc_mis; // a>0 and b<0
336
+ int8_t mat[25 ] = {a, b, b, b, 0 , b, a, b, b, 0 , b, b, a,
337
+ b, 0 , b, b, b, a, 0 , 0 , 0 , 0 , 0 , 0 };
338
+ uint tl = ref.size (), ql = consensus.size ();
339
+ uint8_t *ts = (uint8_t *)malloc (tl);
340
+ uint8_t *qs = (uint8_t *)malloc (ql);
341
+ for (i = 0 ; i < tl; ++i)
342
+ ts[i] = _char26_table[(uint8_t )ref[i]]; // encode to 0/1/2/3
343
+ for (i = 0 ; i < ql; ++i)
344
+ qs[i] = _char26_table[(uint8_t )consensus[i]];
345
+
346
+ ksw_extz_t ez;
347
+ memset (&ez, 0 , sizeof (ksw_extz_t ));
348
+ ksw_extd2_sse (0 , ql, qs, tl, ts, 5 , mat, gapo, gape, gapo2, gape2, -1 , -1 ,
349
+ -1 , 0 , &ez);
350
+
351
+ int score = ez.score ;
352
+ string cigar_str = " " ;
353
+ for (int i = 0 ; i < ez.n_cigar ; ++i) {
354
+ cigar_str += to_string (ez.cigar [i] >> 4 ) + " MID" [ez.cigar [i] & 0xf ];
355
+ }
356
356
_p_alignments[t].push_back (
357
357
Consensus (consensus, cigar_str, chrom, cl.s , cl.e ));
358
+
358
359
// -- Extracting SVs
359
360
uint rpos = cl.s ; // position on reference
360
361
uint cpos = 0 ; // position on consensus
361
362
CIGAR cigar;
362
- cigar.parse_cigar (cigar_str);
363
+ cigar.parse_cigar (cigar_str. c_str () );
363
364
int nv = 0 ;
364
- for (const auto cigar_pair : cigar.ops ) {
365
+ for (const auto & cigar_pair : cigar.ops ) {
365
366
uint l = cigar_pair.first ;
366
367
char op = cigar_pair.second ;
367
368
if (op == ' =' || op == ' M' ) {
@@ -394,7 +395,7 @@ void Caller::pcall(const vector<Cluster> &clusters) {
394
395
}
395
396
for (size_t v = 0 ; v < _svs.size (); v++) {
396
397
_svs[v].ngaps = nv;
397
- _svs[v].set_gt (gt, max ( 0 , ( int )(gtq * 100 )) );
398
+ _svs[v].set_gt (" ./. " , 100 );
398
399
_svs[v].set_cov (cl.cov , cl.cov0 , cl.cov1 , cl.cov2 );
399
400
_svs[v].set_rvec (cluster.reads );
400
401
}
@@ -404,7 +405,7 @@ void Caller::pcall(const vector<Cluster> &clusters) {
404
405
}
405
406
}
406
407
407
- /* Clean same SV reported twice */
408
+ // Remove duplicates (same SV reported twice)
408
409
void Caller::clean_dups () {
409
410
vector<SV> _svs;
410
411
string last_chrom = " " ;
0 commit comments