@@ -162,6 +162,7 @@ sort_superclusters(std::shared_ptr<superclusterData> sc_data) {
162
162
void superclusterData::add_callset_vars (int callset,
163
163
std::vector< std::unordered_map< std::string,
164
164
std::shared_ptr<ctgVariants> > > vars) {
165
+ bool print = false ;
165
166
166
167
for (int ctg_idx = 0 ; ctg_idx < int (this ->contigs .size ()); ctg_idx++) {
167
168
std::string ctg = contigs[ctg_idx];
@@ -191,6 +192,8 @@ void superclusterData::add_callset_vars(int callset,
191
192
} else if (vars[HAP2][ctg]->n ) {
192
193
curr_left_reach = vars[HAP2][ctg]->left_reaches [0 ];
193
194
curr_right_reach = vars[HAP2][ctg]->right_reaches [0 ];
195
+ } else {
196
+ ERROR (" No variants on contig: %s" , ctg.data ());
194
197
}
195
198
196
199
// initialize indices
@@ -199,8 +202,7 @@ void superclusterData::add_callset_vars(int callset,
199
202
int curr_var_idx = 0 ;
200
203
int next_left_reach = std::numeric_limits<int >::max ();
201
204
int next_right_reach = std::numeric_limits<int >::min ();
202
- while (var_idx[HAP1] < vars[HAP1][ctg]->n ||
203
- var_idx[HAP2] < vars[HAP2][ctg]->n ) {
205
+ while (var_idx[HAP1] < vars[HAP1][ctg]->n || var_idx[HAP2] < vars[HAP2][ctg]->n ) {
204
206
205
207
// ////////////////
206
208
// ADD VARIANTS //
@@ -228,13 +230,13 @@ void superclusterData::add_callset_vars(int callset,
228
230
vars[HAP1][ctg]->gt_quals [var_idx[HAP1]],
229
231
vars[HAP1][ctg]->var_quals [var_idx[HAP1]],
230
232
vars[HAP1][ctg]->phase_sets [var_idx[HAP1]]);
231
- /* printf("adding 1|1 var= %s:%d\t%s\t%s\t%s\n", */
232
- /* ctg.data(), */
233
- /* vars[HAP1][ctg]->poss[var_idx[HAP1]], */
234
- /* vars[HAP1][ctg]->refs[var_idx[HAP1]].data(), */
235
- /* vars[HAP1][ctg]->alts[var_idx[HAP1]].data(), */
236
- /* gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data() */
237
- /* ); */
233
+ if (print) printf (" adding 1|1 var= %s:%d\t %s\t %s\t %s\n " ,
234
+ ctg.data (),
235
+ vars[HAP1][ctg]->poss [var_idx[HAP1]],
236
+ vars[HAP1][ctg]->refs [var_idx[HAP1]].data (),
237
+ vars[HAP1][ctg]->alts [var_idx[HAP1]].data (),
238
+ gt_strs[vars[HAP1][ctg]->orig_gts [var_idx[HAP1]]].data ()
239
+ );
238
240
var_idx[HAP1]++; var_idx[HAP2]++;
239
241
updated[HAP1] = true ; updated[HAP2] = true ;
240
242
@@ -261,59 +263,58 @@ void superclusterData::add_callset_vars(int callset,
261
263
vars[hap_idx][ctg]->gt_quals [var_idx[hap_idx]],
262
264
vars[hap_idx][ctg]->var_quals [var_idx[hap_idx]],
263
265
vars[hap_idx][ctg]->phase_sets [var_idx[hap_idx]]);
264
- /* printf("adding %s var= %s:%d\t%s\t%s\t%s\n", */
265
- /* hap_idx == 0 ? "1|0" : "0|1", */
266
- /* ctg.data(), */
267
- /* vars[hap_idx][ctg]->poss[var_idx[hap_idx]], */
268
- /* vars[hap_idx][ctg]->refs[var_idx[hap_idx]].data(), */
269
- /* vars[hap_idx][ctg]->alts[var_idx[hap_idx]].data(), */
270
- /* gt_strs[vars[hap_idx][ctg]->orig_gts[var_idx[hap_idx]]].data() */
271
- /* ); */
266
+ if (print) printf (" adding %s var= %s:%d\t %s\t %s\t %s\n " ,
267
+ hap_idx == 0 ? " 1|0" : " 0|1" ,
268
+ ctg.data (),
269
+ vars[hap_idx][ctg]->poss [var_idx[hap_idx]],
270
+ vars[hap_idx][ctg]->refs [var_idx[hap_idx]].data (),
271
+ vars[hap_idx][ctg]->alts [var_idx[hap_idx]].data (),
272
+ gt_strs[vars[hap_idx][ctg]->orig_gts [var_idx[hap_idx]]].data ()
273
+ );
272
274
var_idx[hap_idx]++;
273
275
updated[hap_idx] = true ;
274
276
}
275
277
} else if (var_idx[HAP1] < vars[HAP1][ctg]->n ) { // only hap1 vars left
276
- merged_vars->add_var (
277
- vars[HAP1][ctg]->poss [var_idx[HAP1]],
278
- vars[HAP1][ctg]->rlens [var_idx[HAP1]],
279
- vars[HAP1][ctg]->types [var_idx[HAP1]],
280
- vars[HAP1][ctg]->locs [var_idx[HAP1]],
281
- vars[HAP1][ctg]->refs [var_idx[HAP1]],
282
- vars[HAP1][ctg]->alts [var_idx[HAP1]],
283
- vars[HAP1][ctg]->orig_gts [var_idx[HAP1]],
284
- vars[HAP1][ctg]->gt_quals [var_idx[HAP1]],
285
- vars[HAP1][ctg]->var_quals [var_idx[HAP1]],
286
- vars[HAP1][ctg]->phase_sets [var_idx[HAP1]]);
287
- /* printf("adding 1|0 var= %s:%d\t%s\t%s\t%s\n", */
288
- /* ctg.data(), */
289
- /* vars[HAP1][ctg]->poss[var_idx[HAP1]], */
290
- /* vars[HAP1][ctg]->refs[var_idx[HAP1]].data(), */
291
- /* vars[HAP1][ctg]->alts[var_idx[HAP1]].data(), */
292
- /* gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data() */
293
- /* ); */
294
- var_idx[HAP1]++;
295
- updated[HAP1] = true ;
278
+ merged_vars->add_var (
279
+ vars[HAP1][ctg]->poss [var_idx[HAP1]],
280
+ vars[HAP1][ctg]->rlens [var_idx[HAP1]],
281
+ vars[HAP1][ctg]->types [var_idx[HAP1]],
282
+ vars[HAP1][ctg]->locs [var_idx[HAP1]],
283
+ vars[HAP1][ctg]->refs [var_idx[HAP1]],
284
+ vars[HAP1][ctg]->alts [var_idx[HAP1]],
285
+ vars[HAP1][ctg]->orig_gts [var_idx[HAP1]],
286
+ vars[HAP1][ctg]->gt_quals [var_idx[HAP1]],
287
+ vars[HAP1][ctg]->var_quals [var_idx[HAP1]],
288
+ vars[HAP1][ctg]->phase_sets [var_idx[HAP1]]);
289
+ if (print) printf (" adding 1|0 var= %s:%d\t %s\t %s\t %s\n " ,
290
+ ctg.data (),
291
+ vars[HAP1][ctg]->poss [var_idx[HAP1]],
292
+ vars[HAP1][ctg]->refs [var_idx[HAP1]].data (),
293
+ vars[HAP1][ctg]->alts [var_idx[HAP1]].data (),
294
+ gt_strs[vars[HAP1][ctg]->orig_gts [var_idx[HAP1]]].data ()
295
+ );
296
+ var_idx[HAP1]++;
297
+ updated[HAP1] = true ;
296
298
} else if (var_idx[HAP2] < vars[HAP2][ctg]->n ) { // only hap2 vars left
297
- merged_vars->add_var (
298
- vars[HAP2][ctg]->poss [var_idx[HAP2]],
299
- vars[HAP2][ctg]->rlens [var_idx[HAP2]],
300
- vars[HAP2][ctg]->types [var_idx[HAP2]],
301
- vars[HAP2][ctg]->locs [var_idx[HAP2]],
302
- vars[HAP2][ctg]->refs [var_idx[HAP2]],
303
- vars[HAP2][ctg]->alts [var_idx[HAP2]],
304
- vars[HAP2][ctg]->orig_gts [var_idx[HAP2]],
305
- vars[HAP2][ctg]->gt_quals [var_idx[HAP2]],
306
- vars[HAP2][ctg]->var_quals [var_idx[HAP2]],
307
- vars[HAP2][ctg]->phase_sets [var_idx[HAP2]]);
308
- /* printf("adding 0|1 var= %s:%d\t%s\t%s\t%s\n", */
309
- /* ctg.data(), */
310
- /* vars[HAP2][ctg]->poss[var_idx[HAP2]], */
311
- /* vars[HAP2][ctg]->refs[var_idx[HAP2]].data(), */
312
- /* vars[HAP2][ctg]->alts[var_idx[HAP2]].data(), */
313
- /* gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data() */
314
- /* ); */
315
- var_idx[HAP2]++;
316
- updated[HAP2] = true ;
299
+ merged_vars->add_var (
300
+ vars[HAP2][ctg]->poss [var_idx[HAP2]],
301
+ vars[HAP2][ctg]->rlens [var_idx[HAP2]],
302
+ vars[HAP2][ctg]->types [var_idx[HAP2]],
303
+ vars[HAP2][ctg]->locs [var_idx[HAP2]],
304
+ vars[HAP2][ctg]->refs [var_idx[HAP2]],
305
+ vars[HAP2][ctg]->alts [var_idx[HAP2]],
306
+ vars[HAP2][ctg]->orig_gts [var_idx[HAP2]],
307
+ vars[HAP2][ctg]->gt_quals [var_idx[HAP2]],
308
+ vars[HAP2][ctg]->var_quals [var_idx[HAP2]],
309
+ vars[HAP2][ctg]->phase_sets [var_idx[HAP2]]);
310
+ if (print) printf (" adding 0|1 var= %s:%d\t %s\t %s\t %s\n " ,
311
+ ctg.data (),
312
+ vars[HAP2][ctg]->poss [var_idx[HAP2]],
313
+ vars[HAP2][ctg]->refs [var_idx[HAP2]].data (),
314
+ vars[HAP2][ctg]->alts [var_idx[HAP2]].data (),
315
+ gt_strs[vars[HAP2][ctg]->orig_gts [var_idx[HAP2]]].data ());
316
+ var_idx[HAP2]++;
317
+ updated[HAP2] = true ;
317
318
}
318
319
319
320
// ///////////////////
@@ -332,22 +333,24 @@ void superclusterData::add_callset_vars(int callset,
332
333
// update reaches
333
334
for (int h = 0 ; h < HAPS; h++) {
334
335
if (clust_idx[h] < vars[h][ctg]->nc ) {
335
- if (vars[h][ctg]->left_reaches [clust_idx[h]] <=
336
+ if (clust_idx[h^1 ] >= vars[h^1 ][ctg]->nc ||
337
+ vars[h][ctg]->left_reaches [clust_idx[h]] <=
336
338
vars[h^1 ][ctg]->left_reaches [clust_idx[h^1 ]]) {
339
+ // TODO: why no std::min here?
337
340
next_left_reach = vars[h][ctg]->left_reaches [clust_idx[h]];
338
341
next_right_reach = std::max (next_right_reach,
339
342
vars[h][ctg]->right_reaches [clust_idx[h]]);
340
343
}
341
344
}
342
345
}
343
- /* printf("curr = (%d, %d)\tnext = (%d, %d)\n", */
344
- /* curr_left_reach, curr_right_reach, */
345
- /* next_left_reach, next_right_reach); */
346
+ if (print) printf (" curr = (%d, %d)\t next = (%d, %d)\n " ,
347
+ curr_left_reach, curr_right_reach,
348
+ next_left_reach, next_right_reach);
346
349
347
350
// starting new supercluster
348
351
if (next_left_reach > curr_right_reach) {
349
- /* printf("new supercluster, adding curr = %d (%d, %d)\n\n", */
350
- /* curr_var_idx, curr_left_reach, curr_right_reach); */
352
+ if (print) printf (" new supercluster, adding curr = %d (%d, %d)\n " ,
353
+ curr_var_idx, curr_left_reach, curr_right_reach);
351
354
352
355
// save reaches of prev cluster
353
356
merged_vars->clusters .push_back (curr_var_idx);
@@ -365,8 +368,9 @@ void superclusterData::add_callset_vars(int callset,
365
368
next_right_reach = std::numeric_limits<int >::min ();
366
369
for (int h = 0 ; h < HAPS; h++) {
367
370
if (clust_idx[h] < vars[h][ctg]->nc &&
371
+ (clust_idx[h^1 ] >= vars[h^1 ][ctg]->nc ||
368
372
vars[h][ctg]->left_reaches [clust_idx[h]] <=
369
- vars[h^1 ][ctg]->left_reaches [clust_idx[h^1 ]]) {
373
+ vars[h^1 ][ctg]->left_reaches [clust_idx[h^1 ]])) {
370
374
next_left_reach = std::min (next_left_reach,
371
375
vars[h][ctg]->left_reaches [clust_idx[h]]);
372
376
next_right_reach = std::max (next_right_reach,
@@ -375,7 +379,7 @@ void superclusterData::add_callset_vars(int callset,
375
379
}
376
380
377
381
} else { // same supercluster
378
- /* printf("same supercluster\n"); */
382
+ if (print) printf (" same supercluster\n " );
379
383
curr_left_reach = std::min (curr_left_reach, next_left_reach);
380
384
curr_right_reach = std::max (curr_right_reach, next_right_reach);
381
385
}
0 commit comments