Skip to content

Commit ddf5b30

Browse files
committed
fix: clustering bugs
- fixed a typo in the debug printing: a copy-paste error used HAP1 instead of HAP2
- fixed a segfault that occurred when one haplotype has no variants
1 parent cb8b30a commit ddf5b30

File tree

1 file changed

+69
-65
lines changed

1 file changed

+69
-65
lines changed

src/cluster.cpp

Lines changed: 69 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ sort_superclusters(std::shared_ptr<superclusterData> sc_data) {
162162
void superclusterData::add_callset_vars(int callset,
163163
std::vector< std::unordered_map< std::string,
164164
std::shared_ptr<ctgVariants> > > vars) {
165+
bool print = false;
165166

166167
for (int ctg_idx = 0; ctg_idx < int(this->contigs.size()); ctg_idx++) {
167168
std::string ctg = contigs[ctg_idx];
@@ -191,6 +192,8 @@ void superclusterData::add_callset_vars(int callset,
191192
} else if (vars[HAP2][ctg]->n) {
192193
curr_left_reach = vars[HAP2][ctg]->left_reaches[0];
193194
curr_right_reach = vars[HAP2][ctg]->right_reaches[0];
195+
} else {
196+
ERROR("No variants on contig: %s", ctg.data());
194197
}
195198

196199
// initialize indices
@@ -199,8 +202,7 @@ void superclusterData::add_callset_vars(int callset,
199202
int curr_var_idx = 0;
200203
int next_left_reach = std::numeric_limits<int>::max();
201204
int next_right_reach = std::numeric_limits<int>::min();
202-
while (var_idx[HAP1] < vars[HAP1][ctg]->n ||
203-
var_idx[HAP2] < vars[HAP2][ctg]->n) {
205+
while (var_idx[HAP1] < vars[HAP1][ctg]->n || var_idx[HAP2] < vars[HAP2][ctg]->n) {
204206

205207
//////////////////
206208
// ADD VARIANTS //
@@ -228,13 +230,13 @@ void superclusterData::add_callset_vars(int callset,
228230
vars[HAP1][ctg]->gt_quals[var_idx[HAP1]],
229231
vars[HAP1][ctg]->var_quals[var_idx[HAP1]],
230232
vars[HAP1][ctg]->phase_sets[var_idx[HAP1]]);
231-
/* printf("adding 1|1 var= %s:%d\t%s\t%s\t%s\n", */
232-
/* ctg.data(), */
233-
/* vars[HAP1][ctg]->poss[var_idx[HAP1]], */
234-
/* vars[HAP1][ctg]->refs[var_idx[HAP1]].data(), */
235-
/* vars[HAP1][ctg]->alts[var_idx[HAP1]].data(), */
236-
/* gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data() */
237-
/* ); */
233+
if (print) printf("adding 1|1 var= %s:%d\t%s\t%s\t%s\n",
234+
ctg.data(),
235+
vars[HAP1][ctg]->poss[var_idx[HAP1]],
236+
vars[HAP1][ctg]->refs[var_idx[HAP1]].data(),
237+
vars[HAP1][ctg]->alts[var_idx[HAP1]].data(),
238+
gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data()
239+
);
238240
var_idx[HAP1]++; var_idx[HAP2]++;
239241
updated[HAP1] = true; updated[HAP2] = true;
240242

@@ -261,59 +263,58 @@ void superclusterData::add_callset_vars(int callset,
261263
vars[hap_idx][ctg]->gt_quals[var_idx[hap_idx]],
262264
vars[hap_idx][ctg]->var_quals[var_idx[hap_idx]],
263265
vars[hap_idx][ctg]->phase_sets[var_idx[hap_idx]]);
264-
/* printf("adding %s var= %s:%d\t%s\t%s\t%s\n", */
265-
/* hap_idx == 0 ? "1|0" : "0|1", */
266-
/* ctg.data(), */
267-
/* vars[hap_idx][ctg]->poss[var_idx[hap_idx]], */
268-
/* vars[hap_idx][ctg]->refs[var_idx[hap_idx]].data(), */
269-
/* vars[hap_idx][ctg]->alts[var_idx[hap_idx]].data(), */
270-
/* gt_strs[vars[hap_idx][ctg]->orig_gts[var_idx[hap_idx]]].data() */
271-
/* ); */
266+
if (print) printf("adding %s var= %s:%d\t%s\t%s\t%s\n",
267+
hap_idx == 0 ? "1|0" : "0|1",
268+
ctg.data(),
269+
vars[hap_idx][ctg]->poss[var_idx[hap_idx]],
270+
vars[hap_idx][ctg]->refs[var_idx[hap_idx]].data(),
271+
vars[hap_idx][ctg]->alts[var_idx[hap_idx]].data(),
272+
gt_strs[vars[hap_idx][ctg]->orig_gts[var_idx[hap_idx]]].data()
273+
);
272274
var_idx[hap_idx]++;
273275
updated[hap_idx] = true;
274276
}
275277
} else if (var_idx[HAP1] < vars[HAP1][ctg]->n) { // only hap1 vars left
276-
merged_vars->add_var(
277-
vars[HAP1][ctg]->poss[var_idx[HAP1]],
278-
vars[HAP1][ctg]->rlens[var_idx[HAP1]],
279-
vars[HAP1][ctg]->types[var_idx[HAP1]],
280-
vars[HAP1][ctg]->locs[var_idx[HAP1]],
281-
vars[HAP1][ctg]->refs[var_idx[HAP1]],
282-
vars[HAP1][ctg]->alts[var_idx[HAP1]],
283-
vars[HAP1][ctg]->orig_gts[var_idx[HAP1]],
284-
vars[HAP1][ctg]->gt_quals[var_idx[HAP1]],
285-
vars[HAP1][ctg]->var_quals[var_idx[HAP1]],
286-
vars[HAP1][ctg]->phase_sets[var_idx[HAP1]]);
287-
/* printf("adding 1|0 var= %s:%d\t%s\t%s\t%s\n", */
288-
/* ctg.data(), */
289-
/* vars[HAP1][ctg]->poss[var_idx[HAP1]], */
290-
/* vars[HAP1][ctg]->refs[var_idx[HAP1]].data(), */
291-
/* vars[HAP1][ctg]->alts[var_idx[HAP1]].data(), */
292-
/* gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data() */
293-
/* ); */
294-
var_idx[HAP1]++;
295-
updated[HAP1] = true;
278+
merged_vars->add_var(
279+
vars[HAP1][ctg]->poss[var_idx[HAP1]],
280+
vars[HAP1][ctg]->rlens[var_idx[HAP1]],
281+
vars[HAP1][ctg]->types[var_idx[HAP1]],
282+
vars[HAP1][ctg]->locs[var_idx[HAP1]],
283+
vars[HAP1][ctg]->refs[var_idx[HAP1]],
284+
vars[HAP1][ctg]->alts[var_idx[HAP1]],
285+
vars[HAP1][ctg]->orig_gts[var_idx[HAP1]],
286+
vars[HAP1][ctg]->gt_quals[var_idx[HAP1]],
287+
vars[HAP1][ctg]->var_quals[var_idx[HAP1]],
288+
vars[HAP1][ctg]->phase_sets[var_idx[HAP1]]);
289+
if (print) printf("adding 1|0 var= %s:%d\t%s\t%s\t%s\n",
290+
ctg.data(),
291+
vars[HAP1][ctg]->poss[var_idx[HAP1]],
292+
vars[HAP1][ctg]->refs[var_idx[HAP1]].data(),
293+
vars[HAP1][ctg]->alts[var_idx[HAP1]].data(),
294+
gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data()
295+
);
296+
var_idx[HAP1]++;
297+
updated[HAP1] = true;
296298
} else if (var_idx[HAP2] < vars[HAP2][ctg]->n) { // only hap2 vars left
297-
merged_vars->add_var(
298-
vars[HAP2][ctg]->poss[var_idx[HAP2]],
299-
vars[HAP2][ctg]->rlens[var_idx[HAP2]],
300-
vars[HAP2][ctg]->types[var_idx[HAP2]],
301-
vars[HAP2][ctg]->locs[var_idx[HAP2]],
302-
vars[HAP2][ctg]->refs[var_idx[HAP2]],
303-
vars[HAP2][ctg]->alts[var_idx[HAP2]],
304-
vars[HAP2][ctg]->orig_gts[var_idx[HAP2]],
305-
vars[HAP2][ctg]->gt_quals[var_idx[HAP2]],
306-
vars[HAP2][ctg]->var_quals[var_idx[HAP2]],
307-
vars[HAP2][ctg]->phase_sets[var_idx[HAP2]]);
308-
/* printf("adding 0|1 var= %s:%d\t%s\t%s\t%s\n", */
309-
/* ctg.data(), */
310-
/* vars[HAP2][ctg]->poss[var_idx[HAP2]], */
311-
/* vars[HAP2][ctg]->refs[var_idx[HAP2]].data(), */
312-
/* vars[HAP2][ctg]->alts[var_idx[HAP2]].data(), */
313-
/* gt_strs[vars[HAP1][ctg]->orig_gts[var_idx[HAP1]]].data() */
314-
/* ); */
315-
var_idx[HAP2]++;
316-
updated[HAP2] = true;
299+
merged_vars->add_var(
300+
vars[HAP2][ctg]->poss[var_idx[HAP2]],
301+
vars[HAP2][ctg]->rlens[var_idx[HAP2]],
302+
vars[HAP2][ctg]->types[var_idx[HAP2]],
303+
vars[HAP2][ctg]->locs[var_idx[HAP2]],
304+
vars[HAP2][ctg]->refs[var_idx[HAP2]],
305+
vars[HAP2][ctg]->alts[var_idx[HAP2]],
306+
vars[HAP2][ctg]->orig_gts[var_idx[HAP2]],
307+
vars[HAP2][ctg]->gt_quals[var_idx[HAP2]],
308+
vars[HAP2][ctg]->var_quals[var_idx[HAP2]],
309+
vars[HAP2][ctg]->phase_sets[var_idx[HAP2]]);
310+
if (print) printf("adding 0|1 var= %s:%d\t%s\t%s\t%s\n",
311+
ctg.data(),
312+
vars[HAP2][ctg]->poss[var_idx[HAP2]],
313+
vars[HAP2][ctg]->refs[var_idx[HAP2]].data(),
314+
vars[HAP2][ctg]->alts[var_idx[HAP2]].data(),
315+
gt_strs[vars[HAP2][ctg]->orig_gts[var_idx[HAP2]]].data());
316+
var_idx[HAP2]++;
317+
updated[HAP2] = true;
317318
}
318319

319320
/////////////////////
@@ -332,22 +333,24 @@ void superclusterData::add_callset_vars(int callset,
332333
// update reaches
333334
for (int h = 0; h < HAPS; h++) {
334335
if (clust_idx[h] < vars[h][ctg]->nc) {
335-
if (vars[h][ctg]->left_reaches[clust_idx[h]] <=
336+
if (clust_idx[h^1] >= vars[h^1][ctg]->nc ||
337+
vars[h][ctg]->left_reaches[clust_idx[h]] <=
336338
vars[h^1][ctg]->left_reaches[clust_idx[h^1]]) {
339+
// TODO: why no std::min here?
337340
next_left_reach = vars[h][ctg]->left_reaches[clust_idx[h]];
338341
next_right_reach = std::max(next_right_reach,
339342
vars[h][ctg]->right_reaches[clust_idx[h]]);
340343
}
341344
}
342345
}
343-
/* printf("curr = (%d, %d)\tnext = (%d, %d)\n", */
344-
/* curr_left_reach, curr_right_reach, */
345-
/* next_left_reach, next_right_reach); */
346+
if (print) printf("curr = (%d, %d)\tnext = (%d, %d)\n",
347+
curr_left_reach, curr_right_reach,
348+
next_left_reach, next_right_reach);
346349

347350
// starting new supercluster
348351
if (next_left_reach > curr_right_reach) {
349-
/* printf("new supercluster, adding curr = %d (%d, %d)\n\n", */
350-
/* curr_var_idx, curr_left_reach, curr_right_reach); */
352+
if (print) printf("new supercluster, adding curr = %d (%d, %d)\n",
353+
curr_var_idx, curr_left_reach, curr_right_reach);
351354

352355
// save reaches of prev cluster
353356
merged_vars->clusters.push_back(curr_var_idx);
@@ -365,8 +368,9 @@ void superclusterData::add_callset_vars(int callset,
365368
next_right_reach = std::numeric_limits<int>::min();
366369
for (int h = 0; h < HAPS; h++) {
367370
if (clust_idx[h] < vars[h][ctg]->nc &&
371+
(clust_idx[h^1] >= vars[h^1][ctg]->nc ||
368372
vars[h][ctg]->left_reaches[clust_idx[h]] <=
369-
vars[h^1][ctg]->left_reaches[clust_idx[h^1]]) {
373+
vars[h^1][ctg]->left_reaches[clust_idx[h^1]])) {
370374
next_left_reach = std::min(next_left_reach,
371375
vars[h][ctg]->left_reaches[clust_idx[h]]);
372376
next_right_reach = std::max(next_right_reach,
@@ -375,7 +379,7 @@ void superclusterData::add_callset_vars(int callset,
375379
}
376380

377381
} else { // same supercluster
378-
/* printf("same supercluster\n"); */
382+
if (print) printf("same supercluster\n");
379383
curr_left_reach = std::min(curr_left_reach, next_left_reach);
380384
curr_right_reach = std::max(curr_right_reach, next_right_reach);
381385
}

0 commit comments

Comments
 (0)