Skip to content

Commit e2b8f91

Browse files
committed
supported branch_num_fg in cb tables
1 parent 76ea4db commit e2b8f91

File tree

2 files changed

+15
-13
lines changed

2 files changed

+15
-13
lines changed

csubst/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.4.12'
1+
__version__ = '1.4.13'

csubst/foreground.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ def combinations_count(n, r):
2424
def get_df_clade_size(g, trait_name):
2525
num_branch = max([ n.numerical_label for n in g['tree'].traverse() ])
2626
branch_ids = numpy.arange(num_branch)
27-
cols = ['branch_id','size','is_fg_stem']
27+
cols = ['branch_id','size','is_fg_stem_'+trait_name]
2828
df_clade_size = pandas.DataFrame(index=branch_ids, columns=cols)
2929
df_clade_size.loc[:,'branch_id'] = branch_ids
30-
df_clade_size.loc[:,'is_fg_stem'] = False
30+
df_clade_size.loc[:,'is_fg_stem_'+trait_name] = False
3131
for node in g['tree'].traverse():
3232
if node.is_root():
3333
continue
@@ -36,7 +36,7 @@ def get_df_clade_size(g, trait_name):
3636
is_fg = getattr(node, 'is_fg_'+trait_name)
3737
is_parent_fg = getattr(node.up, 'is_fg_'+trait_name)
3838
if (is_fg)&(~is_parent_fg):
39-
df_clade_size.at[bid,'is_fg_stem'] = True
39+
df_clade_size.at[bid,'is_fg_stem_'+trait_name] = True
4040
return df_clade_size
4141

4242
def foreground_clade_randomization(df_clade_size, g, sample_original_foreground=False):
@@ -51,7 +51,7 @@ def foreground_clade_randomization(df_clade_size, g, sample_original_foreground=
5151
if sample_original_foreground:
5252
is_size = (size_array==size)
5353
else:
54-
is_size = ((size_array==size)&(~df_clade_size.loc[:,'is_fg_stem']))
54+
is_size = ((size_array==size)&(~df_clade_size.loc[:,'is_fg_stem_'+trait_name]))
5555
count += is_size.sum()
5656
if (count >= g['min_clade_bin_count']):
5757
bins = numpy.append(bins, size)
@@ -63,9 +63,9 @@ def foreground_clade_randomization(df_clade_size, g, sample_original_foreground=
6363
print(txt.format(bins.shape[0]-1, ', '.join([ str(s) for s in bins ]), ', '.join([ str(s) for s in counts ])))
6464
bins = bins[::-1]
6565
df_clade_size.loc[:,'bin'] = numpy.digitize(size_array, bins, right=False)
66-
is_fg = (df_clade_size.loc[:,'is_fg_stem']==True)
66+
is_fg = (df_clade_size.loc[:,'is_fg_stem_'+trait_name]==True)
6767
fg_bins = df_clade_size.loc[is_fg,'bin']
68-
df_clade_size.loc[:,'is_fg_stem_randomized'] = df_clade_size.loc[:,'is_fg_stem']
68+
df_clade_size.loc[:,'is_fg_stem_randomized'] = df_clade_size.loc[:,'is_fg_stem_'+trait_name]
6969
df_clade_size.loc[:,'is_blocked'] = False
7070
for bin in fg_bins.unique():
7171
is_bin = (df_clade_size.loc[:,'bin']==bin)
@@ -295,7 +295,7 @@ def get_foreground_branch(g, simulate=False):
295295
def print_num_possible_permuted_combinations(df_clade_size, sample_original_foreground):
296296
import scipy
297297
num_possible_permutation_combination = 1
298-
is_fg_stem = df_clade_size.loc[:, 'is_fg_stem'].values
298+
is_fg_stem = df_clade_size.loc[:, 'is_fg_stem_'+trait_name].values
299299
for bin_no in df_clade_size.loc[:, 'bin'].unique():
300300
is_bin = (df_clade_size.loc[:, 'bin'] == bin_no)
301301
num_bin_fg = (is_bin & is_fg_stem).sum()
@@ -367,10 +367,6 @@ def get_marginal_branch(g):
367367
f.write(str(x)+'\n')
368368
return g
369369

370-
def calculate_fg_or_mg_branch_num(row, bid_cols, id_set, arity):
371-
branch_id_set = set(row[bid_cols])
372-
return arity - len(branch_id_set.difference(id_set))
373-
374370
def get_foreground_branch_num(cb, g):
375371
start_time = time.time()
376372
bid_cols = cb.columns[cb.columns.str.startswith('branch_id_')]
@@ -380,7 +376,8 @@ def get_foreground_branch_num(cb, g):
380376
for id_key,newcol in zip(['fg_ids','mg_ids'],['branch_num_fg_'+trait_name,'branch_num_mg_'+trait_name]):
381377
id_set = set(g[id_key][trait_name])
382378
cb.loc[:,newcol] = 0
383-
cb[newcol] = cb.apply(calculate_fg_or_mg_branch_num, axis=1, args=(bid_cols, id_set, arity))
379+
for bid_col in bid_cols:
380+
cb.loc[(cb[bid_col].isin(id_set)),newcol] += 1
384381
cb.loc[:,'is_fg_'+trait_name] = 'N'
385382
cb.loc[(cb.loc[:,'branch_num_fg_'+trait_name]==arity),'is_fg_'+trait_name] = 'Y'
386383
for i in numpy.arange(g['fg_dependent_id_combinations'][trait_name].shape[0]):
@@ -394,6 +391,11 @@ def get_foreground_branch_num(cb, g):
394391
is_mg = (cb['branch_num_fg_'+trait_name]>0) & (cb['branch_num_mg_'+trait_name]>0)
395392
is_mg = (is_mg) & ((cb['branch_num_fg_'+trait_name] + cb['branch_num_mg_'+trait_name])==arity)
396393
cb.loc[is_mg,'is_mf_'+trait_name] = 'Y'
394+
df_clade_size = get_df_clade_size(g, trait_name)
395+
fg_stem_bids = df_clade_size.loc[df_clade_size.loc[:,'is_fg_stem_'+trait_name],'branch_id'].values
396+
cb.loc[:,'branch_num_fg_stem_'+trait_name] = 0
397+
for bid_col in bid_cols:
398+
cb.loc[(cb[bid_col].isin(fg_stem_bids)),'branch_num_fg_stem_'+trait_name] += 1
397399
is_fg = (cb['is_fg_'+trait_name]=='Y')
398400
is_enough_stat = table.get_cutoff_stat_bool_array(cb=cb, cutoff_stat_str=g['cutoff_stat'])
399401
num_enough = is_enough_stat.sum()

0 commit comments

Comments
 (0)