@@ -24,10 +24,10 @@ def combinations_count(n, r):
2424def get_df_clade_size (g , trait_name ):
2525 num_branch = max ([ n .numerical_label for n in g ['tree' ].traverse () ])
2626 branch_ids = numpy .arange (num_branch )
27- cols = ['branch_id' ,'size' ,'is_fg_stem' ]
27+ cols = ['branch_id' ,'size' ,'is_fg_stem_' + trait_name ]
2828 df_clade_size = pandas .DataFrame (index = branch_ids , columns = cols )
2929 df_clade_size .loc [:,'branch_id' ] = branch_ids
30- df_clade_size .loc [:,'is_fg_stem' ] = False
30+ df_clade_size .loc [:,'is_fg_stem_' + trait_name ] = False
3131 for node in g ['tree' ].traverse ():
3232 if node .is_root ():
3333 continue
@@ -36,7 +36,7 @@ def get_df_clade_size(g, trait_name):
3636 is_fg = getattr (node , 'is_fg_' + trait_name )
3737 is_parent_fg = getattr (node .up , 'is_fg_' + trait_name )
3838 if (is_fg )& (~ is_parent_fg ):
39- df_clade_size .at [bid ,'is_fg_stem' ] = True
39+ df_clade_size .at [bid ,'is_fg_stem_' + trait_name ] = True
4040 return df_clade_size
4141
4242def foreground_clade_randomization (df_clade_size , g , sample_original_foreground = False ):
@@ -51,7 +51,7 @@ def foreground_clade_randomization(df_clade_size, g, sample_original_foreground=
5151 if sample_original_foreground :
5252 is_size = (size_array == size )
5353 else :
54- is_size = ((size_array == size )& (~ df_clade_size .loc [:,'is_fg_stem' ]))
54+ is_size = ((size_array == size )& (~ df_clade_size .loc [:,'is_fg_stem_' + trait_name ]))
5555 count += is_size .sum ()
5656 if (count >= g ['min_clade_bin_count' ]):
5757 bins = numpy .append (bins , size )
@@ -63,9 +63,9 @@ def foreground_clade_randomization(df_clade_size, g, sample_original_foreground=
6363 print (txt .format (bins .shape [0 ]- 1 , ', ' .join ([ str (s ) for s in bins ]), ', ' .join ([ str (s ) for s in counts ])))
6464 bins = bins [::- 1 ]
6565 df_clade_size .loc [:,'bin' ] = numpy .digitize (size_array , bins , right = False )
66- is_fg = (df_clade_size .loc [:,'is_fg_stem' ]== True )
66+ is_fg = (df_clade_size .loc [:,'is_fg_stem_' + trait_name ]== True )
6767 fg_bins = df_clade_size .loc [is_fg ,'bin' ]
68- df_clade_size .loc [:,'is_fg_stem_randomized' ] = df_clade_size .loc [:,'is_fg_stem' ]
68+ df_clade_size .loc [:,'is_fg_stem_randomized' ] = df_clade_size .loc [:,'is_fg_stem_' + trait_name ]
6969 df_clade_size .loc [:,'is_blocked' ] = False
7070 for bin in fg_bins .unique ():
7171 is_bin = (df_clade_size .loc [:,'bin' ]== bin )
@@ -295,7 +295,7 @@ def get_foreground_branch(g, simulate=False):
295295def print_num_possible_permuted_combinations (df_clade_size , sample_original_foreground ):
296296 import scipy
297297 num_possible_permutation_combination = 1
298- is_fg_stem = df_clade_size .loc [:, 'is_fg_stem' ].values
298+ is_fg_stem = df_clade_size .loc [:, 'is_fg_stem_' + trait_name ].values
299299 for bin_no in df_clade_size .loc [:, 'bin' ].unique ():
300300 is_bin = (df_clade_size .loc [:, 'bin' ] == bin_no )
301301 num_bin_fg = (is_bin & is_fg_stem ).sum ()
@@ -367,10 +367,6 @@ def get_marginal_branch(g):
367367 f .write (str (x )+ '\n ' )
368368 return g
369369
370- def calculate_fg_or_mg_branch_num (row , bid_cols , id_set , arity ):
371- branch_id_set = set (row [bid_cols ])
372- return arity - len (branch_id_set .difference (id_set ))
373-
374370def get_foreground_branch_num (cb , g ):
375371 start_time = time .time ()
376372 bid_cols = cb .columns [cb .columns .str .startswith ('branch_id_' )]
@@ -380,7 +376,8 @@ def get_foreground_branch_num(cb, g):
380376 for id_key ,newcol in zip (['fg_ids' ,'mg_ids' ],['branch_num_fg_' + trait_name ,'branch_num_mg_' + trait_name ]):
381377 id_set = set (g [id_key ][trait_name ])
382378 cb .loc [:,newcol ] = 0
383- cb [newcol ] = cb .apply (calculate_fg_or_mg_branch_num , axis = 1 , args = (bid_cols , id_set , arity ))
379+ for bid_col in bid_cols :
380+ cb .loc [(cb [bid_col ].isin (id_set )),newcol ] += 1
384381 cb .loc [:,'is_fg_' + trait_name ] = 'N'
385382 cb .loc [(cb .loc [:,'branch_num_fg_' + trait_name ]== arity ),'is_fg_' + trait_name ] = 'Y'
386383 for i in numpy .arange (g ['fg_dependent_id_combinations' ][trait_name ].shape [0 ]):
@@ -394,6 +391,11 @@ def get_foreground_branch_num(cb, g):
394391 is_mg = (cb ['branch_num_fg_' + trait_name ]> 0 ) & (cb ['branch_num_mg_' + trait_name ]> 0 )
395392 is_mg = (is_mg ) & ((cb ['branch_num_fg_' + trait_name ] + cb ['branch_num_mg_' + trait_name ])== arity )
396393 cb .loc [is_mg ,'is_mf_' + trait_name ] = 'Y'
394+ df_clade_size = get_df_clade_size (g , trait_name )
395+ fg_stem_bids = df_clade_size .loc [df_clade_size .loc [:,'is_fg_stem_' + trait_name ],'branch_id' ].values
396+ cb .loc [:,'branch_num_fg_stem_' + trait_name ] = 0
397+ for bid_col in bid_cols :
398+ cb .loc [(cb [bid_col ].isin (fg_stem_bids )),'branch_num_fg_stem_' + trait_name ] += 1
397399 is_fg = (cb ['is_fg_' + trait_name ]== 'Y' )
398400 is_enough_stat = table .get_cutoff_stat_bool_array (cb = cb , cutoff_stat_str = g ['cutoff_stat' ])
399401 num_enough = is_enough_stat .sum ()
0 commit comments