@@ -138,20 +138,30 @@ def add_substitution_labels(df, SN, sub_type, SN_colors, ax, g):
138138 return ax
139139
140140def plot_barchart (df , g ):
141- sub_types = {
142- '_sub' :'Branch-wise\n substitutions\n in the entire tree' ,
143- '_sub_' :'Branch-wise\n substitutions\n in the targets' ,
144- 'any2spe' :'Posterior prob.\n of any2spe' ,
145- 'any2dif' :'Posterior prob.\n of any2dif' ,
146- }
147- SN_color_all = {
148- '_sub' : {'N' :'black' , 'S' :'gainsboro' },
149- '_sub_' : {'N' :'black' , 'S' :'gainsboro' },
150- 'any2spe' : {'N' :'red' , 'S' :'gainsboro' },
151- 'any2dif' : {'N' :'blue' , 'S' :'gainsboro' },
152- }
141+ if g ['single_branch_mode' ]:
142+ sub_types = {
143+ '_sub' :'Branch-wise\n substitutions\n in the entire tree' ,
144+ 'any2any' :'Branch-wise\n substitutions\n in the targets' , # Identical to branch-wise substitutions in the targets
145+ }
146+ SN_color_all = {
147+ '_sub' : {'N' :'black' , 'S' :'gainsboro' },
148+ 'any2any' : {'N' :'purple' , 'S' :'gainsboro' }, # Identical to branch-wise substitutions in the targets
149+ }
150+ else :
151+ sub_types = {
152+ '_sub' :'Branch-wise\n substitutions\n in the entire tree' ,
153+ '_sub_' :'Branch-wise\n substitutions\n in the targets' ,
154+ 'any2spe' :'Posterior prob.\n of any2spe' ,
155+ 'any2dif' :'Posterior prob.\n of any2dif' ,
156+ }
157+ SN_color_all = {
158+ '_sub' : {'N' :'black' , 'S' :'gainsboro' },
159+ '_sub_' : {'N' :'black' , 'S' :'gainsboro' },
160+ 'any2spe' : {'N' :'red' , 'S' :'gainsboro' },
161+ 'any2dif' : {'N' :'blue' , 'S' :'gainsboro' },
162+ }
153163 num_row = len (sub_types )
154- fig ,axes = matplotlib .pyplot .subplots (nrows = num_row , ncols = 1 , figsize = (7.2 , 4.8 ), sharex = True )
164+ fig ,axes = matplotlib .pyplot .subplots (nrows = num_row , ncols = 1 , figsize = (7.2 , 1.2 * len ( sub_types ) ), sharex = True )
155165 axes = axes .flat
156166 i = 0
157167 NS_ymax = df .loc [:,['N_sub' ,'S_sub' ]].sum (axis = 1 ).max () + 0.5
@@ -687,27 +697,37 @@ def pdb_sequence_search(g):
687697 pdb_id = None
688698 return pdb_id
689699
def combinatorial2single_columns(df):
    """Remove the OCS*/OCN* combinatorial-substitution columns from a table.

    A column is removed when its name has the form ``<prefix><anc>2<des>``
    with ``prefix`` in ('OCS', 'OCN') and ``anc``/``des`` each one of
    ('any', 'spe', 'dif'), e.g. ``OCNany2spe``. All other columns are kept
    in their original order. main_site() calls this in single-branch mode
    right before the site table is written out.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that may contain some, all, or none of the target columns.

    Returns
    -------
    pandas.DataFrame
        A frame without the target columns. The input frame is not modified;
        when no target column is present the input object itself is returned.
    """
    import itertools  # local import keeps this block self-contained

    states = ('any', 'spe', 'dif')
    candidates = (
        prefix + anc + '2' + des
        for prefix, anc, des in itertools.product(('OCS', 'OCN'), states, states)
    )
    present = [col for col in candidates if col in df.columns]
    if not present:
        return df
    # Drop everything in one call so the frame is copied once rather than
    # once per matching column.
    return df.drop(labels=present, axis=1)
708+
690709def main_site (g ):
691710 if g ['pdb' ] is not None :
692711 from csubst import parser_pymol
693- if g ['pdb' ] == 'besthit' :
694- g ['run_pdb_sequence_search' ] = True
695- else :
696- g ['run_pdb_sequence_search' ] = False
697712 print ("Reading and parsing input files." , flush = True )
698713 g ['codon_table' ] = genetic_code .get_codon_table (ncbi_id = g ['genetic_code' ])
699714 g = tree .read_treefile (g )
700715 g = parser_misc .generate_intermediate_files (g )
701716 g = parser_misc .annotate_tree (g )
702717 g = parser_misc .read_input (g )
703- g , g [ 'state_nuc' ], g [ 'state_cdn' ], g [ 'state_pep' ] = parser_misc .prep_state (g )
718+ g = parser_misc .prep_state (g )
704719 N_tensor = substitution .get_substitution_tensor (state_tensor = g ['state_pep' ], mode = 'asis' , g = g , mmap_attr = 'N' )
705720 N_tensor = substitution .apply_min_sub_pp (g , N_tensor )
706721 S_tensor = substitution .get_substitution_tensor (state_tensor = g ['state_cdn' ], mode = 'syn' , g = g , mmap_attr = 'S' )
707722 S_tensor = substitution .apply_min_sub_pp (g , S_tensor )
708723 g = add_branch_id_list (g )
709724 for branch_ids in g ['branch_id_list' ]:
710725 print ('\n Processing branch_ids: {}' .format (',' .join ([ str (bid ) for bid in branch_ids ])), flush = True )
726+ if len (branch_ids )== 1 :
727+ print ('Single branch mode. Substitutions, rather than combinatorial substitutions, will be mapped.' )
728+ g ['single_branch_mode' ] = True
729+ else :
730+ g ['single_branch_mode' ] = False
711731 g ['branch_ids' ] = branch_ids
712732 g ['site_outdir' ] = './csubst_site.branch_id' + ',' .join ([ str (bid ) for bid in branch_ids ])
713733 if not os .path .exists (g ['site_outdir' ]):
@@ -749,6 +769,8 @@ def main_site(g):
749769 else :
750770 out_file = 'csubst_site.' + re .sub ('.pdb$' , '' , os .path .basename (g ['pdb' ]))+ '.tsv'
751771 out_path = os .path .join (g ['site_outdir' ], out_file )
772+ if g ['single_branch_mode' ]:
773+ df = combinatorial2single_columns (df )
752774 df .to_csv (out_path , sep = "\t " , index = False , float_format = g ['float_format' ], chunksize = 10000 )
753775 print ('To visualize the convergence probability on protein structure, please see: https://github.com/kfuku52/csubst/wiki' )
754776 print ('' )
0 commit comments