Skip to content

Commit 93f1b67

Browse files
committed
supported single branch mode in csubst site
1 parent 629fd45 commit 93f1b67

File tree

7 files changed

+63
-26
lines changed

7 files changed

+63
-26
lines changed

csubst/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.0.8'
1+
__version__ = '1.1.0'

csubst/csubst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,11 @@ if __name__ == "__main__":
134134
psr_fg.add_argument('--mg_sister_stem_only', metavar='yes|no', default='yes', type=strtobool,
135135
help='default=%(default)s: Set "yes" to exclude non-stem branches of sister lineages.')
136136
psr_fg.add_argument('--fg_clade_permutation', metavar='INT', default=0, type=int,
137-
help='default=%(default)s: Randomly select the same/similar number and size of clades as foreground '
137+
help='default=%(default)s: Experimental. Randomly select the same/similar number and size of clades as foreground '
138138
'and run analysis N times to obtain a permutation-based P value of convergence. '
139139
'At least 1000 is recommended.')
140140
psr_fg.add_argument('--min_clade_bin_count', metavar='INT', default=10, type=int,
141-
help='default=%(default)s: Minimum number of branches per bin for foreground clade permutation.')
141+
help='default=%(default)s: Experimental. Minimum number of branches per bin for foreground clade permutation. ')
142142

143143
# dataset
144144
dataset = subparsers.add_parser('dataset', help='see `csubst dataset -h` or https://github.com/kfuku52/csubst/wiki', parents=[])
@@ -204,7 +204,7 @@ if __name__ == "__main__":
204204
'Used only with --pdb.')
205205
site.add_argument('--remove_ligand', metavar='CODE1,CODE2,CODE3,...', default='', type=str,
206206
help='default=%(default)s: Comma-delimited list of PDB ligand codes to be removed. '
207-
'e.g., "so4,po4,bme"'
207+
'e.g., "so4,po4,bme". '
208208
'Used only with --pdb.')
209209
site.add_argument('--mask_subunit', metavar='yes|no', default='yes', type=strtobool,
210210
help='default=%(default)s: Whether to mask unrelated subunits. '
@@ -292,7 +292,7 @@ if __name__ == "__main__":
292292
'This option should be used with --omegaC_method "modelfree".')
293293
analyze.add_argument('--asrv', metavar='no|pool|sn|each|file', default='each', type=str,
294294
choices=['no', 'pool', 'sn', 'each', 'file'],
295-
help='default=%(default)s: Correct among-site rate variation in omega/quantile calculation. '
295+
help='default=%(default)s: Experimental. Correct among-site rate variation in omega/quantile calculation. '
296296
'"no", No ASRV, meaning the uniform rate among sites. '
297297
'"pool", All categories of substitutions are pooled to calculate a single set of ASRV. '
298298
'"sn", Synonymous and nonsynonymous substitutions are processed individually '

csubst/main_analyze.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def main_analyze(g):
120120
g = parser_misc.generate_intermediate_files(g)
121121
g = parser_misc.annotate_tree(g)
122122
g = parser_misc.read_input(g)
123-
g,g['state_nuc'],g['state_cdn'],g['state_pep'] = parser_misc.prep_state(g)
123+
g = parser_misc.prep_state(g)
124124

125125
sequence.write_alignment('csubst_alignment_codon.fa', mode='codon', g=g)
126126
sequence.write_alignment('csubst_alignment_aa.fa', mode='aa', g=g)

csubst/main_site.py

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -138,20 +138,30 @@ def add_substitution_labels(df, SN, sub_type, SN_colors, ax, g):
138138
return ax
139139

140140
def plot_barchart(df, g):
141-
sub_types = {
142-
'_sub':'Branch-wise\nsubstitutions\nin the entire tree',
143-
'_sub_':'Branch-wise\nsubstitutions\nin the targets',
144-
'any2spe':'Posterior prob.\nof any2spe',
145-
'any2dif':'Posterior prob.\nof any2dif',
146-
}
147-
SN_color_all = {
148-
'_sub': {'N':'black', 'S':'gainsboro'},
149-
'_sub_': {'N':'black', 'S':'gainsboro'},
150-
'any2spe': {'N':'red', 'S':'gainsboro'},
151-
'any2dif': {'N':'blue', 'S':'gainsboro'},
152-
}
141+
if g['single_branch_mode']:
142+
sub_types = {
143+
'_sub':'Branch-wise\nsubstitutions\nin the entire tree',
144+
'any2any':'Branch-wise\nsubstitutions\nin the targets', # Identical to branch-wise substitutions in the targets
145+
}
146+
SN_color_all = {
147+
'_sub': {'N':'black', 'S':'gainsboro'},
148+
'any2any': {'N':'purple', 'S':'gainsboro'}, # Identical to branch-wise substitutions in the targets
149+
}
150+
else:
151+
sub_types = {
152+
'_sub':'Branch-wise\nsubstitutions\nin the entire tree',
153+
'_sub_':'Branch-wise\nsubstitutions\nin the targets',
154+
'any2spe':'Posterior prob.\nof any2spe',
155+
'any2dif':'Posterior prob.\nof any2dif',
156+
}
157+
SN_color_all = {
158+
'_sub': {'N':'black', 'S':'gainsboro'},
159+
'_sub_': {'N':'black', 'S':'gainsboro'},
160+
'any2spe': {'N':'red', 'S':'gainsboro'},
161+
'any2dif': {'N':'blue', 'S':'gainsboro'},
162+
}
153163
num_row = len(sub_types)
154-
fig,axes = matplotlib.pyplot.subplots(nrows=num_row, ncols=1, figsize=(7.2, 4.8), sharex=True)
164+
fig,axes = matplotlib.pyplot.subplots(nrows=num_row, ncols=1, figsize=(7.2, 1.2*len(sub_types)), sharex=True)
155165
axes = axes.flat
156166
i = 0
157167
NS_ymax = df.loc[:,['N_sub','S_sub']].sum(axis=1).max() + 0.5
@@ -687,27 +697,37 @@ def pdb_sequence_search(g):
687697
pdb_id = None
688698
return pdb_id
689699

700+
def combinatorial2single_columns(df):
701+
for SN in ['OCS','OCN']:
702+
for anc in ['any','spe','dif']:
703+
for des in ['any', 'spe', 'dif']:
704+
col = SN+anc+'2'+des
705+
if col in df.columns:
706+
df = df.drop(labels=col, axis=1)
707+
return df
708+
690709
def main_site(g):
691710
if g['pdb'] is not None:
692711
from csubst import parser_pymol
693-
if g['pdb'] =='besthit':
694-
g['run_pdb_sequence_search'] = True
695-
else:
696-
g['run_pdb_sequence_search'] = False
697712
print("Reading and parsing input files.", flush=True)
698713
g['codon_table'] = genetic_code.get_codon_table(ncbi_id=g['genetic_code'])
699714
g = tree.read_treefile(g)
700715
g = parser_misc.generate_intermediate_files(g)
701716
g = parser_misc.annotate_tree(g)
702717
g = parser_misc.read_input(g)
703-
g,g['state_nuc'],g['state_cdn'],g['state_pep'] = parser_misc.prep_state(g)
718+
g = parser_misc.prep_state(g)
704719
N_tensor = substitution.get_substitution_tensor(state_tensor=g['state_pep'], mode='asis', g=g, mmap_attr='N')
705720
N_tensor = substitution.apply_min_sub_pp(g, N_tensor)
706721
S_tensor = substitution.get_substitution_tensor(state_tensor=g['state_cdn'], mode='syn', g=g, mmap_attr='S')
707722
S_tensor = substitution.apply_min_sub_pp(g, S_tensor)
708723
g = add_branch_id_list(g)
709724
for branch_ids in g['branch_id_list']:
710725
print('\nProcessing branch_ids: {}'.format(','.join([ str(bid) for bid in branch_ids ])), flush=True)
726+
if len(branch_ids)==1:
727+
print('Single branch mode. Substitutions, rather than combinatorial substitutions, will be mapped.')
728+
g['single_branch_mode'] = True
729+
else:
730+
g['single_branch_mode'] = False
711731
g['branch_ids'] = branch_ids
712732
g['site_outdir'] = './csubst_site.branch_id'+','.join([ str(bid) for bid in branch_ids ])
713733
if not os.path.exists(g['site_outdir']):
@@ -749,6 +769,8 @@ def main_site(g):
749769
else:
750770
out_file = 'csubst_site.'+re.sub('.pdb$', '', os.path.basename(g['pdb']))+'.tsv'
751771
out_path = os.path.join(g['site_outdir'], out_file)
772+
if g['single_branch_mode']:
773+
df = combinatorial2single_columns(df)
752774
df.to_csv(out_path, sep="\t", index=False, float_format=g['float_format'], chunksize=10000)
753775
print('To visualize the convergence probability on protein structure, please see: https://github.com/kfuku52/csubst/wiki')
754776
print('')

csubst/param.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ def get_global_parameters(args):
3737
elif (g['float_type']==64):
3838
g['float_type'] = numpy.float64
3939
g['float_tol'] = 10**-9
40+
if 'pdb' in g.keys():
41+
if g['pdb']=='besthit':
42+
g['run_pdb_sequence_search'] = True
43+
else:
44+
g['run_pdb_sequence_search'] = False
4045
if 'percent_biased_sub' in g.keys():
4146
assert (g['percent_biased_sub']<100), '--percent_biased_sub should be <100.'
4247
if 'float_digit' in g.keys():

csubst/parser_misc.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,10 @@ def prep_state(g):
218218
elif g['input_data_type'] == 'cdn':
219219
state_cdn = parser_iqtree.get_state_tensor(g)
220220
state_pep = sequence.cdn2pep_state(state_cdn=state_cdn, g=g)
221-
return g,state_nuc,state_cdn,state_pep
221+
g['state_nuc'] = state_nuc
222+
g['state_cdn'] = state_cdn
223+
g['state_pep'] = state_pep
224+
return g
222225

223226
def read_exchangeability_matrix(file, codon_orders):
224227
txt = pkg_resources.resource_string(__name__, file)

csubst/parser_pymol.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pymol
44

55
from io import StringIO
6-
#import itertools
6+
import copy
77
import os
88
import re
99
import subprocess
@@ -214,13 +214,20 @@ def set_substitution_colors(df, g, object_names, N_sub_cols):
214214
color_sites['OCNany2dif'].append(codon_site)
215215
elif (prob_single_sub>=g['pymol_min_single_prob']):
216216
color_sites['single_sub'].append(codon_site)
217+
if g['single_branch_mode']:
218+
color_sites['single_branch_N'] = copy.deepcopy(color_sites['OCNany2spe'])
219+
del color_sites['OCNany2spe']
220+
del color_sites['OCNany2dif']
221+
del color_sites['single_sub']
217222
for key in color_sites.keys():
218223
if key=='OCNany2spe':
219224
hex_value = utility.rgb_to_hex(r=1, g=0, b=0)
220225
elif key=='OCNany2dif':
221226
hex_value = utility.rgb_to_hex(r=0, g=0, b=1)
222227
elif key=='single_sub':
223228
hex_value = utility.rgb_to_hex(r=0.4, g=0.4, b=0.4)
229+
elif key=='single_branch_N':
230+
hex_value = utility.rgb_to_hex(r=0.5, g=0, b=0.5)
224231
print('Amino acid sites with {} will be painted with {}.'.format(key, hex_value), flush=True)
225232
txt_resi = '+'.join([str(site) for site in color_sites[key]])
226233
cmd_color = "color {}, {} and chain {} and resi {}"

0 commit comments

Comments
 (0)