@@ -24,9 +24,7 @@ def filter_entry(entry, tum_id, norm_id, tid_idx, nid_idx, v_idx, h_idx, maf_exc
24
24
return None
25
25
26
26
27
- def process_maf (
28
- maf_fn , new_maf , maf_exc , tum_id , norm_id
29
- ):
27
+ def process_maf (maf_fn , new_maf , maf_exc , tum_id , norm_id ):
30
28
"""
31
29
Iterate over maf file, skipping header lines since the files are being merged.
32
30
With possibility of mixed source, search headers
@@ -69,9 +67,7 @@ def process_maf(
69
67
cur_maf .close ()
70
68
71
69
72
- def process_tbl (
73
- cbio_dx , file_meta_dict , print_head
74
- ):
70
+ def process_tbl (cbio_dx , file_meta_dict , print_head ):
75
71
"""
76
72
Probably a less likely scenario, but can split out into multiple projects based on dict
77
73
"""
@@ -84,28 +80,10 @@ def process_tbl(
84
80
for cbio_tum_id in file_meta_dict [cbio_dx ]:
85
81
cbio_norm_id = file_meta_dict [cbio_dx ][cbio_tum_id ]["cbio_norm_id" ]
86
82
fname = file_meta_dict [cbio_dx ][cbio_tum_id ]["fname" ]
87
- sys .stderr .write (
88
- "Found relevant maf to process for "
89
- + " "
90
- + cbio_tum_id
91
- + " "
92
- + cbio_norm_id
93
- + " "
94
- + file_meta_dict [cbio_dx ][cbio_tum_id ]["kf_tum_id" ]
95
- + " "
96
- + file_meta_dict [cbio_dx ][cbio_tum_id ]["kf_norm_id" ]
97
- + " "
98
- + fname
99
- + "\n "
100
- )
101
- sys .stderr .flush ()
102
- process_maf (
103
- maf_dir + fname ,
104
- new_maf ,
105
- maf_exc ,
106
- cbio_tum_id ,
107
- cbio_norm_id ,
108
- )
83
+ print ("Found relevant maf to process for {} {} {} {} {}" .format (
84
+ cbio_tum_id , cbio_norm_id , file_meta_dict [cbio_dx ][cbio_tum_id ]["kf_tum_id" ], file_meta_dict [cbio_dx ][cbio_tum_id ]["kf_norm_id" ], fname ),
85
+ file = sys .stderr )
86
+ process_maf (maf_dir + fname , new_maf , maf_exc , cbio_tum_id , cbio_norm_id )
109
87
x += 1
110
88
sys .stderr .write (
111
89
"Completed processing " + str (x ) + " entries in " + cbio_dx + "\n "
@@ -130,7 +108,7 @@ def process_tbl(
130
108
"-i" , "--header" , action = "store" , dest = "header" , help = "File with maf header only"
131
109
)
132
110
parser .add_argument (
133
- "-m" , "--maf-dir " , action = "store" , dest = "maf_dir " , help = "maf file directory "
111
+ "-m" , "--maf-dirs " , action = "store" , dest = "maf_dirs " , help = "comma-separated list of maf file directories "
134
112
)
135
113
parser .add_argument (
136
114
"-j" ,
@@ -155,10 +133,21 @@ def process_tbl(
155
133
args = parser .parse_args ()
156
134
with open (args .config_file ) as f :
157
135
config_data = json .load (f )
158
- # get maf file ext
159
- maf_dir = args .maf_dir
160
- if maf_dir [- 1 ] != "/" :
161
- maf_dir += "/"
136
+ # Create symlinks to mafs in one place for ease of processing
137
+ maf_dir = "MAFS/"
138
+ maf_dirs_in = args .maf_dirs
139
+ print ("Symlinking maf files from {} to {}" .format (maf_dirs_in , maf_dir ), file = sys .stderr )
140
+ os .makedirs ("MAFS" , exist_ok = True )
141
+ for dirname in maf_dirs_in .split ("," ):
142
+ abs_path = os .path .abspath (dirname )
143
+ for fname in os .listdir (dirname ):
144
+ try :
145
+ src = os .path .join (abs_path , fname )
146
+ dest = os .path .join (maf_dir , fname )
147
+ os .symlink (src , dest )
148
+ except Exception as e :
149
+ print (e , file = sys .stderr )
150
+ print ("Could not sym link {} in {}" .format (fname , dirname ))
162
151
# If DGD maf only, else if both, dgd maf will be handled separately, or not at all if no dgd and kf only
163
152
164
153
file_meta_dict = get_file_metadata (args .table , "DGD_MAF" )
0 commit comments