
Commit c2b5bfc

Merge pull request #53 from kids-first/feature/mb-fix-pandas
🔧 Fixes for numpy pandas compatibility
2 parents: 1c9075a + 1d784a1 · commit c2b5bfc

2 files changed (+15, -21 lines)

scripts/cnv_3_gistic_style.py (+11, -14)

@@ -2,7 +2,6 @@
 import sys
 import argparse
-import concurrent.futures
 import json
 import subprocess
 import re
@@ -113,26 +112,24 @@ def mt_adjust_cn(obj):
     # sample list would be cbio ids
     samp_list = list(data.columns)[1:]
     bs_cbio_dict = {}
-    # fid_dict = {}
     for samp_id in samp_list:
         bs_id = file_meta_dict[cbio_dx][samp_id]["kf_tum_id"]
         bs_cbio_dict[bs_id] = samp_id
     high_gain = config_data["cnv_high_gain"]

     x = 1
     m = 50
-    with concurrent.futures.ThreadPoolExecutor(config_data["threads"]) as executor:
-        results = {
-            executor.submit(mt_adjust_cn, bs_id): bs_id for bs_id in bs_cbio_dict
-        }
-        for result in concurrent.futures.as_completed(results):
-            if result.result()[0] == 1:
-                "Had trouble processing object " + result.result([1] + "\n")
-                sys.exit(1)
-            if x % m == 0:
-                sys.stderr.write("Processed " + str(x) + " samples\n")
-                sys.stderr.flush()
-            x += 1
+
+    for bs_id in bs_cbio_dict:
+        exit_code, object = mt_adjust_cn(bs_id)
+        if exit_code == 1:
+            sys.stderr.write("Had trouble processing object " + object + "\n")
+            sys.exit(1)
+        if x % m == 0:
+            sys.stderr.write("Processed " + str(x) + " samples\n")
+            sys.stderr.flush()
+        x += 1
+
     sys.stderr.write("Conversion completed. Writing results to file\n")
     new_fname = cbio_dx = (
         args.merged_cnv_dir + "/" + parts.group(1) + ".discrete_cnvs.txt"
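Worth noting about the removed block above: its error branch built the message string but never wrote it anywhere, and result.result([1] is a typo for result.result()[1], so the threaded path could not report failures correctly. The commit sidesteps this by switching to a plain serial loop. Purely for comparison, here is a minimal sketch of what a corrected threaded version could have looked like, assuming mt_adjust_cn returns an (exit_code, object) tuple and that config_data and bs_cbio_dict are defined as in the script; this is not the approach the commit takes.

# Hypothetical corrected version of the removed ThreadPoolExecutor block,
# shown only for comparison; the commit replaces it with a serial loop.
import concurrent.futures
import sys

with concurrent.futures.ThreadPoolExecutor(config_data["threads"]) as executor:
    # Map each submitted future back to the biospecimen ID it was built from
    futures = {executor.submit(mt_adjust_cn, bs_id): bs_id for bs_id in bs_cbio_dict}
    for done in concurrent.futures.as_completed(futures):
        exit_code, obj = done.result()
        if exit_code == 1:
            # Actually write the error before exiting; the old code built the
            # string and then discarded it
            sys.stderr.write("Had trouble processing object " + str(obj) + "\n")
            sys.exit(1)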

scripts/get_files_from_manifest.py (+4, -7)

@@ -128,15 +128,12 @@ def mt_type_download(file_type):
 sys.stderr.write("Concatenating manifests\n")
 sys.stderr.flush()
 manifest_list = args.manifest.split(",")
-manifest_concat = pd.DataFrame()
+manifest_df_list = []
 for manifest in manifest_list:
     sys.stderr.write("Processing " + manifest + "\n")
-    current = pd.read_csv(manifest, sep=None)
-    if manifest_concat.empty:
-        manifest_concat = current.copy()
-    else:
-        manifest_concat = manifest_concat.append(current, ignore_index=True)
+    manifest_df_list.append(pd.read_csv(manifest, sep=None))
 # In the event that s3_path is empty, replace with str to trigger later sbg download
+manifest_concat = pd.concat(manifest_df_list, ignore_index=True)
 manifest_concat.s3_path = manifest_concat.s3_path.fillna('None')
 file_types = args.fts.split(",")
 # subset concatenated manifests
@@ -185,7 +182,7 @@ def mt_type_download(file_type):
         key_dict[key]['session'] = boto3.Session(profile_name=key)
         key_dict[key]['dl_client'] = key_dict[key]['session'].client("s3", config=client_config)
     else:
-        key_dict[key]['manifest'] = key_dict[key]['manifest'].append(selected[selected['s3_path'].str.startswith(bucket)], ignore_index=True)
+        key_dict[key]['manifest'] = pd.concat([key_dict[key]['manifest'], selected[selected['s3_path'].str.startswith(bucket)]], ignore_index=True)
 if args.sbg_profile is not None:
     check = 1
     config = sbg.Config(profile=args.sbg_profile)
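Both hunks in this file address the same root cause: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the supported pattern is to collect frames in a list and call pd.concat once. A minimal, self-contained sketch of the migration follows; the CSV paths are placeholders, not files from this repository.

# Sketch of the DataFrame.append -> pd.concat migration; "a.csv" and "b.csv"
# are placeholder inputs, not files from this repository.
import pandas as pd

manifest_list = ["a.csv", "b.csv"]

# Old pattern, removed in pandas 2.0:
#   combined = pd.DataFrame()
#   for path in manifest_list:
#       combined = combined.append(pd.read_csv(path), ignore_index=True)

# Supported pattern: build a list of frames, concatenate once.
frames = [pd.read_csv(path, sep=None, engine="python") for path in manifest_list]
combined = pd.concat(frames, ignore_index=True)

One behavioral difference: pd.concat([]) raises a ValueError ("No objects to concatenate"), so the updated script assumes at least one manifest path is supplied.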
