
Commit c2b5bfc

Merge pull request #53 from kids-first/feature/mb-fix-pandas
🔧 Fixes for numpy pandas compatibility
2 parents: 1c9075a + 1d784a1 · commit c2b5bfc

2 files changed (+15, -21 lines)

scripts/cnv_3_gistic_style.py (+11, -14)

@@ -2,7 +2,6 @@
 import sys
 import argparse
-import concurrent.futures
 import json
 import subprocess
 import re
@@ -113,26 +112,24 @@ def mt_adjust_cn(obj):
     # sample list would be cbio ids
     samp_list = list(data.columns)[1:]
     bs_cbio_dict = {}
-    # fid_dict = {}
     for samp_id in samp_list:
         bs_id = file_meta_dict[cbio_dx][samp_id]["kf_tum_id"]
         bs_cbio_dict[bs_id] = samp_id
     high_gain = config_data["cnv_high_gain"]

     x = 1
     m = 50
-    with concurrent.futures.ThreadPoolExecutor(config_data["threads"]) as executor:
-        results = {
-            executor.submit(mt_adjust_cn, bs_id): bs_id for bs_id in bs_cbio_dict
-        }
-        for result in concurrent.futures.as_completed(results):
-            if result.result()[0] == 1:
-                "Had trouble processing object " + result.result([1] + "\n")
-                sys.exit(1)
-            if x % m == 0:
-                sys.stderr.write("Processed " + str(x) + " samples\n")
-                sys.stderr.flush()
-            x += 1
+
+    for bs_id in bs_cbio_dict:
+        exit_code, object = mt_adjust_cn(bs_id)
+        if exit_code == 1:
+            sys.stderr.write("Had trouble processing object " + object + "\n")
+            sys.exit(1)
+        if x % m == 0:
+            sys.stderr.write("Processed " + str(x) + " samples\n")
+            sys.stderr.flush()
+        x += 1
+
     sys.stderr.write("Conversion completed. Writing results to file\n")
     new_fname = cbio_dx = (
         args.merged_cnv_dir + "/" + parts.group(1) + ".discrete_cnvs.txt"
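Worth noting about the removed block above: its error branch built the message string but never wrote it anywhere, and result.result([1] is a typo for result.result()[1], so the threaded path could not report failures correctly. The commit sidesteps this by switching to a plain serial loop. Purely for comparison, here is a minimal sketch of what a corrected threaded version could have looked like, assuming mt_adjust_cn returns an (exit_code, object) tuple and that config_data and bs_cbio_dict are defined as in the script; this is not the approach the commit takes.

# Hypothetical corrected version of the removed ThreadPoolExecutor block,
# shown only for comparison; the commit replaces it with a serial loop.
import concurrent.futures
import sys

with concurrent.futures.ThreadPoolExecutor(config_data["threads"]) as executor:
    # Map each submitted future back to the biospecimen ID it was built from
    futures = {executor.submit(mt_adjust_cn, bs_id): bs_id for bs_id in bs_cbio_dict}
    for done in concurrent.futures.as_completed(futures):
        exit_code, obj = done.result()
        if exit_code == 1:
            # Actually write the error before exiting; the old code built the
            # string and then discarded it
            sys.stderr.write("Had trouble processing object " + str(obj) + "\n")
            sys.exit(1)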

scripts/get_files_from_manifest.py (+4, -7)

@@ -128,15 +128,12 @@ def mt_type_download(file_type):
 sys.stderr.write("Concatenating manifests\n")
 sys.stderr.flush()
 manifest_list = args.manifest.split(",")
-manifest_concat = pd.DataFrame()
+manifest_df_list = []
 for manifest in manifest_list:
     sys.stderr.write("Processing " + manifest + "\n")
-    current = pd.read_csv(manifest, sep=None)
-    if manifest_concat.empty:
-        manifest_concat = current.copy()
-    else:
-        manifest_concat = manifest_concat.append(current, ignore_index=True)
+    manifest_df_list.append(pd.read_csv(manifest, sep=None))
 # In the event that s3_path is empty, replace with str to trigger later sbg download
+manifest_concat = pd.concat(manifest_df_list, ignore_index=True)
 manifest_concat.s3_path = manifest_concat.s3_path.fillna('None')
 file_types = args.fts.split(",")
 # subset concatenated manifests
@@ -185,7 +182,7 @@ def mt_type_download(file_type):
         key_dict[key]['session'] = boto3.Session(profile_name=key)
         key_dict[key]['dl_client'] = key_dict[key]['session'].client("s3", config=client_config)
     else:
-        key_dict[key]['manifest'] = key_dict[key]['manifest'].append(selected[selected['s3_path'].str.startswith(bucket)], ignore_index=True)
+        key_dict[key]['manifest'] = pd.concat([key_dict[key]['manifest'], selected[selected['s3_path'].str.startswith(bucket)]], ignore_index=True)
 if args.sbg_profile is not None:
     check = 1
     config = sbg.Config(profile=args.sbg_profile)
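Both hunks in this file address the same root cause: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the supported pattern is to collect frames in a list and call pd.concat once. A minimal, self-contained sketch of the migration follows; the CSV paths are placeholders, not files from this repository.

# Sketch of the DataFrame.append -> pd.concat migration; "a.csv" and "b.csv"
# are placeholder inputs, not files from this repository.
import pandas as pd

manifest_list = ["a.csv", "b.csv"]

# Old pattern, removed in pandas 2.0:
#   combined = pd.DataFrame()
#   for path in manifest_list:
#       combined = combined.append(pd.read_csv(path), ignore_index=True)

# Supported pattern: build a list of frames, concatenate once.
frames = [pd.read_csv(path, sep=None, engine="python") for path in manifest_list]
combined = pd.concat(frames, ignore_index=True)

One behavioral difference: pd.concat([]) raises a ValueError ("No objects to concatenate"), so the updated script assumes at least one manifest path is supplied.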
