Skip to content

Commit

Permalink
Merge pull request #61 from kids-first/feature/pbta-updates-2024-06-24
Browse files Browse the repository at this point in the history
🔧 Fix Missing Field
  • Loading branch information
migbro authored Jun 20, 2024
2 parents 49157bb + 8007e68 commit 61738ac
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 7 deletions.
5 changes: 3 additions & 2 deletions COLLABORATIONS/openTARGETS/header_desc.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ cohort_participant_id EXTERNAL_PATIENT_ID External Patient Identifier 0 1 STRING
formatted_sample_id SAMPLE_ID 1 0 STRING 98 7316-1069-T-353281.WGS
Kids_First_Biospecimen_ID SPECIMEN_ID KFDRC tumor biospecimen ID 1 0 STRING 13 BS_A9S5HT6P
broad_histology CANCER_TYPE 1 0 STRING 12 Benign tumor
molecular_subtype MOLECULAR_SUBTYPE Molecular subtype defined by WHO 2021 guidelines 1 0 STRING 12 EPN, PF A
cancer_group HISTOLOGY 1 0 STRING 11 Adenoma
harmonized_diagnosis CANCER_TYPE_DETAILED 1 0 STRING 10 Adenoma
primary_site TUMOR_TISSUE_SITE 1 0 STRING 9 Suprasellar/Hypothalamic/Pituitary
tumor_descriptor TUMOR_TYPE 1 0 STRING 8 Initial CNS Tumor
composition SAMPLE_TYPE 1 0 STRING 7 Solid Tissue
cohort COHORT Source study cohort name 1 0 STRING 6
sub_cohort SUB_COHORT Source study sub-cohort name 1 0 STRING 6
cohort COHORT Source study cohort name 1 0 STRING 6 PBTA
sub_cohort SUB_COHORT Source study sub-cohort name 1 0 STRING 6 DGD
CNS_region 1 0 STRING 5 Suprasellar
tumor_ploidy 1 0 NUMBER 4 3
tumor_fraction 1 0 NUMBER 3 0.476369391
Expand Down
32 changes: 30 additions & 2 deletions STUDY_CONFIGS/pbta_all_treatment_meta_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,37 @@
"cbio_name": "data_clinical_timeline_imaging.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE",
"data_filename": "data_clinical_timeline_imaging.txt"
"datatype": "TIMELINE"
}
},
"clinical_event": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_clinical_event.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE"
}
},
"specimen": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_specimen.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE" }
},
"surgery": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_surgery.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE" }
},
"treatment": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_treatment.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE" }
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion scripts/get_study_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def generic_print(out_file, rows, colnames):
out_file.write("\t".join(colnames) + "\n")
for row in rows:
# convert None to empty str
new_row = [str(i or '') for i in row]
new_row = ["" if i is None else str(i) for i in row]
out_file.write("\t".join(new_row) + "\n")
out_file.close()
return 0
Expand Down
48 changes: 48 additions & 0 deletions scripts/get_study_participant_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python3
"""
Script to pull patient IDs from a study on pedcbioportal
"""

import argparse
from bravado.client import SwaggerClient
from bravado.requests_client import RequestsClient
from urllib.parse import urlparse

def main():
    """Print the patient IDs of one study on a cBioPortal server, one per line.

    Command-line arguments:
        -u/--url    URL of the server's API docs (Swagger spec); defaults to
                    the pedcbioportal endpoint.
        -s/--study  Cancer study ID whose patients are listed (required).
        -t/--token  Path to the token file obtained from the Web API
                    (required).

    The token file must contain text of the form ``<label>: <token>``; the
    field following the first ``": "`` separator is sent as a Bearer token in
    the ``Authorization`` header for the host named in the URL.

    Exits with status 2 (argparse usage error) when a required argument is
    missing or the token file does not contain a ``": "`` separator.
    """
    parser = argparse.ArgumentParser(
        description="Pull patient IDs from a study on pedcbioportal"
    )
    parser.add_argument(
        "-u", "--url", action="store", dest="url",
        help="url to search against",
        default="https://pedcbioportal.kidsfirstdrc.org/api/v2/api-docs",
    )
    # Both of these are needed for the request to make sense; requiring them
    # turns a TypeError / server-side failure into a clear usage message.
    parser.add_argument(
        "-s", "--study", action="store", dest="study", required=True,
        help="Cancer study ID to compare on server",
    )
    parser.add_argument(
        "-t", "--token", action="store", dest="token", required=True,
        help="Token file obtained from Web API",
    )

    args = parser.parse_args()
    with open(args.token, 'r') as token_file:
        token_fields = token_file.read().rstrip().split(': ')
    if len(token_fields) < 2:
        # Previously surfaced as a bare IndexError with no hint of the cause.
        parser.error(
            "token file {} is not in the expected '<label>: <token>' "
            "format".format(args.token)
        )
    token = token_fields[1]

    url_object = urlparse(args.url)

    # Attach the Bearer token to every request sent to the API host.
    http_client = RequestsClient()
    http_client.set_api_key(
        '{}'.format(url_object.hostname), 'Bearer {}'.format(token),
        param_name='Authorization', param_in='header'
    )

    cbioportal = SwaggerClient.from_url(
        args.url,
        http_client=http_client,
        config={
            "validate_requests": False,
            "validate_responses": False,
            "validate_swagger_spec": False,
        },
    )

    pt_list = cbioportal.Patients.getAllPatientsInStudyUsingGET(
        studyId=args.study
    ).result()
    print("\n".join([x.patientId for x in pt_list]))


if __name__ == '__main__':
    main()
2 changes: 0 additions & 2 deletions scripts/organize_upload_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ def process_meta_data(meta_data, output_dir, canc_study_id):
subprocess.call(cmd, shell=True)
except Exception as e:
sys.stderr.write(str(e) + " failed processing meta data file\n")
pdb.set_trace()
hold = 1


def process_clinical_data(meta_data, output_dir, canc_study_id):
Expand Down
File renamed without changes.
35 changes: 35 additions & 0 deletions utilities/subtract_by_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/python
"""
Script that can be cleaned up.
Removes entries from a table using a list of banned values.
Usage:
python subtract_by_id.py <id_list> <colname> <out_flag> <in_file>
To have no out_flag, provide "SKIP_THIS" as the value
"""

import sys
import pdb

def main(argv=None):
    """Remove rows from a tab-separated table using a list of banned IDs.

    Args:
        argv: Sequence of four positional values
            ``[id_list, colname, out_flag, in_file]``. Defaults to
            ``sys.argv[1:]``. Extra trailing values are ignored.

            * id_list  - path to a file with one banned ID per line.
            * colname  - header name of the column holding the IDs.
            * out_flag - header name of a column whose values, for removed
              rows, are reported on stderr; pass ``"SKIP_THIS"`` to disable.
            * in_file  - path to the tab-separated table with a header row.

    Returns:
        0 on success, 1 when fewer than four arguments were supplied.

    Raises:
        ValueError: if ``colname`` (or ``out_flag``) is not in the header.

    The header and every row whose ``colname`` value is not banned are
    written to stdout unchanged. Unless disabled, the distinct ``out_flag``
    values of removed rows are written to stderr, one per line, in the order
    they were first seen (so the output is reproducible between runs).
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 4:
        sys.stderr.write(
            "Usage: python subtract_by_id.py "
            "<id_list> <colname> <out_flag> <in_file>\n"
        )
        return 1
    id_path, colname, out_flag, in_path = argv[:4]

    with open(id_path) as rm_list:
        banned_ids = {line.rstrip('\n') for line in rm_list}

    collect_flags = out_flag != "SKIP_THIS"
    # dict keys give de-duplication while keeping first-seen order.
    removed_flags = {}
    with open(in_path) as in_file:
        head = next(in_file, None)
        if head is None:
            # Empty table: nothing to filter, nothing to report.
            return 0
        header = head.rstrip('\n').split('\t')
        c_idx = header.index(colname)
        o_idx = header.index(out_flag) if collect_flags else None
        print(head, end='')
        for line in in_file:
            info = line.rstrip('\n').split('\t')
            if info[c_idx] not in banned_ids:
                print(line, end='')
            elif o_idx is not None:
                removed_flags[info[o_idx]] = None
    if collect_flags:
        print("\n".join(removed_flags), file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())

0 comments on commit 61738ac

Please sign in to comment.