Skip to content

Commit

Permalink
Merge pull request #62 from kids-first/feature/mb-add-treatment-to-pb…
Browse files Browse the repository at this point in the history
…ta-all

✨ Add Treatment Views to pbta_all
  • Loading branch information
migbro authored Jun 21, 2024
2 parents 61738ac + 71885f6 commit 5d94e25
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@
"datatype": "SAMPLE_ATTRIBUTES"
}
}

}
},
"study": {
Expand Down
63 changes: 37 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,44 @@ In the end, if you named your output dir `processed`, you'll end up with this ex
processed
└── pbta_all
├── case_lists
│   ├── cases_3way_complete.txt
│   ├── cases_RNA_Seq_v2_mRNA.txt
│   ├── cases_all.txt
│   ├── cases_cna.txt
│   ├── cases_cnaseq.txt
│   ├── cases_sequenced.txt
│   └── cases_sv.txt
├── data_CNA.txt -> /home/ubuntu/mount/pbta_all/merged_cnvs/pbta_all.discrete_cnvs.txt
├── data_clinical_patient.txt -> /home/ubuntu/mount/pbta_all/datasheets/data_clinical_patient.txt
├── data_clinical_sample.txt -> /home/ubuntu/mount/pbta_all/datasheets/data_clinical_sample.txt
├── data_cna.seg.txt -> /home/ubuntu/mount/pbta_all/merged_cnvs/pbta_all.merged_seg.txt
├── data_linear_CNA.txt -> /home/ubuntu/mount/pbta_all/merged_cnvs/pbta_all.predicted_cnv.txt
├── data_mutations_extended.txt -> /home/ubuntu/mount/pbta_all/merged_mafs/pbta_all.maf
├── data_rna_seq_v2_mrna.txt -> /home/ubuntu/mount/pbta_all/merged_rsem/pbta_all.rsem_merged.txt
├── data_rna_seq_v2_mrna_median_Zscores.txt -> /home/ubuntu/mount/pbta_all/merged_rsem/pbta_all.rsem_merged_zscore.txt
├── data_sv.txt -> /home/ubuntu/mount/pbta_all/pbta_all.fusions.txt
├── meta_CNA.txt
├── meta_SV.txt
├── meta_clinical_patient.txt
├── meta_clinical_sample.txt
├── meta_cna.seg.txt
├── meta_linear_CNA.txt
├── meta_mutations_extended.txt
├── meta_rna_seq_v2_mrna.txt
├── meta_rna_seq_v2_mrna_median_Zscores.txt
└── meta_study.txt
│   ├── cases_3way_complete.txt
│   ├── cases_RNA_Seq_v2_mRNA.txt
│   ├── cases_all.txt
│   ├── cases_cna.txt
│   ├── cases_cnaseq.txt
│   ├── cases_sequenced.txt
│   └── cases_sv.txt
├── data_CNA.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/merged_cnvs/pbta_all.discrete_cnvs.txt
├── data_clinical_patient.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/datasheets/data_clinical_patient.txt
├── data_clinical_sample.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/datasheets/data_clinical_sample.txt
├── data_clinical_timeline_clinical_event.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/datasheets/data_clinical_timeline_clinical_event.txt
├── data_clinical_timeline_imaging.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/datasheets/data_clinical_timeline_imaging.txt
├── data_clinical_timeline_specimen.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/datasheets/data_clinical_timeline_specimen.txt
├── data_clinical_timeline_surgery.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/datasheets/data_clinical_timeline_surgery.txt
├── data_clinical_timeline_treatment.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/datasheets/data_clinical_timeline_treatment.txt
├── data_cna.seg.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/merged_cnvs/pbta_all.merged_seg.txt
├── data_linear_CNA.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/merged_cnvs/pbta_all.predicted_cnv.txt
├── data_mutations_extended.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/merged_mafs/pbta_all.maf
├── data_rna_seq_v2_mrna.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/merged_rsem/pbta_all.rsem_merged.txt
├── data_rna_seq_v2_mrna_median_Zscores.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/merged_rsem/pbta_all.rsem_merged_zscore.txt
├── data_sv.txt -> /home/ubuntu/volume/PORTAL_LOADS/pbta_all/merged_fusion/pbta_all.fusions.txt
├── meta_CNA.txt
├── meta_clinical_patient.txt
├── meta_clinical_sample.txt
├── meta_clinical_timeline_clinical_event.txt
├── meta_clinical_timeline_imaging.txt
├── meta_clinical_timeline_specimen.txt
├── meta_clinical_timeline_surgery.txt
├── meta_clinical_timeline_treatment.txt
├── meta_cna.seg.txt
├── meta_linear_CNA.txt
├── meta_mutations_extended.txt
├── meta_rna_seq_v2_mrna.txt
├── meta_rna_seq_v2_mrna_median_Zscores.txt
├── meta_study.txt
└── meta_sv.txt
```
Note: most other studies won't have a set of timeline files.
# Details
Use this section as a reference in case your overconfidence got the best of you.

Expand Down
12 changes: 8 additions & 4 deletions REFS/aws_bucket_key_pairs.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
s3://cds-246-phs002517-p30-fy20 NCI-AR
s3://cds-246-phs002517-sequencefiles-p30-fy20 NCI-AR
s3://cds-306-phs002517-x01 NCI-X01
s3://d3b-cds-working-bucket d3b
s3://kf-strides-study-us-east-1-prd-sd-bhjxbdqk kf
s3://kf-study-us-east-1-prd-sd-8y99qzjj saml
s3://d3b-cds-working-bucket Mgmt-Console-Dev-chopd3bprod@684194535433
s3://d3b-study-us-east-1-prd-pbta-staging Mgmt-Console-Dev-chopd3bprod@684194535433
s3://d3b-study-us-east-1-prd-sd-8y99qzjj Mgmt-Console-Dev-chopd3bprod@684194535433
s3://d3b-study-us-east-1-prd-sd-bhjxbdqk Mgmt-Console-Dev-chopd3bprod@684194535433
s3://d3b-study-us-east-1-prd-sd-hkrzzhfw Mgmt-Console-Dev-chopd3bprod@684194535433
s3://d3b-study-us-east-1-prd-sd-m3dbxd12 Mgmt-Console-Dev-chopd3bprod@684194535433
s3://kf-strides-study-us-east-1-prd-sd-bhjxbdqk Mgmt-Console-Dev-D3bCenter@232196027141
s3://kf-study-us-east-1-prd-sd-8y99qzjj Mgmt-Console-Dev-D3b@538745987955
64 changes: 64 additions & 0 deletions STUDY_CONFIGS/pbta_all_case_meta_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,46 @@
"genetic_alteration_type": "CLINICAL",
"datatype": "SAMPLE_ATTRIBUTES"
}
},
"imaging": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_imaging.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE"
}
},
"clinical_event": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_clinical_event.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE"
}
},
"specimen": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_specimen.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE"
}
},
"surgery": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_surgery.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE"
}
},
"treatment": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_treatment.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE"
}
}
}
},
Expand Down Expand Up @@ -274,6 +314,30 @@
"table": "prod_cbio.pbta_all_data_clinical_patient",
"out_file": "data_clinical_patient.txt"
},
"imaging_timeline": {
"table": "prod_cbio.pbta_all_treatment_data_clinical_timeline_imaging",
"out_file": "data_clinical_timeline_imaging.txt"
},
"specimen_timeline": {
"table": "prod_cbio.pbta_all_timeline_specimen",
"out_file": "data_clinical_timeline_specimen.txt"

},
"clinical_event_timeline": {
"table": "prod_cbio.pbta_all_timeline_clinical_event",
"out_file": "data_clinical_timeline_clinical_event.txt"

},
"surgery_timeline": {
"table": "prod_cbio.pbta_all_timeline_surgery",
"out_file": "data_clinical_timeline_surgery.txt"

},
"treatment_timeline": {
"table": "prod_cbio.pbta_all_timeline_treatment",
"out_file": "data_clinical_timeline_treatment.txt"

},
"genomics_etl": {
"table": "prod_cbio.pbta_all_genomics_etl_file",
"out_file": "cbio_file_name_id.txt"
Expand Down
13 changes: 6 additions & 7 deletions STUDY_CONFIGS/pbta_all_treatment_meta_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -183,21 +183,24 @@
"cbio_name": "data_clinical_timeline_specimen.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE" }
"datatype": "TIMELINE"
}
},
"surgery": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_surgery.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE" }
"datatype": "TIMELINE"
}
},
"treatment": {
"_comment": "see https://docs.cbioportal.org/file-formats/#event-types for detailed specifics",
"cbio_name": "data_clinical_timeline_treatment.txt",
"meta_file_attr": {
"genetic_alteration_type": "CLINICAL",
"datatype": "TIMELINE" }
"datatype": "TIMELINE"
}
}
}
},
Expand Down Expand Up @@ -309,10 +312,6 @@
"table": "brownm28_dev_schema_cbio.pbta_all_timeline_treatment",
"out_file": "data_clinical_timeline_treatment.txt"

},
"genomics_etl": {
"table": "brownm28_dev_schema_cbio.pbta_all_treatment_genomics_etl_file",
"out_file": "cbio_file_name_id.txt"
}
}
}
12 changes: 11 additions & 1 deletion scripts/get_files_from_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import os
import pandas as pd
import sevenbridges as sbg
from sevenbridges.errors import SbgError
from sevenbridges.http.error_handlers import rate_limit_sleeper, maintenance_sleeper
import pdb

Expand Down Expand Up @@ -173,6 +172,7 @@ def mt_type_download(file_type):
)
# setting up a key dict that, for each aws key, has an associated session and manifest to download with
key_dict = {}
bucket_errs = 0
with open(args.aws_tbl) as kl:
for line in kl:
(bucket, key) = line.rstrip('\n').split('\t')
Expand All @@ -183,6 +183,16 @@ def mt_type_download(file_type):
key_dict[key]['dl_client'] = key_dict[key]['session'].client("s3", config=client_config)
else:
key_dict[key]['manifest'] = pd.concat([key_dict[key]['manifest'], selected[selected['s3_path'].str.startswith(bucket)]], ignore_index=True)
# Test bucket access with that key; if it fails, print the error and then exit rather than waste time
parse_url = urllib3.util.parse_url(bucket)
try:
key_dict[key]['dl_client'].list_objects(Bucket=parse_url.host)
except Exception as e:
bucket_errs = 1
print(e, file=sys.stderr)
print("Bucket access ERROR: {}\t{}".format(bucket, key), file=sys.stderr)
if bucket_errs:
exit(1)
if args.sbg_profile is not None:
check = 1
config = sbg.Config(profile=args.sbg_profile)
Expand Down
3 changes: 2 additions & 1 deletion scripts/get_study_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ def get_manifests(db_cur, config_dict):
tbl_name = config_data['database_pulls'][key]['table']
(rows, colnames) = generic_pull(cur, tbl_name)
out_fn = config_data['database_pulls'][key]['out_file']
if key == 'gene_file':
# all data_clinical sheets go in this dir
if out_fn.startswith('data_clinical_'):
out_fn = datasheet_dir + "/" + out_fn
out_file = open(out_fn, 'w')
generic_print(out_file, rows, colnames)
Expand Down

0 comments on commit 5d94e25

Please sign in to comment.