Skip to content

Commit ff516d5

Browse files
authored
Merge pull request #13 from kids-first/feature/add-oligo-study
🎉 added oligo study config
2 parents 45c04c1 + 989b070 commit ff516d5

File tree

2 files changed

+178
-2
lines changed

2 files changed

+178
-2
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ Below assumes you have already created the necessary tables from dbt
88
1. Copy over the appropriate AWS account key and download files. Example using `pbta_all` study:
99

1010
```sh
11-
python3 ~/tools/kf-cbioportal-etl/scripts/dev/get_files_from_manifest.py -m genomics_file_manifest.txt -f RSEM_gene,annofuse_filtered_fusions_tsv,annotated_public_outputs,ctrlfreec_pval,ctrlfreec_info,ctrlfreec_bam_seg -p saml 2> pbta_dl.log &
12-
python3 ~/tools/kf-cbioportal-etl/scripts/dev/get_files_from_manifest.py -m dgd_genomics_file_manifest.txt -f DGD_MAF,DGD_FUSION -p d3b 2> dgd_dl.log &
11+
python3 ~/tools/kf-cbioportal-etl/scripts/get_files_from_manifest.py -m genomics_file_manifest.txt -f RSEM_gene,annofuse_filtered_fusions_tsv,annotated_public_outputs,ctrlfreec_pval,ctrlfreec_info,ctrlfreec_bam_seg -p saml 2> pbta_dl.log &
12+
python3 ~/tools/kf-cbioportal-etl/scripts/get_files_from_manifest.py -m dgd_genomics_file_manifest.txt -f DGD_MAF,DGD_FUSION -p d3b 2> dgd_dl.log &
1313
```
1414

1515
1. Copy and edit `REFS/data_processing_config.json` and `REFS/pbta_all_case_meta_config.json` as needed
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
{
2+
"merged_mafs": {
3+
"dir": "merged_mafs",
4+
"dtypes": {
5+
"mutation": {
6+
"ext": "maf",
7+
"cbio_name": "data_mutations_extended.txt",
8+
"meta_file_attr": {
9+
"stable_id": "mutations",
10+
"profile_name": "Mutations",
11+
"profile_description": "Consensus calls from strelka2, mutect2, lancet, and VarDict Java. Two or more callers required to pass, < 0.001 frequency in gnomAD, and min read depth 8 in normal sample, unless in a TERT promoter region or in a hotspot region (see https://www.cancerhotspots.org)",
12+
"genetic_alteration_type": "MUTATION_EXTENDED",
13+
"datatype": "MAF",
14+
"variant_classification_filter": "Silent,Intron,3'UTR,3'Flank,5'UTR,IGR,RNA",
15+
"show_profile_in_analysis_tab": "true"
16+
}
17+
}
18+
}
19+
},
20+
"merged_cnvs": {
21+
"dir": "merged_cnvs",
22+
"dtypes": {
23+
"linear": {
24+
"ext": "predicted_cnv.txt",
25+
"cbio_name": "data_linear_CNA.txt",
26+
"meta_file_attr": {
27+
"stable_id": "linear_CNA",
28+
"profile_name": "copy-number values",
29+
"profile_description": "Predicted copy number values from WGS (Continuous). Copy number calls obtained using ControlFreeC, filtering calls smaller than 50KB",
30+
"genetic_alteration_type": "COPY_NUMBER_ALTERATION",
31+
"datatype": "CONTINUOUS",
32+
"show_profile_in_analysis_tab": "false"
33+
}
34+
},
35+
"discrete": {
36+
"ext": "discrete_cnvs.txt",
37+
"cbio_name": "data_CNA.txt",
38+
"meta_file_attr": {
39+
"stable_id": "cna",
40+
"profile_name": "Binned copy-number values",
41+
"profile_description": "Predicted copy number values from WGS (Discrete). Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification",
42+
"genetic_alteration_type": "COPY_NUMBER_ALTERATION",
43+
"datatype": "DISCRETE",
44+
"show_profile_in_analysis_tab": "true"
45+
}
46+
},
47+
"segment": {
48+
"ext": "merged_seg.txt",
49+
"cbio_name": "data_cna.seg.txt",
50+
"meta_file_attr": {
51+
"description": "Somatic CNA data (copy number ratio from tumor samples minus ratio from matched normals)",
52+
"genetic_alteration_type": "COPY_NUMBER_ALTERATION",
53+
"datatype": "SEG",
54+
"reference_genome_id": "hg38"
55+
}
56+
}
57+
}
58+
},
59+
"merged_rsem": {
60+
"dir": "merged_rsem",
61+
"dtypes": {
62+
"counts": {
63+
"ext": "rsem_merged.txt",
64+
"cbio_name": "data_rna_seq_v2_mrna.txt",
65+
"meta_file_attr": {
66+
"stable_id": "rna_seq_v2_mrna",
67+
"profile_name": "RNA expression",
68+
"profile_description": "Expression levels from RNA-Seq (rsem FPKM)",
69+
"genetic_alteration_type": "MRNA_EXPRESSION",
70+
"datatype": "CONTINUOUS",
71+
"show_profile_in_analysis_tab": "false"
72+
}
73+
},
74+
"zscore": {
75+
"ext": "rsem_merged_zscore.txt",
76+
"cbio_name": "data_rna_seq_v2_mrna_median_Zscores.txt",
77+
"meta_file_attr": {
78+
"stable_id": "rna_seq_v2_mrna_median_Zscores",
79+
"profile_name": "RNA expression z-scores",
80+
"profile_description": "Expression levels from RNA-Seq, Z scores of log2(FPKM + 1) values",
81+
"genetic_alteration_type": "MRNA_EXPRESSION",
82+
"datatype": "Z-SCORE",
83+
"show_profile_in_analysis_tab": "true"
84+
}
85+
}
86+
}
87+
},
88+
"merged_fusion": {
89+
"dir": "merged_fusion",
90+
"dtypes": {
91+
"fusion": {
92+
"ext": "fusions.txt",
93+
"cbio_name": "data_fusions.txt",
94+
"meta_file_attr": {
95+
"stable_id": "fusion",
96+
"profile_name": "Predicted RNA fusions",
97+
"profile_description": "Fusion data using arriba and STAR Fusion, annotated and filtered using annoFuse.",
98+
"genetic_alteration_type": "FUSION",
99+
"datatype": "FUSION",
100+
"show_profile_in_analysis_tab": "true"
101+
}
102+
}
103+
}
104+
},
105+
"data_sheets": {
106+
"dir": "datasheets",
107+
"dtypes": {
108+
"patient": {
109+
"cbio_name": "data_clinical_patient.txt",
110+
"meta_file_attr": {
111+
"genetic_alteration_type": "CLINICAL",
112+
"datatype": "PATIENT_ATTRIBUTES"
113+
}
114+
},
115+
"sample": {
116+
"cbio_name": "data_clinical_sample.txt",
117+
"meta_file_attr": {
118+
"genetic_alteration_type": "CLINICAL",
119+
"datatype": "SAMPLE_ATTRIBUTES"
120+
}
121+
}
122+
}
123+
},
124+
"study": {
125+
"_comment": "If a big study is being split into many, leave cancer_study_identifier blank; dx will be used",
126+
"description": "Oligodendroglioma (oligo) is a primary malignant brain cancer that develops from oligodendrocytes. Oligodendroglioma is one of the more common types of glioma (which also includes glioblastoma multiforme and astrocytoma). It is a rare tumor that strikes people of all ages; on average, 1,500 new cases are diagnosed per year in the United States. Rarely, families with multiple affected members have been reported, suggesting that a predisposition for oligodendroglioma can be inherited from blood relatives. Because oligodendroglioma is so rare, it has not been well-studied, due in part to a lack of available biospecimens and data for research use. In collaboration and partnerships with Oligo Nation, CBTN or The Children's Brain Tumor Network and University of Pennsylvania, data types within this project encompass matched tumor/normal whole genome data (WGS), RNA-seq, and longitudinal clinical data. For updates, please see here: <a href=\"https://tinyurl.com/55cxz9am\">Release Notes</a>",
127+
"groups": "PUBLIC",
128+
"cancer_study_identifier": "oligo_nation",
129+
"type_of_cancer": "odg",
130+
"short_name": "oligo_nation",
131+
"reference_genome": "hg38",
132+
"display_name": "Oligo Nation (CBTN, Provisional)"
133+
},
134+
"cases_3way_complete": {
135+
"stable_id": "3way_complete",
136+
"case_list_name": "Tumor samples with mutation, CNA, and mRNA data",
137+
"case_list_description": "All tumor samples with mutation, CNA, and mRNA data",
138+
"case_list_category": "all_cases_with_mutation_and_cna_and_mrna_data"
139+
},
140+
"cases_all": {
141+
"stable_id": "all",
142+
"case_list_name": "All Tumors",
143+
"case_list_description": "All tumor samples",
144+
"case_list_category": "all_cases_in_study"
145+
},
146+
"cases_cnaseq": {
147+
"stable_id": "cnaseq",
148+
"case_list_name": "Tumor samples with mutation and CNA data",
149+
"case_list_description": "All tumor samples with mutation and CNA data",
150+
"case_list_category": "all_cases_with_mutation_and_cna_data"
151+
},
152+
"cases_cna": {
153+
"stable_id": "cna",
154+
"case_list_name": "Tumor Samples with CNA data",
155+
"case_list_description": "All tumors with CNA data",
156+
"case_list_category": "all_cases_with_cna_data"
157+
},
158+
"cases_RNA_Seq_v2_mRNA": {
159+
"stable_id": "rna_seq_v2_mrna",
160+
"case_list_name": "Tumor Samples with mRNA data (RNA Seq V2)",
161+
"case_list_description": "All samples with mRNA expression data",
162+
"case_list_category": "all_cases_with_mrna_rnaseq_data"
163+
},
164+
"cases_sequenced": {
165+
"stable_id": "sequenced",
166+
"case_list_name": "Tumor samples with mutations",
167+
"case_list_description": "All tumor samples with mutation data",
168+
"case_list_category": "all_cases_with_mutation_data"
169+
},
170+
"cases_sv": {
171+
"stable_id": "sv",
172+
"case_list_name": "Tumor samples with fusions",
173+
"case_list_description": "All tumor samples with fusion data",
174+
"case_list_category": "all_cases_with_sv_data"
175+
}
176+
}

0 commit comments

Comments
 (0)