Skip to content

Commit 5c5f1cc

Browse files
committed
WIP pipeline changes
1 parent 07340e7 commit 5c5f1cc

File tree

5 files changed

+48
-68
lines changed

5 files changed

+48
-68
lines changed

pipeline/context_explorer/get_context_analysis.conseq

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
rule get_context_analysis:
22
inputs:
33
script=fileref("./get_context_analysis.py"),
4-
artifacts=all {"type" ~ "subtype_tree|subtype_context_matrix|repurposing_matrix_taiga_id|repurposing_list_taiga_id|prism_oncology_reference_auc_matrix"},
4+
artifacts=all {"type" ~ "subtype_tree|subtype_context_matrix|prism_oncology_reference_auc_matrix"},
55
# subtype_tree_taiga_id=all {"type":"subtype_tree"},
66
# context_matrix_taiga_id=all {"type":"subtype_context_matrix"},
77
gene_effect_taiga_id={"type":"raw-dep-matrix", "label": 'Chronos_Combined'},
88
gene_dependency_taiga_id={"type":"raw-dep-prob-matrix", "label": 'Chronos_Combined'},
9-
# repurposing_matrix_taiga_id=all {"type": "repurposing_matrix_taiga_id"},
10-
# repurposing_list_taiga_id=all {"type": "repurposing_list_taiga_id"},
9+
portal_compounds={"type": "drug-metadata", "name": "merged-drugs"},
10+
repurposing_matrix_taiga_id=all {"type": "drug_screen_auc_matrix", "label": "Repurposing_secondary_AUC"}
1111
# oncref_auc_taiga_id=all {"type":"prism_oncology_reference_auc_matrix"},
12-
compound_summaries=all {"type" ~ "compound-summary"},
13-
# compound_summary_repurposing={"type": "compound-summary", "dataset": "Rep_all_single_pt"},
14-
# compound_summary_oncref={"type": "compound-summary", "dataset": "Prism_oncology_AUC"},
1512
tda_table={"type":"tda-table"},
1613
outputs:
1714
{"type": "context_analysis", "filename": { "$filename": "context_analysis.csv"} }
@@ -20,23 +17,13 @@ rule get_context_analysis:
2017

2118
artifacts = {{ inputs.artifacts }}
2219

23-
compound_tables = {{ inputs.compound_summaries }}
24-
25-
# oncref_table_path is optional, because it should not be there in the public env
26-
oncref_table_path = [cmpd for cmpd in compound_tables if cmpd and 'dataset' in cmpd and cmpd['dataset'] == 'Prism_oncology_AUC']
27-
oncref_table_path = oncref_table_path[0] if len(oncref_table_path) > 0 else None
28-
29-
# repurposing_table_path is required in all envs
30-
repurposing_table_path = [cmpd for cmpd in compound_tables if cmpd and cmpd['dataset'] == 'Rep_all_single_pt']
31-
assert len(repurposing_table_path) == 1, f"Expected exactly one Rep_all_single_pt compound table, got {len(repurposing_table_path)}"
32-
3320
# transformed will be our newly constructed dict of name -> artifact
3421
transformed = {
3522
# handle the ones that couldn't uniquely be identified by type specially
36-
"repurposing_table_path": repurposing_table_path,
37-
"oncref_table_path": [] if oncref_table_path is None else [oncref_table_path],
3823
"gene_effect_taiga_id": [ {{ inputs.gene_effect_taiga_id }} ],
3924
"gene_dependency_taiga_id": [ {{ inputs.gene_dependency_taiga_id }} ],
25+
"portal_compounds_taiga_id": [ {{ inputs.portal_compounds }} ],
26+
"repurposing_matrix_taiga_id": [ {{ inputs.repurposing_matrix_taiga_id }} ],
4027
"tda_table": [ {{ inputs.tda_table }} ],
4128
"script": {{ inputs.script }}
4229
}
@@ -48,7 +35,6 @@ rule get_context_analysis:
4835
('subtype_tree_taiga_id','subtype_tree'),
4936
('context_matrix_taiga_id', 'subtype_context_matrix'),
5037
('repurposing_matrix_taiga_id', 'repurposing_matrix_taiga_id'),
51-
('repurposing_list_taiga_id', 'repurposing_list_taiga_id'),
5238
('oncref_auc_taiga_id', 'prism_oncology_reference_auc_matrix')]:
5339
artifact = by_type.get(type_name)
5440
transformed[dest_name] = [ artifact ] if artifact is not None else []

pipeline/context_explorer/get_context_analysis.py

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import warnings
66
import argparse
77
import json
8-
8+
from scripts.calculate_bimodality_coefficient import (
9+
bimodality_coefficient_for_cpd_viabilities,
10+
)
911
from taigapy import create_taiga_client_v3
1012

1113
MIN_GROUP_SIZE = 5
@@ -48,8 +50,8 @@ def load_crispr_data(
4850
return gene_effect, gene_dependency
4951

5052

51-
def load_prism_data(tc, repurposing_matrix_taiga_id, repurposing_list_taiga_id):
52-
Extended_Primary_Compound_List = tc.get(repurposing_list_taiga_id)
53+
def load_prism_data(tc, repurposing_matrix_taiga_id, portal_compounds_taiga_id):
54+
Portal_Compounds = tc.get(portal_compounds_taiga_id)
5355
Extended_Primary_Data_Matrix = tc.get(repurposing_matrix_taiga_id).T
5456
Data_Matrix_Discrete = Extended_Primary_Data_Matrix < np.log2(0.3)
5557
Data_Matrix_Discrete = Data_Matrix_Discrete.mask(
@@ -146,11 +148,9 @@ def load_all_data(
146148
gene_effect_taiga_id,
147149
gene_dependency_taiga_id,
148150
repurposing_matrix_taiga_id,
149-
repurposing_list_taiga_id,
150151
oncref_auc_taiga_id,
151-
repurposing_table_path,
152-
oncref_table_path,
153152
tda_table_path,
153+
portal_compounds_taiga_id,
154154
):
155155

156156
all_data_dict = dict()
@@ -182,7 +182,7 @@ def load_all_data(
182182
rep_sensitivity = load_prism_data(
183183
tc=tc,
184184
repurposing_matrix_taiga_id=repurposing_matrix_taiga_id,
185-
repurposing_list_taiga_id=repurposing_list_taiga_id,
185+
portal_compounds_taiga_id=portal_compounds_taiga_id,
186186
)
187187
datasets_to_test["PRISMRepurposing"] = rep_sensitivity
188188

@@ -469,35 +469,28 @@ def compute_context_explorer_results(inputs, out_filename):
469469
repurposing_matrix_taiga_id = get_id_or_file_name(
470470
taiga_ids_or_file_name["repurposing_matrix_taiga_id"]
471471
)
472-
repurposing_list_taiga_id = get_id_or_file_name(
473-
taiga_ids_or_file_name["repurposing_list_taiga_id"]
474-
)
475472
oncref_auc_taiga_id = get_id_or_file_name(
476473
taiga_ids_or_file_name["oncref_auc_taiga_id"]
477474
)
478475

479-
repurposing_table_path = get_id_or_file_name(
480-
taiga_ids_or_file_name["repurposing_table_path"], id_key="filename"
481-
)
482-
oncref_table_path = get_id_or_file_name(
483-
taiga_ids_or_file_name["oncref_table_path"], id_key="filename"
484-
)
485476
tda_table_path = get_id_or_file_name(
486477
taiga_ids_or_file_name["tda_table"], id_key="filename"
487478
)
488479

480+
portal_compounds_taiga_id = get_id_or_file_name(
481+
taiga_ids_or_file_name["portal_compounds_taiga_id"]
482+
)
483+
489484
### ---- LOAD DATA ---- ###
490485
data_dict = load_all_data(
491-
subtype_tree_taiga_id,
492-
context_matrix_taiga_id,
493-
gene_effect_taiga_id,
494-
gene_dependency_taiga_id,
495-
repurposing_matrix_taiga_id,
496-
repurposing_list_taiga_id,
497-
oncref_auc_taiga_id,
498-
repurposing_table_path,
499-
oncref_table_path,
500-
tda_table_path,
486+
subtype_tree_taiga_id=subtype_tree_taiga_id,
487+
context_matrix_taiga_id=context_matrix_taiga_id,
488+
gene_effect_taiga_id=gene_effect_taiga_id,
489+
gene_dependency_taiga_id=gene_dependency_taiga_id,
490+
repurposing_matrix_taiga_id=repurposing_matrix_taiga_id,
491+
oncref_auc_taiga_id=oncref_auc_taiga_id,
492+
tda_table_path=tda_table_path,
493+
portal_compounds_taiga_id=portal_compounds_taiga_id,
501494
)
502495

503496
context_explorer_results = compute_in_out_groups(**data_dict)
pipeline/scripts/calculate_bimodality_coefficient.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from typing import Optional

import numpy as np
import pandas as pd
2+
3+
4+
def bimodality_coefficient_for_cpd_viabilities(
    cpd_viabilities: pd.Series,
) -> Optional[float]:
    """Return the bimodality coefficient of one compound's viabilities.

    Missing values are dropped first. With ``n`` remaining observations the
    coefficient is::

        (skewness**2 + 1) / (excess_kurtosis + 3 * (n - 1)**2 / ((n - 2) * (n - 3)))

    where skewness and kurtosis are taken from the values standardized with
    the population (``ddof=0``) mean and variance.

    Args:
        cpd_viabilities: Viability values for a single compound; may
            contain NaN entries, which are ignored.

    Returns:
        The coefficient as a float, or ``None`` when 20 or fewer non-missing
        values remain. A constant input (zero variance) yields NaN.
    """
    min_observations = 20

    values = cpd_viabilities.dropna().to_numpy(dtype=float)
    n = values.size  # NaNs are already gone, so this is the sample size
    if n <= min_observations:
        return None

    # A zero variance makes the standardization 0/0; let that propagate as
    # NaN quietly instead of emitting a RuntimeWarning for every such compound.
    with np.errstate(divide="ignore", invalid="ignore"):
        standardized = (values - values.mean()) / np.sqrt(values.var())

    skewness = np.mean(standardized ** 3)
    excess_kurtosis = np.mean(standardized ** 4) - 3
    finite_sample_correction = 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))

    return (skewness ** 2 + 1) / (excess_kurtosis + finite_sample_correction)

pipeline/scripts/compound_summary_merge.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import sys
66

7+
from calculate_bimodality_coefficient import bimodality_coefficient_for_cpd_viabilities
8+
79
sys.path.append(".")
810
from hdf5_utils import read_hdf5
911

@@ -136,25 +138,6 @@ def get_dose_description(df):
136138
merged_df.to_csv(args.output_filename, index=False, na_rep="NA")
137139

138140

139-
def bimodality_coefficient_for_cpd_viabilities(cpd_viabilities: pd.Series) -> pd.Series:
140-
x = cpd_viabilities.dropna()
141-
num_viabilities = len(x)
142-
if num_viabilities > 20:
143-
s1 = np.mean(x)
144-
s2 = np.var(x)
145-
x_ = np.divide(np.subtract(x, s1), np.sqrt(s2))
146-
s3 = np.mean(np.power(x_, 3))
147-
s4 = np.mean(np.power(x_, 4))
148-
n = (1 - np.isnan(x)).sum()
149-
bimodality_coefficient = (np.power(s3, 2) + 1) / (
150-
s4 - 3 + 3 * np.power(n - 1, 2) / (np.multiply(n - 2, n - 3))
151-
)
152-
else:
153-
bimodality_coefficient = None
154-
155-
return bimodality_coefficient
156-
157-
158141
def get_sensitive_cell_lines_count(
159142
dataset_viabilities_df: pd.DataFrame, units: str
160143
) -> pd.Series:

pipeline/xrefs-common.conseq

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ add-if-missing {
197197

198198
####### Repurposing secondary screen
199199

200+
### NOTE: if this changes, it will also affect Context Explorer (the get_context_analysis rule reads this drug_screen_auc_matrix artifact)!
200201
add-if-missing {
201202
"type": "drug_screen_auc_matrix",
202203
"dataset_id": "processed-repurposing-secondary-e3aa.2/REPURPOSINGAUCMatrix",
@@ -307,7 +308,4 @@ add-if-missing {
307308
"dataset_id": "processed-gdsc-ee73.2/GDSC2ResponseCurves",
308309
"conditions_dataset_id": "processed-gdsc-ee73.2/GDSC2Log2ViabilityCollapsedConditions",
309310
"sample_id_prefix": "GDSC2"
310-
}
311-
312-
313-
311+
}

0 commit comments

Comments
 (0)