24 changes: 5 additions & 19 deletions pipeline/context_explorer/get_context_analysis.conseq
@@ -1,17 +1,14 @@
rule get_context_analysis:
inputs:
script=fileref("./get_context_analysis.py"),
artifacts=all {"type" ~ "subtype_tree|subtype_context_matrix|repurposing_matrix_taiga_id|repurposing_list_taiga_id|prism_oncology_reference_auc_matrix"},
artifacts=all {"type" ~ "subtype_tree|subtype_context_matrix|prism_oncology_reference_auc_matrix"},
# subtype_tree_taiga_id=all {"type":"subtype_tree"},
# context_matrix_taiga_id=all {"type":"subtype_context_matrix"},
gene_effect_taiga_id={"type":"raw-dep-matrix", "label": 'Chronos_Combined'},
gene_dependency_taiga_id={"type":"raw-dep-prob-matrix", "label": 'Chronos_Combined'},
# repurposing_matrix_taiga_id=all {"type": "repurposing_matrix_taiga_id"},
# repurposing_list_taiga_id=all {"type": "repurposing_list_taiga_id"},
portal_compounds={"type": "drug-metadata", "name": "merged-drugs"},
repurposing_matrix_taiga_id=all {"type": "drug_screen_auc_matrix", "label": "Repurposing_secondary_AUC"}
# oncref_auc_taiga_id=all {"type":"prism_oncology_reference_auc_matrix"},
compound_summaries=all {"type" ~ "compound-summary"},
# compound_summary_repurposing={"type": "compound-summary", "dataset": "Rep_all_single_pt"},
# compound_summary_oncref={"type": "compound-summary", "dataset": "Prism_oncology_AUC"},
tda_table={"type":"tda-table"},
outputs:
{"type": "context_analysis", "filename": { "$filename": "context_analysis.csv"} }
@@ -20,23 +17,13 @@ rule get_context_analysis:

artifacts = {{ inputs.artifacts }}

compound_tables = {{ inputs.compound_summaries }}

# oncref_table_path is optional, because it should not be there in the public env
oncref_table_path = [cmpd for cmpd in compound_tables if cmpd and 'dataset' in cmpd and cmpd['dataset'] == 'Prism_oncology_AUC']
oncref_table_path = oncref_table_path[0] if len(oncref_table_path) > 0 else None

# repurposing_table_path is required in all envs
repurposing_table_path = [cmpd for cmpd in compound_tables if cmpd and cmpd['dataset'] == 'Rep_all_single_pt']
assert len(repurposing_table_path) == 1, f"Expected exactly one Rep_all_single_pt compound table, got {len(repurposing_table_path)}"

# transformed will be our newly constructed dict of name -> artifact
transformed = {
# handle the ones that couldn't uniquely be identified by type specially
"repurposing_table_path": repurposing_table_path,
"oncref_table_path": [] if oncref_table_path is None else [oncref_table_path],
"gene_effect_taiga_id": [ {{ inputs.gene_effect_taiga_id }} ],
"gene_dependency_taiga_id": [ {{ inputs.gene_dependency_taiga_id }} ],
"portal_compounds_taiga_id": [ {{ inputs.portal_compounds }} ],
"repurposing_matrix_taiga_id": [ {{ inputs.repurposing_matrix_taiga_id }} ],
"tda_table": [ {{ inputs.tda_table }} ],
"script": {{ inputs.script }}
}
Expand All @@ -48,7 +35,6 @@ rule get_context_analysis:
('subtype_tree_taiga_id','subtype_tree'),
('context_matrix_taiga_id', 'subtype_context_matrix'),
('repurposing_matrix_taiga_id', 'repurposing_matrix_taiga_id'),
('repurposing_list_taiga_id', 'repurposing_list_taiga_id'),
('oncref_auc_taiga_id', 'prism_oncology_reference_auc_matrix')]:
artifact = by_type.get(type_name)
transformed[dest_name] = [ artifact ] if artifact is not None else []
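For reference, the input wiring above reduces to a lookup table from artifact type to artifact. A minimal Python sketch of that pattern, assuming artifacts is the list of dicts matched by the all {...} selector; the dataset_id values and the shortened pair list are illustrative, not pipeline values:

# Illustrative sketch of the by_type wiring above; dataset_id values are placeholders.
artifacts = [
    {"type": "subtype_tree", "dataset_id": "placeholder-subtype-tree"},
    {"type": "subtype_context_matrix", "dataset_id": "placeholder-context-matrix"},
    # "prism_oncology_reference_auc_matrix" may be absent, e.g. in the public env.
]

by_type = {a["type"]: a for a in artifacts}

transformed = {}
for dest_name, type_name in [
    ("subtype_tree_taiga_id", "subtype_tree"),
    ("context_matrix_taiga_id", "subtype_context_matrix"),
    ("oncref_auc_taiga_id", "prism_oncology_reference_auc_matrix"),
]:
    artifact = by_type.get(type_name)
    # Missing artifacts become empty lists so downstream code can treat them as optional.
    transformed[dest_name] = [artifact] if artifact is not None else []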
45 changes: 19 additions & 26 deletions pipeline/context_explorer/get_context_analysis.py
@@ -5,7 +5,9 @@
import warnings
import argparse
import json

from scripts.calculate_bimodality_coefficient import (
bimodality_coefficient_for_cpd_viabilities,
)
from taigapy import create_taiga_client_v3

MIN_GROUP_SIZE = 5
@@ -48,8 +50,8 @@ def load_crispr_data(
return gene_effect, gene_dependency


def load_prism_data(tc, repurposing_matrix_taiga_id, repurposing_list_taiga_id):
Extended_Primary_Compound_List = tc.get(repurposing_list_taiga_id)
def load_prism_data(tc, repurposing_matrix_taiga_id, portal_compounds_taiga_id):
Portal_Compounds = tc.get(portal_compounds_taiga_id)
Extended_Primary_Data_Matrix = tc.get(repurposing_matrix_taiga_id).T
Data_Matrix_Discrete = Extended_Primary_Data_Matrix < np.log2(0.3)
Data_Matrix_Discrete = Data_Matrix_Discrete.mask(
@@ -146,11 +148,9 @@ def load_all_data(
gene_effect_taiga_id,
gene_dependency_taiga_id,
repurposing_matrix_taiga_id,
repurposing_list_taiga_id,
oncref_auc_taiga_id,
repurposing_table_path,
oncref_table_path,
tda_table_path,
portal_compounds_taiga_id,
):

all_data_dict = dict()
@@ -182,7 +182,7 @@
rep_sensitivity = load_prism_data(
tc=tc,
repurposing_matrix_taiga_id=repurposing_matrix_taiga_id,
repurposing_list_taiga_id=repurposing_list_taiga_id,
portal_compounds_taiga_id=portal_compounds_taiga_id,
)
datasets_to_test["PRISMRepurposing"] = rep_sensitivity

@@ -469,35 +469,28 @@ def compute_context_explorer_results(inputs, out_filename):
repurposing_matrix_taiga_id = get_id_or_file_name(
taiga_ids_or_file_name["repurposing_matrix_taiga_id"]
)
repurposing_list_taiga_id = get_id_or_file_name(
taiga_ids_or_file_name["repurposing_list_taiga_id"]
)
oncref_auc_taiga_id = get_id_or_file_name(
taiga_ids_or_file_name["oncref_auc_taiga_id"]
)

repurposing_table_path = get_id_or_file_name(
taiga_ids_or_file_name["repurposing_table_path"], id_key="filename"
)
oncref_table_path = get_id_or_file_name(
taiga_ids_or_file_name["oncref_table_path"], id_key="filename"
)
tda_table_path = get_id_or_file_name(
taiga_ids_or_file_name["tda_table"], id_key="filename"
)

portal_compounds_taiga_id = get_id_or_file_name(
taiga_ids_or_file_name["portal_compounds_taiga_id"]
)

### ---- LOAD DATA ---- ###
data_dict = load_all_data(
subtype_tree_taiga_id,
context_matrix_taiga_id,
gene_effect_taiga_id,
gene_dependency_taiga_id,
repurposing_matrix_taiga_id,
repurposing_list_taiga_id,
oncref_auc_taiga_id,
repurposing_table_path,
oncref_table_path,
tda_table_path,
subtype_tree_taiga_id=subtype_tree_taiga_id,
context_matrix_taiga_id=context_matrix_taiga_id,
gene_effect_taiga_id=gene_effect_taiga_id,
gene_dependency_taiga_id=gene_dependency_taiga_id,
repurposing_matrix_taiga_id=repurposing_matrix_taiga_id,
oncref_auc_taiga_id=oncref_auc_taiga_id,
tda_table_path=tda_table_path,
portal_compounds_taiga_id=portal_compounds_taiga_id,
)

context_explorer_results = compute_in_out_groups(**data_dict)
20 changes: 20 additions & 0 deletions pipeline/scripts/calculate_bimodality_coefficient.py
@@ -0,0 +1,20 @@
import numpy as np
import pandas as pd


def bimodality_coefficient_for_cpd_viabilities(cpd_viabilities: pd.Series) -> pd.Series:
x = cpd_viabilities.dropna()
num_viabilities = len(x)
if num_viabilities > 20:
s1 = np.mean(x)
s2 = np.var(x)
x_ = np.divide(np.subtract(x, s1), np.sqrt(s2))
s3 = np.mean(np.power(x_, 3))
s4 = np.mean(np.power(x_, 4))
n = (1 - np.isnan(x)).sum()
bimodality_coefficient = (np.power(s3, 2) + 1) / (
s4 - 3 + 3 * np.power(n - 1, 2) / (np.multiply(n - 2, n - 3))
)
else:
bimodality_coefficient = None

return bimodality_coefficient
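The helper above computes Sarle's sample bimodality coefficient, (skewness^2 + 1) / (excess_kurtosis + 3*(n-1)^2 / ((n-2)*(n-3))), and returns None when 20 or fewer non-NaN viabilities remain after dropna(). A minimal usage sketch, assuming the pipeline directory is on the import path as in get_context_analysis.py above; the synthetic Series is purely illustrative:

import numpy as np
import pandas as pd

from scripts.calculate_bimodality_coefficient import (
    bimodality_coefficient_for_cpd_viabilities,
)

# Synthetic, clearly bimodal viability profile with a couple of missing values.
rng = np.random.default_rng(0)
viabilities = pd.Series(
    np.concatenate([rng.normal(-2.0, 0.3, 30), rng.normal(0.0, 0.3, 30)])
)
viabilities.iloc[[3, 7]] = np.nan

bc = bimodality_coefficient_for_cpd_viabilities(viabilities)
# Values above roughly 0.555 (the uniform-distribution benchmark) suggest bimodality.
print(bc)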
21 changes: 2 additions & 19 deletions pipeline/scripts/compound_summary_merge.py
@@ -4,6 +4,8 @@

import sys

from calculate_bimodality_coefficient import bimodality_coefficient_for_cpd_viabilities

sys.path.append(".")
from hdf5_utils import read_hdf5

@@ -136,25 +138,6 @@ def get_dose_description(df):
merged_df.to_csv(args.output_filename, index=False, na_rep="NA")


def bimodality_coefficient_for_cpd_viabilities(cpd_viabilities: pd.Series) -> pd.Series:
x = cpd_viabilities.dropna()
num_viabilities = len(x)
if num_viabilities > 20:
s1 = np.mean(x)
s2 = np.var(x)
x_ = np.divide(np.subtract(x, s1), np.sqrt(s2))
s3 = np.mean(np.power(x_, 3))
s4 = np.mean(np.power(x_, 4))
n = (1 - np.isnan(x)).sum()
bimodality_coefficient = (np.power(s3, 2) + 1) / (
s4 - 3 + 3 * np.power(n - 1, 2) / (np.multiply(n - 2, n - 3))
)
else:
bimodality_coefficient = None

return bimodality_coefficient


def get_sensitive_cell_lines_count(
dataset_viabilities_df: pd.DataFrame, units: str
) -> pd.Series:
6 changes: 2 additions & 4 deletions pipeline/xrefs-common.conseq
@@ -197,6 +197,7 @@ add-if-missing {

####### Repurposing secondary screen

### NOTE: if this changes, it will also affect Context Explorer!
add-if-missing {
"type": "drug_screen_auc_matrix",
"dataset_id": "processed-repurposing-secondary-e3aa.2/REPURPOSINGAUCMatrix",
@@ -307,7 +308,4 @@ add-if-missing {
"dataset_id": "processed-gdsc-ee73.2/GDSC2ResponseCurves",
"conditions_dataset_id": "processed-gdsc-ee73.2/GDSC2Log2ViabilityCollapsedConditions",
"sample_id_prefix": "GDSC2"
}



}