Skip to content

Commit ce6a220

Browse files
committed
CDRP chemical and phenotypic space notebook - SF4
1 parent 17662e9 commit ce6a220

File tree

1 file changed

+199
-0
lines changed

1 file changed

+199
-0
lines changed
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import numpy as np\n",
10+
"import matplotlib.pyplot as plt\n",
11+
"import pandas as pd\n",
12+
"import seaborn as sb\n",
13+
"import umap"
14+
]
15+
},
16+
{
17+
"cell_type": "code",
18+
"execution_count": null,
19+
"metadata": {},
20+
"outputs": [],
21+
"source": [
22+
"main = pd.read_csv('data/treatment_level_aux_combined.csv.gz')  # treatment-level table; its 'X', 'Y' and 'Metadata_moa.x' columns feed the last plot of this notebook\n",
23+
"columns = list(map(str, range(672)))  # string labels '0' .. '671'; not referenced again in the cells shown here"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": null,
29+
"metadata": {},
30+
"outputs": [],
31+
"source": [
32+
"fingerprints = np.load('data/fingerprints_cdrp.npz')['features']  # the 'features' array is what UMAP is fitted on\n",
33+
"cdrp_smiles_scaffolds = pd.read_csv('data/cdrp_smiles_scaffolds.csv')  # concatenated column-wise (row-aligned) with the embeddings\n",
34+
"Y = pd.read_csv('data/CDRP_MOA_MATCHES_official.csv')  # holds the 'Metadata_moa.x' annotations; joined through its 'Var1' column"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"# Compute fresh UMAP embeddings of the fingerprints; the fixed random_state makes the layout repeatable across runs.\n",
44+
"reducer = umap.UMAP(random_state=42)\n",
45+
"embeddings = reducer.fit_transform(fingerprints)\n",
46+
"print(fingerprints.shape, embeddings.shape)\n",
47+
"aux = pd.concat((pd.DataFrame(embeddings, columns=[\"UMAP 1\", \"UMAP 2\"]), cdrp_smiles_scaffolds.reset_index()), axis=1)\n",
48+
"# Left join: every compound is kept, and compounds without a match in Y get NaN in the columns coming from Y.\n",
49+
"aux = pd.merge(aux, Y, left_on = 'Metadata_BROAD_ID', right_on = 'Var1', how = 'left')\n",
50+
"\n",
51+
"# To load the exact embeddings used in the publication instead (the seed above does not recreate them), uncomment the next line.\n",
52+
"#aux = pd.read_csv('data/chemical_aux_umap.csv')\n",
53+
"\n",
54+
"# The embeddings used for the supplementary figure are already stored in this repository; uncomment only to overwrite them.\n",
55+
"#aux.to_csv('data/chemical_aux_umap.csv', index = False)\n",
56+
"\n",
57+
"sb.scatterplot(data=aux, x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"lightpink\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": null,
63+
"metadata": {},
64+
"outputs": [],
65+
"source": [
66+
"# One row per individual MoA label: annotations are '|'-separated, so each one is split and flattened.\n",
67+
"# dropna(): rows of Y with a missing annotation are skipped (NaN is a float and has no .split()).\n",
68+
"moa_labels = [label\n",
69+
"              for annotation in Y['Metadata_moa.x'].dropna()\n",
70+
"              for label in annotation.split('|')]\n",
71+
"moas = pd.DataFrame({'MoA': moa_labels})"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": null,
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"fig = plt.figure(figsize=(10,10))\n",
81+
"a = \"lipoxygenase inhibitor\"\n",
82+
"# na=False: rows with a missing annotation (e.g. unmatched in the left merge) count as non-matching instead of yielding a NaN mask; regex=False matches the name literally.\n",
83+
"has_moa = aux['Metadata_moa.x'].str.contains(a, na=False, regex=False)\n",
84+
"g = sb.scatterplot(data=aux[~has_moa], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"dodgerblue\", linewidth=0.5, edgecolor=\"black\", alpha=0.8, label=\"other\")\n",
85+
"h = sb.scatterplot(data=aux[has_moa], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"limegreen\", linewidth=0.5, edgecolor=\"black\", alpha=0.8, label=a)\n",
86+
"# label= on each layer gives the legend its entries (without labels the legend has nothing to show).\n",
87+
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
88+
"plt.show()"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"# MoAs highlighted in the figures below; the order fixes both the colour each one gets and the order in which the layers are drawn.\n",
98+
"selected_moas = ['adenosine receptor agonist', 'adrenergic receptor antagonist', 'dopamine receptor agonist', 'egfr inhibitor', 'estrogen receptor agonist',\n",
99+
"                 'glucocorticoid receptor agonist', 'tyrosine kinase inhibitor', 'opioid receptor antagonist', 'bacterial dna gyrase inhibitor', 'hmgcr inhibitor']"
100+
]
101+
},
102+
{
103+
"cell_type": "code",
104+
"execution_count": null,
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"# Chemical space: fingerprint UMAP with the selected MoAs highlighted over all other compounds.\n",
109+
"fig = plt.figure(figsize=(10,10))\n",
110+
"\n",
111+
"# Marker styling shared by every layer.\n",
112+
"marker_style = dict(x=\"UMAP 1\", y=\"UMAP 2\", s=100, linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
113+
"# One colour per entry of selected_moas, in the same order.\n",
114+
"moa_colors = [\"mediumorchid\", \"indigo\", \"teal\", \"limegreen\", \"gold\", \"blue\", \"salmon\", \"rosybrown\", \"hotpink\", \"crimson\"]\n",
115+
"\n",
116+
"# na=False: compounds without an MoA annotation (NaN, e.g. unmatched in the left merge) are treated as\n",
117+
"# non-matching; otherwise the mask contains NaN and raises when negated with ~ or used for indexing.\n",
118+
"moa_text = aux['Metadata_moa.x']\n",
119+
"in_selection = moa_text.str.contains('|'.join(selected_moas), na=False)\n",
120+
"\n",
121+
"# Background first, then one layer per MoA so highlighted points are drawn on top; label= feeds the legend.\n",
122+
"sb.scatterplot(data=aux[~in_selection], color=\"dodgerblue\", label=\"other\", **marker_style)\n",
123+
"for moa, color in zip(selected_moas, moa_colors):\n",
124+
"    sb.scatterplot(data=aux[moa_text.str.contains(moa, na=False)], color=color, label=moa, **marker_style)\n",
125+
"\n",
126+
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
127+
"plt.show()"
128+
]
129+
},
130+
{
131+
"cell_type": "code",
132+
"execution_count": null,
133+
"metadata": {},
134+
"outputs": [],
135+
"source": [
136+
"fig.savefig(\"chemical_space_moa.png\", bbox_inches=\"tight\")  # tight bbox: the legend is anchored outside the axes (x=1.05) and must not be cropped\n",
137+
"fig.savefig(\"chemical_space_moa.svg\", bbox_inches=\"tight\")"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {},
144+
"outputs": [],
145+
"source": [
146+
"# Phenotypic space: the X/Y coordinates stored in main, with the selected MoAs highlighted over all other rows.\n",
147+
"fig = plt.figure(figsize=(10,10))\n",
148+
"# Marker styling shared by every layer.\n",
149+
"marker_style = dict(x=\"X\", y=\"Y\", s=100, linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
150+
"# One colour per entry of selected_moas, in the same order.\n",
151+
"moa_colors = [\"mediumorchid\", \"indigo\", \"teal\", \"limegreen\", \"gold\", \"blue\", \"salmon\", \"rosybrown\", \"hotpink\", \"crimson\"]\n",
152+
"\n",
153+
"# na=False: any rows of main with a missing MoA annotation (NaN) are treated as non-matching;\n",
154+
"# otherwise the mask contains NaN and raises when negated with ~ or used for indexing.\n",
155+
"moa_text = main['Metadata_moa.x']\n",
156+
"in_selection = moa_text.str.contains('|'.join(selected_moas), na=False)\n",
157+
"\n",
158+
"# Background first, then one layer per MoA so highlighted points are drawn on top; label= feeds the legend.\n",
159+
"sb.scatterplot(data=main[~in_selection], color=\"dodgerblue\", label=\"other\", **marker_style)\n",
160+
"for moa, color in zip(selected_moas, moa_colors):\n",
161+
"    sb.scatterplot(data=main[moa_text.str.contains(moa, na=False)], color=color, label=moa, **marker_style)\n",
162+
"\n",
163+
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
164+
"plt.show()"
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"metadata": {},
171+
"outputs": [],
172+
"source": [
173+
"fig.savefig(\"phenotypic_space_moa.png\", bbox_inches=\"tight\")  # tight bbox: the legend is anchored outside the axes (x=1.05) and must not be cropped\n",
174+
"fig.savefig(\"phenotypic_space_moa.svg\", bbox_inches=\"tight\")"
175+
]
176+
}
177+
],
178+
"metadata": {
179+
"kernelspec": {
180+
"display_name": "Python 3 (ipykernel)",
181+
"language": "python",
182+
"name": "python3"
183+
},
184+
"language_info": {
185+
"codemirror_mode": {
186+
"name": "ipython",
187+
"version": 3
188+
},
189+
"file_extension": ".py",
190+
"mimetype": "text/x-python",
191+
"name": "python",
192+
"nbconvert_exporter": "python",
193+
"pygments_lexer": "ipython3",
194+
"version": "3.9.13"
195+
}
196+
},
197+
"nbformat": 4,
198+
"nbformat_minor": 4
199+
}

0 commit comments

Comments
 (0)