|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": null, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [], |
| 8 | + "source": [ |
| 9 | + "import numpy as np\n", |
| 10 | + "import matplotlib.pyplot as plt\n", |
| 11 | + "import pandas as pd\n", |
| 12 | + "import seaborn as sb\n", |
| 13 | + "import umap" |
| 14 | + ] |
| 15 | + }, |
| 16 | + { |
| 17 | + "cell_type": "code", |
| 18 | + "execution_count": null, |
| 19 | + "metadata": {}, |
| 20 | + "outputs": [], |
| 21 | + "source": [ |
| 22 | + "main = pd.read_csv('data/treatment_level_aux_combined.csv.gz')  # frame behind the final figure, which reads its X, Y and Metadata_moa.x columns\n", |
| 23 | + "columns = list(map(str, range(672)))  # the strings '0' through '671'" |
| 24 | + ] |
| 25 | + }, |
| 26 | + { |
| 27 | + "cell_type": "code", |
| 28 | + "execution_count": null, |
| 29 | + "metadata": {}, |
| 30 | + "outputs": [], |
| 31 | + "source": [ |
| 32 | + "Y = pd.read_csv('data/CDRP_MOA_MATCHES_official.csv')  # MoA annotations: supplies Var1 (merge key) and Metadata_moa.x\n", |
| 33 | + "cdrp_smiles_scaffolds = pd.read_csv('data/cdrp_smiles_scaffolds.csv')  # per-compound table; Metadata_BROAD_ID is its merge key\n", |
| 34 | + "fingerprints = np.load('data/fingerprints_cdrp.npz')['features']  # array that UMAP embeds in the next cell" |
| 35 | + ] |
| 36 | + }, |
| 37 | + { |
| 38 | + "cell_type": "code", |
| 39 | + "execution_count": null, |
| 40 | + "metadata": {}, |
| 41 | + "outputs": [], |
| 42 | + "source": [ |
| 43 | + "# Compute a fresh two-column UMAP embedding of the fingerprints. UMAP is stochastic, so a fixed random_state keeps the layout identical across Restart & Run All.\n", |
| 44 | + "reducer = umap.UMAP(random_state=42)\n", |
| 45 | + "embeddings = reducer.fit_transform(fingerprints)\n", |
| 46 | + "print(fingerprints.shape, embeddings.shape)\n", |
| 47 | + "# Column-wise concat pairs embedding row i with compound row i; assumes fingerprints and cdrp_smiles_scaffolds share row order and length -- TODO confirm.\n", |
| 48 | + "aux = pd.concat((pd.DataFrame(embeddings, columns=[\"UMAP 1\", \"UMAP 2\"]), cdrp_smiles_scaffolds.reset_index()), axis=1)\n", |
| 49 | + "# Left merge keeps every compound; compounds with no match in Y get NaN in the columns that come from Y.\n", |
| 50 | + "aux = pd.merge(aux, Y, left_on = 'Metadata_BROAD_ID', right_on = 'Var1', how = 'left')\n", |
| 51 | + "\n", |
| 52 | + "#to read aux used in publication uncomment next line\n", |
| 53 | + "#aux = pd.read_csv('data/chemical_aux_umap.csv')\n", |
| 54 | + "#UMAP embeddings that were used for the supplementary figure are already in this repository\n", |
| 55 | + "#aux.to_csv('data/chemical_aux_umap.csv', index = False)\n", |
| 56 | + "\n", |
| 57 | + "sb.scatterplot(data=aux, x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"lightpink\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)" |
| 58 | + ] |
| 59 | + }, |
| 60 | + { |
| 61 | + "cell_type": "code", |
| 62 | + "execution_count": null, |
| 63 | + "metadata": {}, |
| 64 | + "outputs": [], |
| 65 | + "source": [ |
| 66 | + "# Flatten the '|'-separated MoA annotations in Y into one row per individual MoA.\n", |
| 67 | + "moas = pd.DataFrame({'MoA': [\n", |
| 68 | + "    moa\n", |
| 69 | + "    for annotation in Y[\"Metadata_moa.x\"].dropna()  # a missing annotation is NaN (a float) and has no .split\n", |
| 70 | + "    for moa in annotation.split(\"|\")\n", |
| 71 | + "]})" |
| 72 | + ] |
| 73 | + }, |
| 74 | + { |
| 75 | + "cell_type": "code", |
| 76 | + "execution_count": null, |
| 77 | + "metadata": {}, |
| 78 | + "outputs": [], |
| 79 | + "source": [ |
| 80 | + "# Highlight a single mechanism of action (MoA) against all other compounds in the chemical-space UMAP.\n", |
| 81 | + "fig, ax = plt.subplots(figsize=(10,10))\n", |
| 82 | + "a = \"lipoxygenase inhibitor\"\n", |
| 83 | + "# regex=False matches the name literally; na=False treats rows whose annotation is missing (possible after the left merge) as non-matching instead of producing a NaN mask that cannot be inverted or used for indexing.\n", |
| 84 | + "is_a = aux['Metadata_moa.x'].str.contains(a, regex=False, na=False)\n", |
| 85 | + "g = sb.scatterplot(data=aux[~is_a], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"dodgerblue\", linewidth=0.5, edgecolor=\"black\", alpha=0.8, label=\"other\", ax=ax)\n", |
| 86 | + "h = sb.scatterplot(data=aux[is_a], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"limegreen\", linewidth=0.5, edgecolor=\"black\", alpha=0.8, label=a, ax=ax)\n", |
| 87 | + "ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # the label= arguments above are what give the legend its entries\n", |
| 88 | + "plt.show()" |
| 89 | + ] |
| 90 | + }, |
| 91 | + { |
| 92 | + "cell_type": "code", |
| 93 | + "execution_count": null, |
| 94 | + "metadata": {}, |
| 95 | + "outputs": [], |
| 96 | + "source": [ |
| 97 | + "selected_moas = [\"adenosine receptor agonist\", \"adrenergic receptor antagonist\", \"dopamine receptor agonist\", \"egfr inhibitor\",  # order matters: the plotting cells pick colours by position\n", |
| 98 | + "                 \"estrogen receptor agonist\", \"glucocorticoid receptor agonist\", \"tyrosine kinase inhibitor\",\n", |
| 99 | + "                 \"opioid receptor antagonist\", \"bacterial dna gyrase inhibitor\", \"hmgcr inhibitor\"]" |
| 100 | + ] |
| 101 | + }, |
| 102 | + { |
| 103 | + "cell_type": "code", |
| 104 | + "execution_count": null, |
| 105 | + "metadata": {}, |
| 106 | + "outputs": [], |
| 107 | + "source": [ |
| 108 | + "# Chemical space: fingerprint UMAP with each selected MoA drawn in its own colour over all remaining compounds.\n", |
| 109 | + "moa_colors = [\"mediumorchid\", \"indigo\", \"teal\", \"limegreen\", \"gold\", \"blue\", \"salmon\", \"rosybrown\", \"hotpink\", \"crimson\"]\n", |
| 110 | + "assert len(moa_colors) == len(selected_moas), \"one colour per selected MoA\"  # zip() below would silently drop the surplus\n", |
| 111 | + "marker_style = dict(x=\"UMAP 1\", y=\"UMAP 2\", s=100, linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n", |
| 112 | + "\n", |
| 113 | + "# na=False: compounds whose annotation is missing (possible after the left merge) match nothing and stay in the background,\n", |
| 114 | + "# rather than yielding a NaN mask that cannot be inverted or used for indexing.\n", |
| 115 | + "moa_annotation = aux['Metadata_moa.x']\n", |
| 116 | + "highlighted = moa_annotation.str.contains('|'.join(selected_moas), na=False)  # regex alternation: any selected MoA\n", |
| 117 | + "\n", |
| 118 | + "# fig stays bound after this cell; the savefig cell that follows writes it to disk.\n", |
| 119 | + "fig, ax = plt.subplots(figsize=(10,10))\n", |
| 120 | + "# Background first, so the highlighted groups are painted over it in list order.\n", |
| 121 | + "sb.scatterplot(data=aux[~highlighted], color=\"dodgerblue\", label=\"other\", ax=ax, **marker_style)\n", |
| 122 | + "for moa, moa_color in zip(selected_moas, moa_colors):\n", |
| 123 | + "    sb.scatterplot(data=aux[moa_annotation.str.contains(moa, na=False)], color=moa_color, label=moa, ax=ax, **marker_style)\n", |
| 124 | + "\n", |
| 125 | + "# label= on every layer is what fills the legend; without labelled artists it comes out empty.\n", |
| 126 | + "ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", |
| 127 | + "plt.show()" |
| 128 | + ] |
| 129 | + }, |
| 130 | + { |
| 131 | + "cell_type": "code", |
| 132 | + "execution_count": null, |
| 133 | + "metadata": {}, |
| 134 | + "outputs": [], |
| 135 | + "source": [ |
| 136 | + "fig.savefig(\"chemical_space_moa.png\", bbox_inches=\"tight\")  # tight box: the legend is anchored outside the axes and would otherwise be cropped from the file\n", |
| 137 | + "fig.savefig(\"chemical_space_moa.svg\", bbox_inches=\"tight\")" |
| 138 | + ] |
| 139 | + }, |
| 140 | + { |
| 141 | + "cell_type": "code", |
| 142 | + "execution_count": null, |
| 143 | + "metadata": {}, |
| 144 | + "outputs": [], |
| 145 | + "source": [ |
| 146 | + "# Phenotypic space: the X / Y coordinates stored in main, with each selected MoA drawn in its own colour over all remaining rows.\n", |
| 147 | + "moa_colors = [\"mediumorchid\", \"indigo\", \"teal\", \"limegreen\", \"gold\", \"blue\", \"salmon\", \"rosybrown\", \"hotpink\", \"crimson\"]\n", |
| 148 | + "assert len(moa_colors) == len(selected_moas), \"one colour per selected MoA\"  # zip() below would silently drop the surplus\n", |
| 149 | + "marker_style = dict(x=\"X\", y=\"Y\", s=100, linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n", |
| 150 | + "\n", |
| 151 | + "# na=False: rows of main without an annotation (if there are any) match nothing and stay in the background,\n", |
| 152 | + "# rather than yielding a NaN mask that cannot be inverted or used for indexing.\n", |
| 153 | + "moa_annotation = main['Metadata_moa.x']\n", |
| 154 | + "highlighted = moa_annotation.str.contains('|'.join(selected_moas), na=False)  # regex alternation: any selected MoA\n", |
| 155 | + "\n", |
| 156 | + "# fig stays bound after this cell; the savefig cell that follows writes it to disk.\n", |
| 157 | + "fig, ax = plt.subplots(figsize=(10,10))\n", |
| 158 | + "# Background first, so the highlighted groups are painted over it in list order.\n", |
| 159 | + "sb.scatterplot(data=main[~highlighted], color=\"dodgerblue\", label=\"other\", ax=ax, **marker_style)\n", |
| 160 | + "for moa, moa_color in zip(selected_moas, moa_colors):\n", |
| 161 | + "    sb.scatterplot(data=main[moa_annotation.str.contains(moa, na=False)], color=moa_color, label=moa, ax=ax, **marker_style)\n", |
| 162 | + "\n", |
| 163 | + "ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # label= on every layer is what fills the legend\n", |
| 164 | + "plt.show()" |
| 165 | + ] |
| 166 | + }, |
| 167 | + { |
| 168 | + "cell_type": "code", |
| 169 | + "execution_count": null, |
| 170 | + "metadata": {}, |
| 171 | + "outputs": [], |
| 172 | + "source": [ |
| 173 | + "fig.savefig(\"phenotypic_space_moa.png\", bbox_inches=\"tight\")  # tight box: the legend is anchored outside the axes and would otherwise be cropped from the file\n", |
| 174 | + "fig.savefig(\"phenotypic_space_moa.svg\", bbox_inches=\"tight\")" |
| 175 | + ] |
| 176 | + } |
| 177 | + ], |
| 178 | + "metadata": { |
| 179 | + "kernelspec": { |
| 180 | + "display_name": "Python 3 (ipykernel)", |
| 181 | + "language": "python", |
| 182 | + "name": "python3" |
| 183 | + }, |
| 184 | + "language_info": { |
| 185 | + "codemirror_mode": { |
| 186 | + "name": "ipython", |
| 187 | + "version": 3 |
| 188 | + }, |
| 189 | + "file_extension": ".py", |
| 190 | + "mimetype": "text/x-python", |
| 191 | + "name": "python", |
| 192 | + "nbconvert_exporter": "python", |
| 193 | + "pygments_lexer": "ipython3", |
| 194 | + "version": "3.9.13" |
| 195 | + } |
| 196 | + }, |
| 197 | + "nbformat": 4, |
| 198 | + "nbformat_minor": 4 |
| 199 | +} |
0 commit comments