62442katieb
diff --git a/‎BMA_ExportClusterExps_MNI.m ‎erroneous-matlab-kmeans/BMA_ExportClusterExps_MNI.m b/‎BMA_ExportClusterExps_MNI.m ‎erroneous-matlab-kmeans/BMA_ExportClusterExps_MNI.m
diff --git a/‎BMA_ExportClusterExps_Tal.m ‎erroneous-matlab-kmeans/BMA_ExportClusterExps_Tal.m b/‎BMA_ExportClusterExps_Tal.m ‎erroneous-matlab-kmeans/BMA_ExportClusterExps_Tal.m
diff --git a/‎BMA_ModeledActivationImgs_MNI.m ‎erroneous-matlab-kmeans/BMA_ModeledActivationImgs_MNI.m b/‎BMA_ModeledActivationImgs_MNI.m ‎erroneous-matlab-kmeans/BMA_ModeledActivationImgs_MNI.m
diff --git a/‎BMA_ModeledActivationImgs_Tal.m ‎erroneous-matlab-kmeans/BMA_ModeledActivationImgs_Tal.m b/‎BMA_ModeledActivationImgs_Tal.m ‎erroneous-matlab-kmeans/BMA_ModeledActivationImgs_Tal.m
diff --git a/‎BMA_ReadGingerALEText.m ‎erroneous-matlab-kmeans/BMA_ReadGingerALEText.m b/‎BMA_ReadGingerALEText.m ‎erroneous-matlab-kmeans/BMA_ReadGingerALEText.m
diff --git a/‎BMA_corr_MAmaps.m ‎erroneous-matlab-kmeans/BMA_corr_MAmaps.m b/‎BMA_corr_MAmaps.m ‎erroneous-matlab-kmeans/BMA_corr_MAmaps.m
diff --git a/‎BMA_kmeans_MNI.m ‎erroneous-matlab-kmeans/BMA_kmeans_MNI.m b/‎BMA_kmeans_MNI.m ‎erroneous-matlab-kmeans/BMA_kmeans_MNI.m
diff --git a/‎BMA_kmeans_Tal.m ‎erroneous-matlab-kmeans/BMA_kmeans_Tal.m b/‎BMA_kmeans_Tal.m ‎erroneous-matlab-kmeans/BMA_kmeans_Tal.m
diff --git a/‎BrainMapMetaData_NormalActs.mat ‎erroneous-matlab-kmeans/BrainMapMetaData_NormalActs.mat b/‎BrainMapMetaData_NormalActs.mat ‎erroneous-matlab-kmeans/BrainMapMetaData_NormalActs.mat
diff --git a/‎kmeans_metrics.m ‎erroneous-matlab-kmeans/kmeans_metrics.m b/‎kmeans_metrics.m ‎erroneous-matlab-kmeans/kmeans_metrics.m
diff --git a/‎evaluating_clustering_solutions.ipynb
+266 b/‎evaluating_clustering_solutions.ipynb
+266
diff --git a/‎fwd_rev_decoding_annotations.ipynb
+695 b/‎fwd_rev_decoding_annotations.ipynb
+695
diff --git a/‎re-clustering.ipynb
+344 b/‎re-clustering.ipynb
+344
@@ -0,0 +1,344 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "from os.path import join, basename\n",
+    "import re\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pylab\n",
+    "import pyale\n",
+    "import nibabel as nib\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.cluster import k_means, dbscan, SpectralClustering, FeatureAgglomeration\n",
+    "from sklearn.metrics import silhouette_score, normalized_mutual_info_score\n",
+    "from scipy.cluster import hierarchy\n",
+    "import scipy.io as sio\n",
+    "import seaborn as sns\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from math import log\n",
+    "\n",
+    "def variation_of_information(X, Y):\n",
+    "    #from https://gist.github.com/jwcarr/626cbc80e0006b526688\n",
+    "    n = float(sum([len(x) for x in X]))\n",
+    "    sigma = 0.0\n",
+    "    for x in X:\n",
+    "        p = len(x) / n\n",
+    "        for y in Y:\n",
+    "            q = len(y) / n\n",
+    "            r = len(set(x) & set(y)) / n\n",
+    "            if r > 0.0:\n",
+    "                sigma += r * (log(r / p, 2) + log(r / q, 2))\n",
+    "    return abs(sigma)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "out_dir = '/Users/Katie/Dropbox/Data/Naturalistic/NewSolution/python_output'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "matlab_nat = sio.loadmat('/Users/Katie/Dropbox/Data/Naturalistic/NewSolution/Naturalistic_11.25.17.mat')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "corrmat = matlab_nat['corrmat']\n",
+    "corrmat2 = np.corrcoef(corrmat)\n",
+    "minus_r = 1 - corrmat2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Kernel k-means clustering with k = 2: 1.20s\n",
+      "Ward's clustering with k = 2: 0.01s\n",
+      "K-means clustering with k = 2: 5.59s\n",
+      "Kernel k-means clustering with k = 3: 1.96s\n",
+      "Ward's clustering with k = 3: 0.01s\n",
+      "K-means clustering with k = 3: 7.44s\n",
+      "Kernel k-means clustering with k = 4: 2.74s\n",
+      "Ward's clustering with k = 4: 0.01s\n",
+      "K-means clustering with k = 4: 7.83s\n",
+      "Kernel k-means clustering with k = 5: 2.71s\n",
+      "Ward's clustering with k = 5: 0.01s\n",
+      "K-means clustering with k = 5: 8.55s\n",
+      "Kernel k-means clustering with k = 6: 2.58s\n",
+      "Ward's clustering with k = 6: 0.01s\n",
+      "K-means clustering with k = 6: 8.52s\n",
+      "Kernel k-means clustering with k = 7: 2.93s\n",
+      "Ward's clustering with k = 7: 0.01s\n",
+      "K-means clustering with k = 7: 9.48s\n",
+      "Kernel k-means clustering with k = 8: 3.19s\n",
+      "Ward's clustering with k = 8: 0.01s\n",
+      "K-means clustering with k = 8: 9.67s\n",
+      "Kernel k-means clustering with k = 9: 3.56s\n",
+      "Ward's clustering with k = 9: 0.01s\n",
+      "K-means clustering with k = 9: 10.26s\n",
+      "Kernel k-means clustering with k = 10: 3.58s\n",
+      "Ward's clustering with k = 10: 0.01s\n",
+      "K-means clustering with k = 10: 10.78s\n"
+     ]
+    }
+   ],
+   "source": [
+    "k = np.arange(2, 11)\n",
+    "k_labels = {}\n",
+    "kk_labels = {}\n",
+    "w_labels = {}\n",
+    "\n",
+    "for i in k:\n",
+    "    #Kernel Kmeans nonlinear\n",
+    "    start = time.time()\n",
+    "    kernelk = SpectralClustering(n_clusters=i, eigen_solver=None, n_init=1000, gamma=1.0, affinity='rbf', assign_labels='kmeans', degree=3, coef0=1, n_jobs=1)\n",
+    "    labels = kernelk.fit_predict(minus_r)\n",
+    "    print \"Kernel k-means clustering with k = {0}: %.2fs\".format(i) % (time.time() - start) \n",
+    "    #np.savetxt('/Users/Katie/Dropbox/Data/Naturalistic/NewSolution/kernelk_labels_{0}.txt'.format(i), labels, delimiter='\\t')\n",
+    "    kk_labels[i] = labels\n",
+    "\n",
+    "    #Ward's\n",
+    "    #calculate pdist (1 - r) \n",
+    "    start = time.time()\n",
+    "    ward = FeatureAgglomeration(n_clusters=i, linkage='ward', memory='nilearn_cache')\n",
+    "    ward.fit(minus_r)\n",
+    "    #np.savetxt('/Users/Katie/Dropbox/Data/Naturalistic/NewSolution/ward_labels_{0}.txt'.format(i), ward.labels_, delimiter='\\t')\n",
+    "    w_labels[i] = ward.labels_\n",
+    "    print \"Ward's clustering with k = {0}: %.2fs\".format(i) % (time.time() - start)\n",
+    "\n",
+    "\n",
+    "    #KMeans clustering\n",
+    "    start = time.time()\n",
+    "    [centroid, label, inertia] = k_means(minus_r, i, init='k-means++', precompute_distances='auto',\n",
+    "                                         n_init=1000, max_iter=1023, verbose=False, tol=0.0001,\n",
+    "                                         random_state=None, copy_x=True, n_jobs=1, algorithm='auto',\n",
+    "                                         return_n_iter=False)\n",
+    "    k_labels[i] = label\n",
+    "    #np.savetxt('/Users/Katie/Dropbox/Data/Naturalistic/NewSolution/kmeans_labels_{0}.txt'.format(i), label, delimiter='\\t')\n",
+    "    print \"K-means clustering with k = {0}: %.2fs\".format(i) % (time.time() - start)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn import cluster\n",
+    "\n",
+    "eps = np.arange(0.01, 2, 0.01)\n",
+    "db_labels = {}\n",
+    "\n",
+    "for i in eps:\n",
+    "    [core, labels] = cluster.dbscan(minus_r, eps=i, metric='precomputed')\n",
+    "    db_labels['eps = {0}'.format(i)] = labels\n",
+    "\n",
+    "dbscan_out = pd.Series(db_labels)\n",
+    "dbscan_out.to_csv('{0}/dbscan.txt'.format(out_dir), sep='\\t')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k_sils = []\n",
+    "kk_sils = []\n",
+    "w_sils = []\n",
+    "matlab_sils = []\n",
+    "mk_kk_mutual_info = []\n",
+    "mk_w_mutual_info = []\n",
+    "mk_pk_mutual_info = []\n",
+    "\n",
+    "\n",
+    "k = np.arange(2, 11)\n",
+    "\n",
+    "for i in k:\n",
+    "    j = i-2\n",
+    "    \n",
+    "    matlab_soln = matlab_nat['IDX'][0, j].flatten()\n",
+    "    matlab_silhouette = silhouette_score(minus_r, matlab_soln, metric='precomputed', random_state=None)\n",
+    "    matlab_sils.append(matlab_silhouette)\n",
+    "    \n",
+    "    \n",
+    "    k_silhouette = silhouette_score(minus_r, k_labels[i], metric='precomputed', random_state=None)\n",
+    "    k_sils.append(k_silhouette)\n",
+    "\n",
+    "    kk_silhouette = silhouette_score(minus_r, kk_labels[i], metric='precomputed', random_state=None)\n",
+    "    kk_sils.append(kk_silhouette)\n",
+    "\n",
+    "    w_silhouette = silhouette_score(minus_r, w_labels[i], metric='precomputed', random_state=None)\n",
+    "    w_sils.append(w_silhouette)\n",
+    "    \n",
+    "    k_labels[i].reshape(-1, 1)\n",
+    "    kk_labels[i].reshape(-1, 1)\n",
+    "    w_labels[i].reshape(-1, 1)\n",
+    "\n",
+    "    mk_kk_nmi = normalized_mutual_info_score(matlab_soln, kk_labels[i])\n",
+    "    mk_kk_mutual_info.append(mk_kk_nmi)\n",
+    "    \n",
+    "    mk_pk_nmi = normalized_mutual_info_score(k_labels[i], matlab_soln)\n",
+    "    mk_pk_mutual_info.append(mk_pk_nmi)\n",
+    "\n",
+    "    mk_w_nmi = normalized_mutual_info_score(matlab_soln, w_labels[i])\n",
+    "    mk_w_mutual_info.append(mk_w_nmi)\n",
+    "\n",
+    "np.savetxt('{0}/wards_silhouettes.txt'.format(out_dir), w_sils, delimiter='\\t')\n",
+    "np.savetxt('{0}/kernel_kmean_silhouettes.txt'.format(out_dir), kk_sils, delimiter='\\t')\n",
+    "np.savetxt('{0}/python_kmean_silhouettes.txt'.format(out_dir), k_sils, delimiter='\\t')\n",
+    "np.savetxt('{0}/matlab_kmean_silhouettes.txt'.format(out_dir), matlab_sils, delimiter='\\t')\n",
+    "\n",
+    "np.savetxt('{0}/mk_kk_nmi.txt'.format(out_dir), mk_kk_mutual_info, delimiter='\\t')\n",
+    "np.savetxt('{0}/mk_w_nmi.txt'.format(out_dir), mk_w_mutual_info, delimiter='\\t')\n",
+    "np.savetxt('{0}/mk_pk_nmi.txt'.format(out_dir), mk_pk_mutual_info, delimiter='\\t')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "k_clusters_idx = {}\n",
+    "kk_clusters_idx = {}\n",
+    "w_clusters_idx = {}\n",
+    "mk_clusters_idx = \n",
+    "\n",
+    "for i in k:\n",
+    "    k_clusters = []\n",
+    "    kk_clusters = []\n",
+    "    w_clusters = []\n",
+    "    for j in range(i):\n",
+    "        k_clusters.append(list(np.where(k_labels[i-2] == j)[0]))\n",
+    "        kk_clusters.append(list(np.where(kk_labels[i-2] == j)[0]))\n",
+    "        w_clusters.append(list(np.where(w_labels[i-2] == j)[0]))\n",
+    "    k_clusters_idx['Solution {0}'.format(i)] = k_clusters\n",
+    "    kk_clusters_idx['Solution {0}'.format(i)] = kk_clusters\n",
+    "    w_clusters_idx['Solution {0}'.format(i)] = w_clusters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vi_k = {}\n",
+    "vi_kk = {}\n",
+    "vi_w = {}\n",
+    "vi_mk = {}\n",
+    "\n",
+    "\n",
+    "for i in k[:-1]:\n",
+    "    j = k_clusters_idx['Solution {0}'.format(i)]\n",
+    "    z = k_clusters_idx['Solution {0}'.format(i+1)]\n",
+    "    k_vi = variation_of_information(j, z)\n",
+    "    vi_k[i+1] = k_vi\n",
+    "    \n",
+    "    j = kk_clusters_idx['Solution {0}'.format(i)]\n",
+    "    z = kk_clusters_idx['Solution {0}'.format(i+1)]\n",
+    "    kk_vi = variation_of_information(j, z)\n",
+    "    vi_kk[i+1] = kk_vi\n",
+    "                                                                                          \n",
+    "    j = w_clusters_idx['Solution {0}'.format(i)]\n",
+    "    z = w_clusters_idx['Solution {0}'.format(i+1)]\n",
+    "    w_vi = variation_of_information(j, z)\n",
+    "    vi_w[i+1] = w_vi\n",
+    "    \n",
+    "\n",
+    "wards_vi = pd.Series(vi_w)\n",
+    "#wards_vi.to_csv('{0}/wards_vi.csv'.format(out_dir), sep=',')\n",
+    "\n",
+    "kmeans_vi = pd.Series(vi_k)\n",
+    "#kmeans_vi.to_csv('{0}/pythonkmean_vi.csv'.format(out_dir), sep=',')\n",
+    "\n",
+    "kernelk_vi = pd.Series(vi_kk)\n",
+    "#kernelk_vi.to_csv('{0}/kernelkmean_vi.csv'.format(out_dir), sep=',')\n",
+    "\n",
+    "vi_mk = [0.831299855, 0.894586133, 1.064756853, 0.575203598, 0.854956836, 0.809800235, 0.827846322, 1.277838909]\n",
+    "matlabk_vi = pd.Series(vi_mk, index=[3, 4, 5, 6, 7, 8, 9, 10])\n",
+    "#matlabk_vi.to_csv('{0}/matlabkmean_vi.csv'.format(out_dir), sep=',')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "variation_info = pd.DataFrame([kernelk_vi, matlabk_vi, kmeans_vi, wards_vi]).transpose()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "variation_info.to_csv('{0}/variation_of_information.csv'.format(out_dir), sep=',')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}