updated examples and tests

leschultz · Jan 28, 2024 · a326f22
1 parent 88cec52
commit a326f22
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 34 deletions.
diff --git a/examples/jupyter/tutorial_1.ipynb b/examples/jupyter/tutorial_1.ipynb
@@ -55,11 +55,9 @@
     "from sklearn.preprocessing import StandardScaler\n",
     "from sklearn.pipeline import Pipeline\n",
     "\n",
-    "from madml.ml.splitters import BootstrappedLeaveClusterOut\n",
-    "from madml.models.space import distance_model\n",
-    "from madml.models.combine import domain_model\n",
-    "from madml.models.uq import calibration_model\n",
-    "from madml.ml.assessment import nested_cv\n",
+    "from madml.models import dissimilarity, calibration, domain, combine\n",
+    "from madml.splitters import BootstrappedLeaveClusterOut\n",
+    "from madml.assess import nested_cv\n",
     "from madml import datasets"
    ]
   },
@@ -101,10 +99,10 @@
    },
    "outputs": [],
    "source": [
-    "    data = datasets.load('diffusion')\n",
-    "    X = data['data']\n",
-    "    y = data['target']\n",
-    "    g = data['class_name']"
+    "data = datasets.load('diffusion')\n",
+    "X = data['data']\n",
+    "y = data['target']\n",
+    "g = data['class_name']"
    ]
   },
   {
@@ -134,7 +132,7 @@
    },
    "outputs": [],
    "source": [
-    "ds_model = distance_model(dist='kde')"
+    "ds_model = dissimilarity(dis='kde')"
    ]
   },
   {
@@ -154,7 +152,7 @@
    },
    "outputs": [],
    "source": [
-    "uq_model = calibration_model(params=[0.0, 1.0])"
+    "uq_model = calibration(params=[0.0, 1.0])"
    ]
   },
   {
@@ -271,7 +269,7 @@
    },
    "outputs": [],
    "source": [
-    "model = domain_model(gs_model, ds_model, uq_model, splits)\n",
+    "model = combine(gs_model, ds_model, uq_model, splits)\n",
     "model.fit(X, y, g)"
    ]
   },
@@ -288,8 +286,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "cv = nested_cv(X, y, g, model, splits, save='./runs')\n",
-    "_, model = cv.assess()"
+    "cv = nested_cv(model, X, y, splitters=splits)\n",
+    "df, df_bin, fit_model = cv.test()"
    ]
   },
   {
@@ -318,7 +316,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Maybe the predefined thresholds for domain are insufficient. We can instead use some manual thresholds as a list of tuples with <('dissimilarity measure', 'domain of id or od', 'threshold')> as follows:"
+    "Maybe the predefined thresholds for domain are insufficient. We can instead supply a single manual threshold when predicting:"
    ]
   },
   {
@@ -327,7 +325,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df = model.predict(X, [('dist', 'id', 0.75), ('dist', 'id_bin', 0.2)])\n",
+    "df = model.predict(X, 0.5)\n",
     "print(df)"
    ]
   }

diff --git a/examples/jupyter/tutorial_2.ipynb b/examples/jupyter/tutorial_2.ipynb
@@ -41,10 +41,10 @@
     "from madml.hosting.docker import dockerhub_model\n",
     "from madml import datasets\n",
     "\n",
-    "container_name = 'leschultz/diffusion_example'\n",
+    "container_name = 'leschultz/test:latest'\n",
     "model = dockerhub_model(container_name)\n",
     "\n",
-    "X = datasets.load('diffusion')['data']\n",
+    "X = datasets.load('strength')['data']\n",
     "y = model.predict(X)\n",
     "print(y)"
    ]

diff --git a/src/madml/datasets.py b/src/madml/datasets.py
@@ -349,8 +349,6 @@ def list_data():
                  'friedman1',
                  'strength',
                  'fluence',
-                 'make_regression',
-                 'fetch_california_housing',
                  ]
 
     return datanames
diff --git a/tests/test_run.py b/tests/test_run.py
@@ -5,10 +5,9 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.pipeline import Pipeline
 
-from madml.models.space import distance_model
-from madml.models.combine import domain_model
-from madml.models.uq import calibration_model
-from madml.ml.assessment import nested_cv
+from madml.models import dissimilarity, calibration, domain, combine
+from madml.splitters import BootstrappedLeaveClusterOut
+from madml.assess import nested_cv
 from madml import datasets
 
 import unittest
@@ -22,20 +21,20 @@ def test_ml(self):
         Test a quick run.
         '''
 
-        run_name = 'run'
+        run_name = 'output'
 
         # Load data
-        data = datasets.load('diffusion')
+        data = datasets.load('strength')
         df = data['frame']
         X = data['data']
         y = data['target']
         g = data['class_name']
 
         # ML Distance model
-        ds_model = distance_model(dist='kde')
+        ds_model = dissimilarity(dis='kde')
 
         # ML UQ function
-        uq_model = calibration_model(params=[0.0, 0.1])
+        uq_model = calibration(params=[0.0, 0.1])
 
         # ML
         scale = StandardScaler()
@@ -44,8 +43,6 @@ def test_ml(self):
         # The grid to do grid search
         grid = {}
         grid['model__n_estimators'] = [100]
-        grid['model__max_features'] = [None]
-        grid['model__max_depth'] = [None]
 
         # The ML Pipeline
         pipe = Pipeline(steps=[
@@ -63,10 +60,13 @@ def test_ml(self):
         # Types of sampling to test
         splits = [('fit', RepeatedKFold(n_repeats=1))]
 
-        # Fit models
-        model = domain_model(gs_model, ds_model, uq_model, splits)
-        cv = nested_cv(X, y, g, model, splits, save=run_name)
-        cv.assess()
+        # Assess models
+        model = combine(gs_model, ds_model, uq_model, splits)
+        cv = nested_cv(model, X, y, splitters=splits)
+        df, df_bin, fit_model = cv.test()
+
+        # Full fit model and write results.
+        cv.write_results(run_name)
 
         # Clean up directory
         shutil.rmtree(run_name)