Skip to content

Commit

Permalink
updated examples and tests
Browse files: browse the repository at this point in the history
  • Loading branch information
leschultz committed Jan 28, 2024
1 parent 88cec52 commit a326f22
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 34 deletions.
30 changes: 14 additions & 16 deletions examples/jupyter/tutorial_1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,9 @@
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"from madml.ml.splitters import BootstrappedLeaveClusterOut\n",
"from madml.models.space import distance_model\n",
"from madml.models.combine import domain_model\n",
"from madml.models.uq import calibration_model\n",
"from madml.ml.assessment import nested_cv\n",
"from madml.models import dissimilarity, calibration, domain, combine\n",
"from madml.splitters import BootstrappedLeaveClusterOut\n",
"from madml.assess import nested_cv\n",
"from madml import datasets"
]
},
Expand Down Expand Up @@ -101,10 +99,10 @@
},
"outputs": [],
"source": [
" data = datasets.load('diffusion')\n",
" X = data['data']\n",
" y = data['target']\n",
" g = data['class_name']"
"data = datasets.load('diffusion')\n",
"X = data['data']\n",
"y = data['target']\n",
"g = data['class_name']"
]
},
{
Expand Down Expand Up @@ -134,7 +132,7 @@
},
"outputs": [],
"source": [
"ds_model = distance_model(dist='kde')"
"ds_model = dissimilarity(dis='kde')"
]
},
{
Expand All @@ -154,7 +152,7 @@
},
"outputs": [],
"source": [
"uq_model = calibration_model(params=[0.0, 1.0])"
"uq_model = calibration(params=[0.0, 1.0])"
]
},
{
Expand Down Expand Up @@ -271,7 +269,7 @@
},
"outputs": [],
"source": [
"model = domain_model(gs_model, ds_model, uq_model, splits)\n",
"model = combine(gs_model, ds_model, uq_model, splits)\n",
"model.fit(X, y, g)"
]
},
Expand All @@ -288,8 +286,8 @@
"metadata": {},
"outputs": [],
"source": [
"cv = nested_cv(X, y, g, model, splits, save='./runs')\n",
"_, model = cv.assess()"
"cv = nested_cv(model, X, y, splitters=splits)\n",
"df, df_bin, fit_model = cv.test()"
]
},
{
Expand Down Expand Up @@ -318,7 +316,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Maybe the predefined thresholds for domain are insufficient. We can instead use some manual thresholds as a list of tuples with <('dissimilarity measure', 'domain of id or od', 'threshold')> as follows:"
"Maybe the predefined thresholds for domain are insufficient. We can instead supply a single manual threshold when predicting:"
]
},
{
Expand All @@ -327,7 +325,7 @@
"metadata": {},
"outputs": [],
"source": [
"df = model.predict(X, [('dist', 'id', 0.75), ('dist', 'id_bin', 0.2)])\n",
"df = model.predict(X, 0.5)\n",
"print(df)"
]
}
Expand Down
4 changes: 2 additions & 2 deletions examples/jupyter/tutorial_2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
"from madml.hosting.docker import dockerhub_model\n",
"from madml import datasets\n",
"\n",
"container_name = 'leschultz/diffusion_example'\n",
"container_name = 'leschultz/test:latest'\n",
"model = dockerhub_model(container_name)\n",
"\n",
"X = datasets.load('diffusion')['data']\n",
"X = datasets.load('strength')['data']\n",
"y = model.predict(X)\n",
"print(y)"
]
Expand Down
2 changes: 0 additions & 2 deletions src/madml/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,6 @@ def list_data():
'friedman1',
'strength',
'fluence',
'make_regression',
'fetch_california_housing',
]

return datanames
28 changes: 14 additions & 14 deletions tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from madml.models.space import distance_model
from madml.models.combine import domain_model
from madml.models.uq import calibration_model
from madml.ml.assessment import nested_cv
from madml.models import dissimilarity, calibration, domain, combine
from madml.splitters import BootstrappedLeaveClusterOut
from madml.assess import nested_cv
from madml import datasets

import unittest
Expand All @@ -22,20 +21,20 @@ def test_ml(self):
Test a quick run.
'''

run_name = 'run'
run_name = 'output'

# Load data
data = datasets.load('diffusion')
data = datasets.load('strength')
df = data['frame']
X = data['data']
y = data['target']
g = data['class_name']

# ML Distance model
ds_model = distance_model(dist='kde')
ds_model = dissimilarity(dis='kde')

# ML UQ function
uq_model = calibration_model(params=[0.0, 0.1])
uq_model = calibration(params=[0.0, 0.1])

# ML
scale = StandardScaler()
Expand All @@ -44,8 +43,6 @@ def test_ml(self):
# The grid to do grid search
grid = {}
grid['model__n_estimators'] = [100]
grid['model__max_features'] = [None]
grid['model__max_depth'] = [None]

# The ML Pipeline
pipe = Pipeline(steps=[
Expand All @@ -63,10 +60,13 @@ def test_ml(self):
# Types of sampling to test
splits = [('fit', RepeatedKFold(n_repeats=1))]

# Fit models
model = domain_model(gs_model, ds_model, uq_model, splits)
cv = nested_cv(X, y, g, model, splits, save=run_name)
cv.assess()
# Assess models
model = combine(gs_model, ds_model, uq_model, splits)
cv = nested_cv(model, X, y, splitters=splits)
df, df_bin, fit_model = cv.test()

# Full fit model and write results.
cv.write_results(run_name)

# Clean up directory
shutil.rmtree(run_name)
Expand Down

0 comments on commit a326f22

Please sign in to comment.