Skip to content

Commit

Permalink
Changed the examples so that they look like what I have in the paper.…
Browse files Browse the repository at this point in the history
… I could add more, but won't
  • Loading branch information
leschultz committed Oct 9, 2023
1 parent 891772d commit b37b80a
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 13 deletions.
4 changes: 0 additions & 4 deletions examples/single_runs/bw_rf/make_runs.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
#!/bin/bash

sets=(
"friedman1"
"fluence"
"diffusion"
"steel_yield_strength"
"super_cond"
)

grid=(0.001 0.01 0.1 1.0 10.0 100.0 1000.0 False)
Expand Down
22 changes: 22 additions & 0 deletions examples/single_runs/calibration_rf/make_runs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Create one run directory per data set under runs/ by copying the template
# directory and substituting the quoted data set name for the replace_data
# placeholder in the copied fit.py.

sets=(
"friedman1"
"fluence"
"diffusion"
"steel_yield_strength"
"super_cond"
)

mkdir -p runs
for i in "${sets[@]}"
do

    echo "${i}"
    run_dir="runs/${i}"

    # If the run directory already exists, cp -r would nest a second copy of
    # the template inside it (runs/<set>/template), so leave it untouched.
    if [ -e "${run_dir}" ]
    then
        echo "${run_dir} already exists, skipping" >&2
        continue
    fi

    # Stop instead of running sed against a directory that was never created.
    cp -r template "${run_dir}" || exit 1

    # Edit the copy by path; no cd is needed, so a failed directory change
    # can never make sed operate on the wrong file.
    sed -i "s/replace_data/'${i}'/g" "${run_dir}/fit.py"

done
9 changes: 9 additions & 0 deletions examples/single_runs/calibration_rf/submit_jobs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

# Submit every job script named submit.sh found below a directory.
#
# Usage: ./submit_jobs.sh [search_dir]
#   search_dir  directory to search recursively (current directory if omitted)

submit=submit.sh

# find emits NUL-terminated paths and read consumes them one at a time, so
# paths containing spaces or newlines are not broken apart by word splitting.
# The list is read on file descriptor 3 to leave stdin free for sbatch.
while IFS= read -r -d '' job <&3
do
    # Run in a subshell: the directory change is undone automatically and
    # sbatch is only invoked when cd succeeded.
    (
        cd "$(dirname "${job}")" && sbatch "${submit}"
    )
done 3< <(find "${1:-.}" -type f -name "${submit}" -print0)
68 changes: 68 additions & 0 deletions examples/single_runs/calibration_rf/template/fit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from sklearn.cluster import AgglomerativeClustering
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from madml.ml.splitters import BootstrappedLeaveClusterOut
from madml.models.space import distance_model
from madml.models.combine import domain_model
from madml.models.uq import calibration_model
from madml.ml.assessment import nested_cv
from madml import datasets

import numpy as np


def main():
    """
    Fit, assess, and push a random forest domain model for one data set.

    The value assigned to data_name is a template placeholder rather than a
    Python name: make_runs.sh copies this file into a run directory and uses
    sed to rewrite the placeholder into the quoted data set name. The
    template itself is therefore not runnable until that substitution is
    made.
    """

    data_name = replace_data
    run_name = 'run'
    n_repeats = 5

    # Load the data set and pull out the arrays handed to nested_cv
    data = datasets.load(data_name)
    X, y, g = data['data'], data['target'], data['class_name']

    # Distance model and uncertainty calibration model
    ds_model = distance_model(dist='kde')
    uq_model = calibration_model(params=[0.0, 1.0])

    # Regression pipeline: standardize the features, then random forest
    pipe = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('model', RandomForestRegressor()),
    ])

    # Grid searched over the pipeline's 'model' step (a single candidate)
    grid = {'model__n_estimators': [100]}

    # One (train, test) pair in which both index sets select every row,
    # so the grid search performs no actual splitting
    no_split = (slice(None), slice(None))
    gs_model = GridSearchCV(pipe, grid, cv=(no_split,))

    # Types of sampling to test
    splits = [('fit', RepeatedKFold(n_repeats=n_repeats))]

    # Combine the three models, then assess and publish the result
    combined = domain_model(gs_model, ds_model, uq_model, splits)
    cv = nested_cv(X, y, g, combined, splits, save=run_name)
    cv.assess()

    target = 'leschultz/cmg-rf-{}:latest'.format(data_name)
    cv.push(target)


if __name__ == '__main__':
    main()
6 changes: 6 additions & 0 deletions examples/single_runs/calibration_rf/template/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Put the src directory located five levels above the current directory at
# the front of the Python module search path.
src_dir="$(pwd)/../../../../../src"
export PYTHONPATH="${src_dir}:${PYTHONPATH}"

# Clear the 'run' directory left by an earlier invocation (presumably the
# output saved by fit.py, whose run_name is 'run'), then fit again.
rm -rf run
python3 fit.py
10 changes: 10 additions & 0 deletions examples/single_runs/calibration_rf/template/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/sh
#SBATCH --partition=morgan
#SBATCH --time=7-00:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=40
#SBATCH --mem-per-cpu=4000
#SBATCH --error=job.e.%J
#SBATCH --output=job.o.%J

# Batch job script handed to sbatch by submit_jobs.sh. The #SBATCH lines
# above are scheduler directives (partition, time limit, node and task
# counts, memory per CPU, and the stderr/stdout file names); they must stay
# ahead of the first command.
# The only command is run.sh, resolved relative to the job's working
# directory (submit_jobs.sh changes into this script's directory before
# calling sbatch).
./run.sh
4 changes: 0 additions & 4 deletions examples/single_runs/gt_rf/make_runs.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
#!/bin/bash

sets=(
"friedman1"
"fluence"
"diffusion"
"steel_yield_strength"
"super_cond"
)

gtgrid=(
Expand Down
4 changes: 0 additions & 4 deletions examples/single_runs/kernel_rf/make_runs.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
#!/bin/bash

sets=(
"friedman1"
"fluence"
"diffusion"
"steel_yield_strength"
"super_cond"
)

grid=("gaussian" "tophat" "epanechnikov" "exponential" "linear" "cosine")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Package information
name = 'madml'
version = '0.9.3' # Need to increment every time to push to PyPI
version = '0.9.4' # Need to increment every time to push to PyPI
description = 'Application domain of machine learning in materials science.'
url = 'https://github.com/leschultz/'\
'materials_application_domain_machine_learning.git'
Expand Down

0 comments on commit b37b80a

Please sign in to comment.