Skip to content

Commit

Permalink
New dictionary entry for confidence plots
Browse files Browse the repository at this point in the history
  • Loading branch information
leschultz committed May 7, 2024
1 parent 465fa6e commit bfdb48a
Show file tree
Hide file tree
Showing 13 changed files with 222 additions and 7 deletions.
2 changes: 1 addition & 1 deletion examples/bandwidth_grid/template/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
export PYTHONPATH=$(pwd)/../../../../../../src:$PYTHONPATH

rm -rf run
python3 fit.py
time python3 fit.py
1 change: 0 additions & 1 deletion examples/gpr/make_runs.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash

data=(
"friedman1"
"fluence"
"diffusion"
"strength"
Expand Down
2 changes: 1 addition & 1 deletion examples/gpr/template/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
export PYTHONPATH=$(pwd)/../../../../../src:$PYTHONPATH

rm -rf run
python3 fit.py
time python3 fit.py
2 changes: 1 addition & 1 deletion examples/kernel_grid/template/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
export PYTHONPATH=$(pwd)/../../../../../../src:$PYTHONPATH

rm -rf run
python3 fit.py
time python3 fit.py
46 changes: 46 additions & 0 deletions examples/minmax_scaler/make_runs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
#
# Create one job directory per (data, model) pair under runs/ by copying
# template/ and filling in the placeholders in the copied fit.py.
# Run from the directory that contains template/.

data=(
    "friedman1"
    "fluence"
    "diffusion"
    "strength"
    "supercond"
)

models=(
    "rf"
)

for i in "${data[@]}"
do

    for j in "${models[@]}"
    do

        echo "Making (data, model)=(${i}, ${j})"
        job_dir="runs/data_${i}/model_${j}"

        mkdir -p "${job_dir}"
        cp -r template/* "${job_dir}"

        # Stop if the job directory cannot be entered: the sed calls below
        # edit fit.py in place and must never run from another directory.
        cd "${job_dir}" || exit 1

        # Define the repeats (fewer for the "bnn" model on some data sets)
        if [ "${i}" == "fluence" ] && [ "${j}" == "bnn" ]; then
            r=3
        elif [ "${i}" == "friedman1" ] && [ "${j}" == "bnn" ]; then
            r=3
        elif [ "${i}" == "supercond" ] && [ "${j}" == "bnn" ]; then
            r=2
        else
            r=5
        fi

        # Data and model names are inserted as quoted Python strings,
        # the repeat count as a bare integer.
        sed -i "s/replace_data/'${i}'/g" fit.py
        sed -i "s/replace_model/'${j}'/g" fit.py
        sed -i "s/replace_repeats/${r}/g" fit.py

        # Return to the starting directory so the next relative job_dir
        # resolves correctly.
        cd - > /dev/null || exit 1

    done
done
9 changes: 9 additions & 0 deletions examples/minmax_scaler/submit_jobs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Submit every PBS job script found beneath a directory.
#
# Usage: ./submit_jobs.sh [search_root]
#   search_root  Directory searched recursively for files named submit.sh.
#                Defaults to the current directory when omitted.

submit=submit.sh

# find emits NUL-terminated paths so that directories containing spaces or
# glob characters are handled intact. The list is read on descriptor 3 to
# leave standard input untouched for qsub.
while IFS= read -r -d '' -u 3 job
do
    # The subshell confines the directory change to a single submission;
    # a job whose directory cannot be entered is skipped, not submitted
    # from the wrong place.
    (
        cd "$(dirname "${job}")" || exit 1
        qsub "${submit}"
    )
done 3< <(find "${1:-.}" -type f -name "${submit}" -print0)
81 changes: 81 additions & 0 deletions examples/minmax_scaler/template/fit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

from madml.models import dissimilarity, calibration, combine
from madml.splitters import BootstrappedLeaveClusterOut
from madml.assess import nested_cv
from madml import datasets

from mods import return_model


def main():
    """
    Assemble the MADML model for one (data, model) pair and assess it
    with nested cross validation.

    The three bare names assigned first are template placeholders that
    make_runs.sh substitutes with sed, so this file only runs after the
    substitution has been made.
    """

    # Template placeholders. The substitution is a global text replace,
    # so the placeholder tokens are deliberately not repeated in comments.
    dataset = replace_data
    model_name = replace_model
    repeats = replace_repeats

    # Fixed settings
    output_name = 'output'
    n_bins = 10

    # Load data
    loaded = datasets.load(dataset)
    X, y = loaded['data'], loaded['target']

    # ML distance model and ML UQ function
    ds_model = dissimilarity(dis='kde')
    uq_model = calibration(params=[0.0, 1.0])

    # The machine learning pipeline: scale the features, then regress
    pipe = Pipeline(steps=[
        ('scaler', MinMaxScaler()),
        ('model', return_model(model_name, X)),
    ])

    # The gridsearch model, with nothing to search over
    gs_model = GridSearchCV(
        pipe,
        {},
        cv=((slice(None), slice(None)),),  # No splits
        scoring='neg_mean_squared_error',
    )

    # Types of sampling to test: repeated k-fold first, followed by
    # bootstrapped leave-cluster-out splits for each cluster count
    splits = [('fit', RepeatedKFold(n_repeats=repeats, n_splits=5))]
    splits.extend(
        (
            'agglo_{}'.format(k),
            BootstrappedLeaveClusterOut(
                AgglomerativeClustering,
                n_repeats=repeats,
                n_clusters=k,
            ),
        )
        for k in (2, 3)
    )

    # Assess models
    combined = combine(gs_model, ds_model, uq_model, splits, bins=n_bins)
    cv = nested_cv(combined, X, y, splitters=splits)
    df, df_bin, fit_model = cv.test(
        save_outer_folds=output_name,
    )


# Run the assessment only when executed as a script
if __name__ == '__main__':
    main()
62 changes: 62 additions & 0 deletions examples/minmax_scaler/template/mods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import BaggingRegressor
from scikeras.wrappers import KerasRegressor
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn.svm import SVR


def return_model(name, X):
    """
    Build the unfitted regressor that corresponds to a short model name.

    Parameters
    ----------
    name : str
        One of 'rf', 'bols', 'bsvr' or 'bnn'.
    X : array-like
        Feature matrix. Only X.shape[1] is read, and only for 'bnn',
        where it sets the input size of the network.

    Returns
    -------
    The regressor for the requested name.

    Raises
    ------
    ValueError
        If name matches none of the supported models.
    """

    if name == 'rf':
        return RandomForestRegressor(n_estimators=100)

    elif name == 'bols':
        return BaggingRegressor(LinearRegression(), n_estimators=100)

    elif name == 'bsvr':
        return BaggingRegressor(SVR(), n_estimators=100)

    elif name == 'bnn':
        model = KerasRegressor(
            build_fn=keras_model,
            shape=X.shape[1],
            epochs=500,
            batch_size=100,
            verbose=0,
        )

        return BaggingRegressor(model, n_estimators=10)

    else:
        # Raising a plain string is itself a TypeError in Python 3, which
        # hid the intended message; raise a real exception instead.
        raise ValueError('No model matching name.')


def keras_model(shape):
    """
    Build and compile a small fully connected regression network.

    The network has two 100-unit ReLU layers, each followed by dropout at
    a rate of 0.3, and a single-unit output layer. It is compiled with
    mean squared error loss and the adam optimizer.

    Parameters
    ----------
    shape : int
        Passed as input_dim to the first dense layer.

    Returns
    -------
    The compiled Sequential model.
    """

    width = 100

    # Layers in the order they are stacked
    layers = [
        Dense(
            width,
            input_dim=shape,
            kernel_initializer='normal',
            activation='relu'
        ),
        Dropout(0.3),
        Dense(
            width,
            kernel_initializer='normal',
            activation='relu'
        ),
        Dropout(0.3),
        Dense(
            1,
            kernel_initializer='normal'
        ),
    ]

    network = Sequential()
    for layer in layers:
        network.add(layer)

    network.compile(
        loss='mean_squared_error',
        optimizer='adam'
    )

    return network
6 changes: 6 additions & 0 deletions examples/minmax_scaler/template/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Put the src directory five levels above the current directory at the
# front of the Python module search path.
PYTHONPATH="${PWD}/../../../../../src:${PYTHONPATH}"
export PYTHONPATH

# Remove any existing "run" entry, then run and time the fit
rm -rf run
time python3 fit.py
9 changes: 9 additions & 0 deletions examples/minmax_scaler/template/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#PBS -S /bin/bash
#PBS -q bardeen
#PBS -l select=1:ncpus=16:mpiprocs=16
#PBS -l walltime=72:00:00
#PBS -N job

# Work from the directory the job was submitted from. Stop if it cannot
# be entered, because run.sh removes and creates files relative to the
# current directory.
cd "${PBS_O_WORKDIR}" || exit 1

./run.sh
2 changes: 1 addition & 1 deletion examples/single_runs/template/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
export PYTHONPATH=$(pwd)/../../../../../src:$PYTHONPATH

rm -rf run
python3 fit.py
time python3 fit.py
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Package information
name = 'madml'
version = '2.6.4' # Need to increment every time to push to PyPI
version = '2.6.5' # Need to increment every time to push to PyPI
description = 'Application domain of machine learning in materials science.'
url = 'https://github.com/leschultz/'\
'materials_application_domain_machine_learning.git'
Expand Down
5 changes: 4 additions & 1 deletion src/madml/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def func(i, d, y, z):

return removed, rmse, area

def sub(x, y, ylabel, key, gt, gtlabel, metric, color):
def sub(x, y, ylabel, key, gt, gtlabel, auc, metric, color):

if key == r'$|y-\hat{y}|$':
name = 'residual'
Expand Down Expand Up @@ -122,6 +122,7 @@ def sub(x, y, ylabel, key, gt, gtlabel, metric, color):
'x': list(map(float, x)),
'y': list(map(float, y)),
'gt': float(gt),
'auc': auc,
}

plot_dump(
Expand Down Expand Up @@ -194,6 +195,7 @@ def sub(x, y, ylabel, key, gt, gtlabel, metric, color):
key,
gt_rmse,
r'$E^{RMSE/\sigma_{y}}_{c}$',
auc_rmse,
'rmse',
color,
)
Expand All @@ -204,6 +206,7 @@ def sub(x, y, ylabel, key, gt, gtlabel, metric, color):
key,
gt_area,
r'$E^{area}_{c}$',
auc_area,
'area',
color,
)
Expand Down

0 comments on commit bfdb48a

Please sign in to comment.