
Commit a5a06ea

Changed the length scale for gpr

1 parent a513bed commit a5a06ea

File tree

27 files changed: +669 −31 lines

examples/gpr_instead_of_kde/make_runs.sh renamed to examples/gpr/gpr_scaled/make_runs.sh

Lines changed: 0 additions & 3 deletions

@@ -10,9 +10,6 @@ data=(
 
 models=(
     "rf"
-    "bols"
-    "bsvr"
-    "bnn"
 )
 
 for i in "${data[@]}"
Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+#!/bin/bash
+
+export PYTHONPATH=$(pwd)/../../../../../../src:$PYTHONPATH
+
+rm -rf run
+python3 fit.py
Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+#!/bin/bash
+
+data=(
+    "friedman1"
+    "fluence"
+    "diffusion"
+    "strength"
+    "supercond"
+)
+
+models=(
+    "rf"
+)
+
+for i in "${data[@]}"
+do
+
+    for j in "${models[@]}"
+    do
+
+        echo "Making (data, model)=(${i}, ${j})"
+        job_dir="runs/data_${i}/model_${j}"
+
+        mkdir -p ${job_dir}
+        cp -r template/* ${job_dir}
+        cd ${job_dir}
+
+        # Define the repeats
+        if [ "${i}" == "fluence" ] && [ "${j}" == "bnn" ]; then
+            r=3
+        elif [ "${i}" == "friedman1" ] && [ "${j}" == "bnn" ]; then
+            r=3
+        elif [ "${i}" == "supercond" ] && [ "${j}" == "bnn" ]; then
+            r=2
+        else
+            r=5
+        fi
+
+        sed -i "s/replace_data/'${i}'/g" fit.py
+        sed -i "s/replace_model/'${j}'/g" fit.py
+        sed -i "s/replace_repeats/${r}/g" fit.py
+
+        cd - > /dev/null
+
+    done
+done
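
The sed calls above fill the replace_* placeholders in each copied template fit.py. A minimal Python sketch of the same substitution (illustrative only; the path and values below are hypothetical examples, and the commit itself uses sed):

from pathlib import Path

# Hypothetical example: fill the placeholders in one generated run directory.
fit = Path('runs/data_friedman1/model_rf/fit.py')
text = fit.read_text()
text = text.replace('replace_data', "'friedman1'")  # sed wraps the value in quotes
text = text.replace('replace_model', "'rf'")
text = text.replace('replace_repeats', '5')
fit.write_text(text)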
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+#!/bin/bash
+
+submit=submit.sh
+for i in $(find ${1} -type f -name ${submit})
+do
+    cd $(dirname ${i})
+    qsub ${submit}
+    cd - > /dev/null
+done
Lines changed: 67 additions & 0 deletions

@@ -0,0 +1,67 @@
+from sklearn.model_selection import RepeatedKFold
+from sklearn.model_selection import GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import Pipeline
+
+from madml.models import dissimilarity, calibration, combine
+from madml.assess import nested_cv
+from madml import datasets
+
+from mods import return_model
+
+
+def main():
+
+    run_name = 'output'
+    data_name = replace_data
+    model = replace_model
+
+    # Load data
+    data = datasets.load(data_name)
+    X = data['data']
+    y = data['target']
+
+    # MADML parameters
+    bins = 10
+    n_repeats = replace_repeats
+
+    # ML Distance model
+    ds_model = dissimilarity(dis='gpr')
+
+    # ML UQ function
+    uq_model = calibration(params=[0.0, 1.0])
+
+    # ML
+    scale = StandardScaler()
+    model = return_model(model, X)
+
+    # The grid for grid search
+    grid = {}
+
+    # The machine learning pipeline
+    pipe = Pipeline(steps=[
+        ('scaler', scale),
+        ('model', model),
+    ])
+
+    # The gridsearch model
+    gs_model = GridSearchCV(
+        pipe,
+        grid,
+        cv=((slice(None), slice(None)),),  # No splits
+        scoring='neg_mean_squared_error',
+    )
+
+    # Types of sampling to test
+    splits = [('fit', RepeatedKFold(n_repeats=n_repeats, n_splits=5))]
+
+    # Assess models
+    model = combine(gs_model, ds_model, uq_model, splits, bins=bins)
+    cv = nested_cv(model, X, y, splitters=splits)
+    df, df_bin, fit_model = cv.test(
+        save_outer_folds=run_name,
+    )
+
+
+if __name__ == '__main__':
+    main()
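
The cv=((slice(None), slice(None)),) argument hands GridSearchCV a single pre-made split whose train and test sets are both the full dataset, so the "grid search" degenerates to a plain fit. A minimal standalone sketch of that idiom (the toy data and estimator are illustrative, not from the commit):

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

X = np.random.rand(50, 4)
y = np.random.rand(50)

# One (train, test) pair covering all rows: no real cross-validation.
no_cv = ((slice(None), slice(None)),)

gs = GridSearchCV(
    RandomForestRegressor(n_estimators=10),
    param_grid={},  # empty grid, matching grid = {} above
    cv=no_cv,
    scoring='neg_mean_squared_error',
)
gs.fit(X, y)
print(gs.best_estimator_)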
Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import BaggingRegressor
+from scikeras.wrappers import KerasRegressor
+from keras.layers import Dense, Dropout
+from keras.models import Sequential
+from sklearn.svm import SVR
+
+
+def return_model(name, X):
+
+    if name == 'rf':
+        return RandomForestRegressor(n_estimators=100)
+
+    elif name == 'bols':
+        return BaggingRegressor(LinearRegression(), n_estimators=100)
+
+    elif name == 'bsvr':
+        return BaggingRegressor(SVR(), n_estimators=100)
+
+    elif name == 'bnn':
+        model = KerasRegressor(
+            build_fn=keras_model,
+            shape=X.shape[1],
+            epochs=500,
+            batch_size=100,
+            verbose=0,
+        )
+
+        return BaggingRegressor(model, n_estimators=10)
+
+    else:
+        raise ValueError('No model matching name.')
+
+
+def keras_model(shape):
+
+    n = 100
+    model = Sequential()
+    model.add(Dense(
+        n,
+        input_dim=shape,
+        kernel_initializer='normal',
+        activation='relu'
+    ))
+    model.add(Dropout(0.3))
+    model.add(Dense(
+        n,
+        kernel_initializer='normal',
+        activation='relu'
+    ))
+    model.add(Dropout(0.3))
+    model.add(Dense(
+        1,
+        kernel_initializer='normal'
+    ))
+    model.compile(
+        loss='mean_squared_error',
+        optimizer='adam'
+    )
+
+    return model
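
The 'bols', 'bsvr', and 'bnn' options all wrap a base learner in BaggingRegressor, whose fitted estimators_ give an ensemble of predictions; the spread across members is a common uncertainty estimate. A small illustrative sketch of that general bagging idea (toy data; this is not madml's own UQ pipeline):

import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR

X = np.random.rand(100, 4)
y = np.random.rand(100)

# Same construction as return_model('bsvr', X).
bsvr = BaggingRegressor(SVR(), n_estimators=100).fit(X, y)

# Per-member predictions; their standard deviation is an
# ensemble-based uncertainty estimate for each point.
member_preds = np.stack([est.predict(X) for est in bsvr.estimators_])
y_std = member_preds.std(axis=0)
print(y_std[:5])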
Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+#!/bin/bash
+
+export PYTHONPATH=$(pwd)/../../../../../../src:$PYTHONPATH
+
+rm -rf run
+python3 fit.py
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+#PBS -S /bin/bash
+#PBS -q bardeen
+#PBS -l select=1:ncpus=16:mpiprocs=16
+#PBS -l walltime=72:00:00
+#PBS -N job
+
+cd $PBS_O_WORKDIR
+
+./run.sh
Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+#!/bin/bash
+
+data=(
+    "friedman1"
+    "fluence"
+    "diffusion"
+    "strength"
+    "supercond"
+)
+
+models=(
+    "rf"
+)
+
+for i in "${data[@]}"
+do
+
+    for j in "${models[@]}"
+    do
+
+        echo "Making (data, model)=(${i}, ${j})"
+        job_dir="runs/data_${i}/model_${j}"
+
+        mkdir -p ${job_dir}
+        cp -r template/* ${job_dir}
+        cd ${job_dir}
+
+        # Define the repeats
+        if [ "${i}" == "fluence" ] && [ "${j}" == "bnn" ]; then
+            r=3
+        elif [ "${i}" == "friedman1" ] && [ "${j}" == "bnn" ]; then
+            r=3
+        elif [ "${i}" == "supercond" ] && [ "${j}" == "bnn" ]; then
+            r=2
+        else
+            r=5
+        fi
+
+        sed -i "s/replace_data/'${i}'/g" fit.py
+        sed -i "s/replace_model/'${j}'/g" fit.py
+        sed -i "s/replace_repeats/${r}/g" fit.py
+
+        cd - > /dev/null
+
+    done
+done
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+#!/bin/bash
+
+submit=submit.sh
+for i in $(find ${1} -type f -name ${submit})
+do
+    cd $(dirname ${i})
+    qsub ${submit}
+    cd - > /dev/null
+done
Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.model_selection import RepeatedKFold
+from sklearn.model_selection import GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import Pipeline
+
+from madml.models import dissimilarity, calibration, combine
+from madml.splitters import BootstrappedLeaveClusterOut
+from madml.assess import nested_cv
+from madml import datasets
+
+from mods import return_model
+
+
+def main():
+
+    run_name = 'output'
+    data_name = replace_data
+    model = replace_model
+
+    # Load data
+    data = datasets.load(data_name)
+    X = data['data']
+    y = data['target']
+
+    # MADML parameters
+    bins = 10
+    n_repeats = replace_repeats
+
+    # ML Distance model
+    ds_model = dissimilarity(dis='gpr', scale=False)
+
+    # ML UQ function
+    uq_model = calibration(params=[0.0, 1.0])
+
+    # ML
+    scale = StandardScaler()
+    model = return_model(model, X)
+
+    # The grid for grid search
+    grid = {}
+
+    # The machine learning pipeline
+    pipe = Pipeline(steps=[
+        ('scaler', scale),
+        ('model', model),
+    ])
+
+    # The gridsearch model
+    gs_model = GridSearchCV(
+        pipe,
+        grid,
+        cv=((slice(None), slice(None)),),  # No splits
+        scoring='neg_mean_squared_error',
+    )
+
+    # Types of sampling to test
+    splits = [('fit', RepeatedKFold(n_repeats=n_repeats, n_splits=5))]
+
+    # Bootstrap, cluster data, and generate splits
+    for clusters in [2, 3]:
+
+        # Cluster Splits
+        top_split = BootstrappedLeaveClusterOut(
+            AgglomerativeClustering,
+            n_repeats=n_repeats,
+            n_clusters=clusters,
+        )
+
+        splits.append(('agglo_{}'.format(clusters), top_split))
+
+    # Assess models
+    model = combine(gs_model, ds_model, uq_model, splits, bins=bins)
+    cv = nested_cv(model, X, y, splitters=splits)
+    df, df_bin, fit_model = cv.test(
+        save_outer_folds=run_name,
+    )
+
+
+if __name__ == '__main__':
+    main()
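
BootstrappedLeaveClusterOut holds out entire feature-space clusters to probe extrapolation beyond the training domain. A bare-bones sketch of the underlying idea using plain scikit-learn (illustrative only; madml's splitter additionally bootstraps the held-out clusters):

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import LeaveOneGroupOut

X = np.random.rand(60, 4)

# Label each point by its agglomerative cluster, then leave one
# whole cluster out per split, as a proxy for out-of-domain test sets.
groups = AgglomerativeClustering(n_clusters=3).fit_predict(X)
for train_idx, test_idx in LeaveOneGroupOut().split(X, groups=groups):
    print('train:', len(train_idx), 'held-out cluster:', len(test_idx))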
