
Commit fd53a88

Restored some previous stuff and slight improvements
leschultz committed Sep 18, 2023
1 parent a2b1499 commit fd53a88
Showing 4 changed files with 39 additions and 26 deletions.
examples/single_runs/wg_rf/make_runs.sh (2 changes: 0 additions & 2 deletions)
@@ -11,8 +11,6 @@ sets=(
 
 wtgrid=(
 features
-bandwidths
-scores
 none
 )
 
setup.py (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 
 # Package information
 name = 'madml'
-version = '0.7.9' # Need to increment every time to push to PyPI
+version = '0.8.0' # Need to increment every time to push to PyPI
 description = 'Application domain of machine learning in materials science.'
 url = 'https://github.com/leschultz/'\
       'materials_application_domain_machine_learning.git'
src/madml/models/space.py (50 changes: 32 additions & 18 deletions)
@@ -48,24 +48,43 @@ def fit(
             self.kernel = 'epanechnikov'
 
             if 'bandwidth' in self.kwargs.keys():
-                self.bandwidth = self.kwargs['bandwidth']
+                bandwidth = self.kwargs['bandwidth']
             else:
-                self.bandwidth = estimate_bandwidth(X_train)
+                bandwidth = estimate_bandwidth(X_train)
 
-            self.model = KernelDensity(
-                kernel=self.kernel,
-                bandwidth=self.bandwidth,
-                )
+            if bandwidth > 0.0:
+                model = KernelDensity(
+                    kernel=self.kernel,
+                    bandwidth=bandwidth,
+                    )
 
-            self.model.fit(X_train)
+                model.fit(X_train)
+                self.bandwidth = model.bandwidth_
 
-            dist = self.model.score_samples(X_train)
-            m = np.max(dist)
-            cut = 0.0 # No likelihood should be greater than that trained on
-            self.scaler = lambda x: np.maximum(cut, 1-np.exp(x-m))
+                dist = model.score_samples(X_train)
+                m = np.max(dist)
+
+                def pred(X):
+                    out = model.score_samples(X)
+                    out = out-m
+                    out = np.exp(out)
+                    out = 1-out
+                    out = np.maximum(0.0, out)
+                    return out
+
+                self.model = pred
+
+            else:
+                self.model = lambda x: np.repeat(1.0, len(x))
 
         else:
-            self.model = lambda X_test: cdist(X_train, X_test, self.dist)
+
+            def pred(X):
+                out = cdist(X_train, X, self.dist)
+                out = np.mean(out, axis=0)
+                return out
+
+            self.model = pred
 
     def predict(self, X):
         '''
@@ -80,11 +99,6 @@ def predict(self, X):
         if self.weigh == 'features':
             X = X*self.weights
 
-        if self.dist == 'kde':
-            dist = self.model.score_samples(X)
-            dist = self.scaler(dist)
-
-        else:
-            dist = np.mean(self.model(X), axis=0)
+        dist = self.model(X)
 
         return dist
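
Note: the rewritten fit() above builds a callable that maps kernel density log-likelihoods onto a [0, 1] dissimilarity (0 at or above the densest training point, approaching 1 far from the training data), with a constant fallback of 1.0 when the estimated bandwidth is not positive. The following is a minimal standalone sketch of that scheme, not the madml class itself; the function name fit_kde_dissimilarity and the synthetic data are illustrative only, and estimate_bandwidth and KernelDensity are assumed to be the scikit-learn helpers used in this module.

```python
import numpy as np
from sklearn.cluster import estimate_bandwidth
from sklearn.neighbors import KernelDensity


def fit_kde_dissimilarity(X_train, kernel='epanechnikov', bandwidth=None):
    # Estimate a bandwidth when one is not supplied, as in the diff above.
    if bandwidth is None:
        bandwidth = estimate_bandwidth(X_train)

    # Degenerate bandwidth: treat every query point as maximally dissimilar.
    if bandwidth <= 0.0:
        return lambda X: np.repeat(1.0, len(X))

    model = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(X_train)
    m = np.max(model.score_samples(X_train))  # densest training log-likelihood

    def pred(X):
        # Log-likelihood relative to the training maximum, squashed into [0, 1].
        out = model.score_samples(X) - m
        return np.maximum(0.0, 1 - np.exp(out))

    return pred


# Illustrative usage on synthetic data: in-domain points score near 0,
# points shifted far from the training cloud score near 1.
rng = np.random.default_rng(0)
X_train = rng.normal(size=(100, 2))
score = fit_kde_dissimilarity(X_train)
print(score(X_train).mean(), score(X_train + 10.0).mean())
```
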
src/madml/plots.py (11 changes: 6 additions & 5 deletions)
@@ -1122,22 +1122,23 @@ def pr(score, in_domain, pos_label, save=False):
     baseline = [1 if i == pos_label else 0 for i in in_domain]
     baseline = sum(baseline)/len(in_domain)
     relative_base = 1-baseline # The amount of area to gain in PR
+    diff = auc_score-baseline
 
     # AUC relative to the baseline
     if relative_base == 0.0:
         auc_relative = 0.0
     else:
-        auc_relative = (auc_score-baseline)/relative_base
+        auc_relative = (diff)/relative_base
 
     os.makedirs(save, exist_ok=True)
 
     fig, ax = pl.subplots()
 
     pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)
-    pr_label = 'AUC: {:.2f}\nRelative AUC: {:.2f}'.format(
-        auc_score,
-        auc_relative
-        )
+    pr_label = 'AUC: {:.2f}\n'.format(auc_score)
+    pr_label += 'Relative AUC: {:.2f}\n'.format(auc_relative)
+    pr_label += 'AUC-Baseline: {:.2f}'.format(diff)
+
     pr_display.plot(ax=ax, label=pr_label)
 
     ax.hlines(
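
Note: the change above reports the raw gap between the precision-recall AUC and the no-skill baseline (AUC-Baseline) alongside the baseline-relative AUC. The following is a small self-contained sketch of that bookkeeping; the labels and scores are made up rather than taken from any madml run, and precision_recall_curve/auc stand in for however auc_score, precision, and recall are computed earlier in the real pr() function.

```python
import numpy as np
from sklearn.metrics import auc, precision_recall_curve

# Hypothetical in-domain labels and model scores, for illustration only.
in_domain = np.array([1, 1, 0, 1, 0, 0, 1, 0])
score = np.array([0.9, 0.8, 0.7, 0.6, 0.4, 0.3, 0.2, 0.1])
pos_label = 1

precision, recall, _ = precision_recall_curve(in_domain, score, pos_label=pos_label)
auc_score = auc(recall, precision)

baseline = np.mean(in_domain == pos_label)  # PR baseline: prevalence of the positive class
relative_base = 1 - baseline                # room left above the baseline
diff = auc_score - baseline

# Guard the degenerate all-positive case, as the diff does.
auc_relative = 0.0 if relative_base == 0.0 else diff / relative_base

print('AUC: {:.2f}, Relative AUC: {:.2f}, AUC-Baseline: {:.2f}'.format(
    auc_score, auc_relative, diff))
```
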
