
Commit fd53a88

Restored some previous stuff and slight improvements
leschultz committed Sep 18, 2023
1 parent a2b1499 commit fd53a88
Showing 4 changed files with 39 additions and 26 deletions.
examples/single_runs/wg_rf/make_runs.sh (2 changes: 0 additions & 2 deletions)
@@ -11,8 +11,6 @@ sets=(
 
 wtgrid=(
 features
-bandwidths
-scores
 none
 )
 
setup.py (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 
 # Package information
 name = 'madml'
-version = '0.7.9' # Need to increment every time to push to PyPI
+version = '0.8.0' # Need to increment every time to push to PyPI
 description = 'Application domain of machine learning in materials science.'
 url = 'https://github.com/leschultz/'\
       'materials_application_domain_machine_learning.git'
src/madml/models/space.py (50 changes: 32 additions & 18 deletions)
@@ -48,24 +48,43 @@ def fit(
             self.kernel = 'epanechnikov'
 
             if 'bandwidth' in self.kwargs.keys():
-                self.bandwidth = self.kwargs['bandwidth']
+                bandwidth = self.kwargs['bandwidth']
             else:
-                self.bandwidth = estimate_bandwidth(X_train)
+                bandwidth = estimate_bandwidth(X_train)
 
-            self.model = KernelDensity(
-                kernel=self.kernel,
-                bandwidth=self.bandwidth,
-                )
+            if bandwidth > 0.0:
+                model = KernelDensity(
+                    kernel=self.kernel,
+                    bandwidth=bandwidth,
+                    )
 
-            self.model.fit(X_train)
+                model.fit(X_train)
+                self.bandwidth = model.bandwidth_
 
-            dist = self.model.score_samples(X_train)
-            m = np.max(dist)
-            cut = 0.0 # No likelihood should be greater than that trained on
-            self.scaler = lambda x: np.maximum(cut, 1-np.exp(x-m))
+                dist = model.score_samples(X_train)
+                m = np.max(dist)
+
+                def pred(X):
+                    out = model.score_samples(X)
+                    out = out-m
+                    out = np.exp(out)
+                    out = 1-out
+                    out = np.maximum(0.0, out)
+                    return out
+
+                self.model = pred
+
+            else:
+                self.model = lambda x: np.repeat(1.0, len(x))
 
         else:
-            self.model = lambda X_test: cdist(X_train, X_test, self.dist)
+
+            def pred(X):
+                out = cdist(X_train, X, self.dist)
+                out = np.mean(out, axis=0)
+                return out
+
+            self.model = pred
 
     def predict(self, X):
         '''
@@ -80,11 +99,6 @@ def predict(self, X):
         if self.weigh == 'features':
             X = X*self.weights
 
-        if self.dist == 'kde':
-            dist = self.model.score_samples(X)
-            dist = self.scaler(dist)
-
-        else:
-            dist = np.mean(self.model(X), axis=0)
+        dist = self.model(X)
 
         return dist
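
Note: the rewritten fit() above builds a callable that maps kernel density log-likelihoods onto a [0, 1] dissimilarity (0 at or above the densest training point, approaching 1 far from the training data), with a constant fallback of 1.0 when the estimated bandwidth is not positive. The following is a minimal standalone sketch of that scheme, not the madml class itself; the function name fit_kde_dissimilarity and the synthetic data are illustrative only, and estimate_bandwidth and KernelDensity are assumed to be the scikit-learn helpers used in this module.

```python
import numpy as np
from sklearn.cluster import estimate_bandwidth
from sklearn.neighbors import KernelDensity


def fit_kde_dissimilarity(X_train, kernel='epanechnikov', bandwidth=None):
    # Estimate a bandwidth when one is not supplied, as in the diff above.
    if bandwidth is None:
        bandwidth = estimate_bandwidth(X_train)

    # Degenerate bandwidth: treat every query point as maximally dissimilar.
    if bandwidth <= 0.0:
        return lambda X: np.repeat(1.0, len(X))

    model = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(X_train)
    m = np.max(model.score_samples(X_train))  # densest training log-likelihood

    def pred(X):
        # Log-likelihood relative to the training maximum, squashed into [0, 1].
        out = model.score_samples(X) - m
        return np.maximum(0.0, 1 - np.exp(out))

    return pred


# Illustrative usage on synthetic data: in-domain points score near 0,
# points shifted far from the training cloud score near 1.
rng = np.random.default_rng(0)
X_train = rng.normal(size=(100, 2))
score = fit_kde_dissimilarity(X_train)
print(score(X_train).mean(), score(X_train + 10.0).mean())
```
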
src/madml/plots.py (11 changes: 6 additions & 5 deletions)
@@ -1122,22 +1122,23 @@ def pr(score, in_domain, pos_label, save=False):
     baseline = [1 if i == pos_label else 0 for i in in_domain]
     baseline = sum(baseline)/len(in_domain)
     relative_base = 1-baseline # The amount of area to gain in PR
+    diff = auc_score-baseline
 
     # AUC relative to the baseline
     if relative_base == 0.0:
         auc_relative = 0.0
     else:
-        auc_relative = (auc_score-baseline)/relative_base
+        auc_relative = (diff)/relative_base
 
     os.makedirs(save, exist_ok=True)
 
     fig, ax = pl.subplots()
 
     pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)
-    pr_label = 'AUC: {:.2f}\nRelative AUC: {:.2f}'.format(
-        auc_score,
-        auc_relative
-        )
+    pr_label = 'AUC: {:.2f}\n'.format(auc_score)
+    pr_label += 'Relative AUC: {:.2f}\n'.format(auc_relative)
+    pr_label += 'AUC-Baseline: {:.2f}'.format(diff)
+
     pr_display.plot(ax=ax, label=pr_label)
 
     ax.hlines(
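
Note: the change above reports the raw gap between the precision-recall AUC and the no-skill baseline (AUC-Baseline) alongside the baseline-relative AUC. The following is a small self-contained sketch of that bookkeeping; the labels and scores are made up rather than taken from any madml run, and precision_recall_curve/auc stand in for however auc_score, precision, and recall are computed earlier in the real pr() function.

```python
import numpy as np
from sklearn.metrics import auc, precision_recall_curve

# Hypothetical in-domain labels and model scores, for illustration only.
in_domain = np.array([1, 1, 0, 1, 0, 0, 1, 0])
score = np.array([0.9, 0.8, 0.7, 0.6, 0.4, 0.3, 0.2, 0.1])
pos_label = 1

precision, recall, _ = precision_recall_curve(in_domain, score, pos_label=pos_label)
auc_score = auc(recall, precision)

baseline = np.mean(in_domain == pos_label)  # PR baseline: prevalence of the positive class
relative_base = 1 - baseline                # room left above the baseline
diff = auc_score - baseline

# Guard the degenerate all-positive case, as the diff does.
auc_relative = 0.0 if relative_base == 0.0 else diff / relative_base

print('AUC: {:.2f}, Relative AUC: {:.2f}, AUC-Baseline: {:.2f}'.format(
    auc_score, auc_relative, diff))
```
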
