-
Notifications
You must be signed in to change notification settings - Fork 27
ASK/TELL DEVELOP #1307
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
ASK/TELL DEVELOP #1307
Changes from 202 commits
c9c4671
601af44
d454b5c
92e22e4
d14f4d2
e27487d
a6feb77
12a133b
de1916a
ee2508e
070fc6f
a969f50
6eb5fe8
1261274
d960b96
3ce0ca2
09cb4a6
601f02c
6733fe5
7466100
751de5e
18e7079
a34d589
5f33724
41c16b7
4860428
ced8992
cbfdf0b
4261ca8
460bbe3
7fdd8a6
69b0584
8c01ca9
4541d8a
345aea3
fe7629e
0d7e1a3
c7d1cb1
94de46f
80df25f
b5d8bcf
f52bf92
5434dfa
0ab048d
7a9a2d8
a68ffb8
5228711
1ef5898
8371d97
092be69
3ebc467
1159e74
138c89e
2922259
c2a2802
5a2eb09
10accde
aa8ad57
6712f1e
1eec392
c4418fb
2b8e537
70dde7b
484304b
b0897d0
cdcb2d8
57bbfb1
dbd19fd
994b652
64c2cd1
3104240
3d262fb
847a617
f45ddbe
26f1d73
10e96d8
0290deb
e01e87b
a1eb450
23b1549
1b4c2c6
5f777c2
a165cdd
ac5467b
85507a4
fc30284
98267e3
74579d5
63ef323
1b1cd59
dcb3486
6828fe0
e443af9
a1937a9
dedef4c
4b812d6
f9e3cba
14c36fa
25299e7
f0736fb
7fa4d1e
14daf3c
114c7a4
507bc0a
18a52c9
231e6f0
eaebbff
043feeb
c66f10b
3d7981b
c380595
fdcfd66
1e0abd3
c7ea54b
c111afd
0ee448c
bb37f4b
38b3967
4b49233
1d213ef
f8c5eaf
dff6bad
c1ec7f6
a5133b9
682daa8
09ebdbc
9f200f0
cf5ac63
f2ef248
2c6a9c4
7224de3
99a7a2c
e8b7052
23e5164
06c14d7
bc1587e
5c2308d
0d146fc
ef906d5
9d07e6c
902b7f0
64b6401
ab09b9f
d66dafb
f4a9691
6fb608e
f926bfa
25bca85
2973b41
5a7160f
b5d66e0
bf4577d
c24730b
8695692
fcb434e
581c9a5
877ecef
54d7b3f
1cd8b45
05fca90
7a96995
c86c571
8379e0f
0ad9dcf
e2263a3
57e8c4b
a952fc1
6ec7528
3ac630a
08ad9df
aca1d50
6ac1faa
56c644e
fec7aa4
0db9f0b
49fda1e
1d6f83b
81267a5
b633f94
a3e8346
4da4298
c0e452c
f9385c2
02b349c
f162e75
c041a6b
23ed512
34f582e
390597f
f80aa9f
80c09d7
7ca34b1
f6341a4
fef5833
673c3eb
65ca79b
1ed05e5
9b1195b
24df60f
715773b
08ead4a
22b69b4
f664e37
99bc450
423f0d6
56e59aa
145e09d
9a6f299
9c0e258
4a52e0b
73bbf69
bf0d79e
b3ce513
2428152
0bcd7c9
bf9ed05
acc4811
2a67724
8c9e313
6b54991
a4ead36
3622219
9b3429b
a2c58fc
012227a
03420b3
682425a
b209901
fd630eb
57a8de9
050c22d
5d31b63
d1d4b76
b05762a
0b8cdec
1e52d99
3c21202
1cb542f
9798b3e
585c521
cf36e85
77efa2a
ed6604d
9cbca1e
ec773d4
5ea9b2b
b14b85d
f8d1833
ad54abd
a383dc0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ | |
from libensemble import logger | ||
|
||
from .ensemble import Ensemble | ||
from .generators import Generator |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
"""Generator class exposing gpCAM functionality""" | ||
|
||
import time | ||
from typing import List | ||
|
||
import numpy as np | ||
from gpcam import GPOptimizer as GP | ||
from numpy import typing as npt | ||
|
||
# While there are class / func duplicates - re-use functions. | ||
from libensemble.gen_funcs.persistent_gpCAM import ( | ||
_calculate_grid_distances, | ||
_eval_var, | ||
_find_eligible_points, | ||
_generate_mesh, | ||
_read_testpoints, | ||
) | ||
from libensemble.generators import LibensembleGenerator | ||
|
||
__all__ = [ | ||
"GP_CAM", | ||
"GP_CAM_Covar", | ||
] | ||
|
||
|
||
# Note - batch size is set in wrapper currently - and passed to ask as n_trials. | ||
# To support empty ask(), add batch_size back in here. | ||
|
||
|
||
# Equivalent to function persistent_gpCAM_ask_tell
class GP_CAM(LibensembleGenerator):
    """
    This generation function constructs a global surrogate of `f` values.

    It is a batched method that produces a first batch uniformly random from
    (lb, ub). On subsequent iterations, it calls an optimization method to
    produce the next batch of points. This optimization might be too slow
    (relative to the simulation evaluation time) for some use cases.
    """

    def _initialize_gpcAM(self, user_specs):
        """Extract user params and initialize empty training-data history.

        Sets the lb/ub bounds, the problem dimension ``n``, and the running
        ``all_x`` / ``all_y`` arrays that accumulate every evaluated point
        used to (re)train the GP.
        """
        # self.b = user_specs["batch_size"]
        self.lb = np.array(user_specs["lb"])
        self.ub = np.array(user_specs["ub"])
        self.n = len(self.lb)  # dimension
        assert isinstance(self.n, int), "Dimension must be an integer"
        assert isinstance(self.lb, np.ndarray), "lb must be a numpy array"
        assert isinstance(self.ub, np.ndarray), "ub must be a numpy array"
        # Full history of evaluated points and their f values (grown in tell_np).
        self.all_x = np.empty((0, self.n))
        self.all_y = np.empty((0, 1))
        # NOTE(review): fixed global numpy seed — presumably for reproducibility
        # of gpCAM internals; confirm this is intended rather than per-stream seeding.
        np.random.seed(0)

    def __init__(self, H, persis_info, gen_specs, libE_info=None):
        """Store the standard libEnsemble specs and set up the (lazy) GP model.

        The GPOptimizer itself is created on the first ``tell_np`` call, once
        training data exists.
        """
        self.H = H
        self.persis_info = persis_info
        self.gen_specs = gen_specs
        self.libE_info = libE_info

        self.U = self.gen_specs["user"]
        self._initialize_gpcAM(self.U)
        self.my_gp = None  # created lazily in tell_np
        self.noise = 1e-8  # 1e-12

    def ask_np(self, n_trials: int) -> npt.NDArray:
        """Return ``n_trials`` sample points as a structured array (field "x").

        The first batch is uniform random in (lb, ub); subsequent batches come
        from the GP's ``ask`` optimizer over the same bounds.
        """
        if self.all_x.shape[0] == 0:
            self.x_new = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (n_trials, self.n))
        else:
            start = time.time()
            self.x_new = self.my_gp.ask(
                bounds=np.column_stack((self.lb, self.ub)),
                n=n_trials,
                pop_size=n_trials,
                max_iter=1,
            )["x"]
            print(f"Ask time:{time.time() - start}")
        H_o = np.zeros(n_trials, dtype=self.gen_specs["out"])
        H_o["x"] = self.x_new
        return H_o

    def tell_np(self, calc_in: npt.NDArray) -> None:
        """Absorb evaluated results (field "f") and retrain the GP.

        Drops any NaN results (failed evaluations), appends the surviving
        points to the full history, then creates or updates the GPOptimizer
        and retrains it on all data.
        """
        if calc_in is not None:
            self.y_new = np.atleast_2d(calc_in["f"]).T
            # Remove points whose evaluation produced NaN, from both x and y.
            nan_indices = [i for i, fval in enumerate(self.y_new) if np.isnan(fval)]
            self.x_new = np.delete(self.x_new, nan_indices, axis=0)
            self.y_new = np.delete(self.y_new, nan_indices, axis=0)

            self.all_x = np.vstack((self.all_x, self.x_new))
            self.all_y = np.vstack((self.all_y, self.y_new))

            if self.my_gp is None:
                self.my_gp = GP(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y)))
            else:
                self.my_gp.tell(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y)))
            self.my_gp.train()
|
||
|
||
class GP_CAM_Covar(GP_CAM):
    """
    This generation function constructs a global surrogate of `f` values.

    It is a batched method that produces a first batch uniformly random from
    (lb, ub) and on following iterations samples the GP posterior covariance
    function to find sample points.
    """

    def __init__(self, H, persis_info, gen_specs, libE_info=None):
        """Initialize GP state plus the variance-sampling candidate set.

        When ``user["use_grid"]`` is set, a fixed mesh of candidate points is
        generated up front together with the initial exclusion-radius bounds.
        """
        super().__init__(H, persis_info, gen_specs, libE_info)
        self.test_points = _read_testpoints(self.U)
        self.x_for_var = None  # candidate points at which posterior variance is evaluated
        self.var_vals = None  # variance values at x_for_var (computed in tell_np)
        if self.U.get("use_grid"):
            self.num_points = 10  # mesh resolution per dimension
            self.x_for_var = _generate_mesh(self.lb, self.ub, self.num_points)
            self.r_low_init, self.r_high_init = _calculate_grid_distances(self.lb, self.ub, self.num_points)

    # Fix: return annotation was List[dict]; this method returns a structured
    # numpy array, matching the parent class's ask_np.
    def ask_np(self, n_trials: int) -> npt.NDArray:
        """Return ``n_trials`` sample points as a structured array (field "x").

        The first batch is uniform random. Afterwards, the candidates with
        the highest posterior variance are chosen; in grid mode an exclusion
        radius is bisected down until enough mutually eligible points exist.
        """
        if self.all_x.shape[0] == 0:
            x_new = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (n_trials, self.n))
        else:
            if not self.U.get("use_grid"):
                # Take the n_trials candidates with the largest variance.
                x_new = self.x_for_var[np.argsort(self.var_vals)[-n_trials:]]
            else:
                r_high = self.r_high_init
                r_low = self.r_low_init
                x_new = []
                r_cand = r_high  # Let's start with a large radius and stop when we have batchsize points

                sorted_indices = np.argsort(-self.var_vals)
                # Bisect the exclusion radius downward until n_trials eligible
                # points are found among the highest-variance candidates.
                while len(x_new) < n_trials:
                    x_new = _find_eligible_points(self.x_for_var, sorted_indices, r_cand, n_trials)
                    if len(x_new) < n_trials:
                        r_high = r_cand
                    r_cand = (r_high + r_low) / 2.0

        self.x_new = x_new
        H_o = np.zeros(n_trials, dtype=self.gen_specs["out"])
        H_o["x"] = self.x_new
        return H_o

    def tell_np(self, calc_in: npt.NDArray) -> None:
        """Update the GP with results, then refresh the variance candidates.

        In non-grid mode a fresh random candidate set (10x the batch size) is
        drawn each time; in grid mode the fixed mesh from ``__init__`` is kept.
        """
        if calc_in is not None:
            # Fix: was super().tell(calc_in). The parent's numpy-native update
            # is tell_np; tell expects a list of dicts, not a structured array.
            super().tell_np(calc_in)
            if not self.U.get("use_grid"):
                n_trials = len(self.y_new)
                self.x_for_var = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (10 * n_trials, self.n))

            self.var_vals = _eval_var(
                self.my_gp, self.all_x, self.all_y, self.x_for_var, self.test_points, self.persis_info
            )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
"""Generator classes providing points using sampling""" | ||
|
||
import numpy as np | ||
|
||
from libensemble.generators import LibensembleGenerator | ||
|
||
__all__ = [ | ||
"UniformSample", | ||
] | ||
|
||
|
||
class UniformSample(LibensembleGenerator):
    """
    This generator returns ``gen_specs["initial_batch_size"]`` uniformly
    sampled points the first time it is called. Afterwards, it returns the
    number of points given. This can be used in either a batch or asynchronous
    mode by adjusting the allocation function.
    """

    # Fix: return annotation was "-> list"; __init__ always returns None.
    def __init__(self, _, persis_info, gen_specs, libE_info=None) -> None:
        """Store specs and extract sampling bounds.

        The first (history) argument is accepted for interface parity with
        other generators but is unused by this sampler.
        """
        # self.H = H

        self.persis_info = persis_info
        self.gen_specs = gen_specs
        self.libE_info = libE_info
        self._get_user_params(self.gen_specs["user"])

    def ask_np(self, n_trials: int) -> np.ndarray:
        """Return ``n_trials`` uniform points as a structured array (field "x").

        If the output dtype declares an "obj_component" field, fill it with
        random component indices in [0, num_components).
        """
        H_o = np.zeros(n_trials, dtype=self.gen_specs["out"])
        H_o["x"] = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (n_trials, self.n))

        if "obj_component" in H_o.dtype.fields:  # needs H_o - needs to be created in here.
            H_o["obj_component"] = self.persis_info["rand_stream"].integers(
                low=0, high=self.gen_specs["user"]["num_components"], size=n_trials
            )
        return H_o

    def tell_np(self, calc_in) -> None:
        pass  # random sample so nothing to tell

    def _get_user_params(self, user_specs) -> None:
        """Extract user params (bounds and dimension) into attributes."""
        # b = user_specs["initial_batch_size"]
        self.ub = user_specs["ub"]
        self.lb = user_specs["lb"]
        self.n = len(self.lb)  # dimension
        assert isinstance(self.n, int), "Dimension must be an integer"
        assert isinstance(self.lb, np.ndarray), "lb must be a numpy array"
        assert isinstance(self.ub, np.ndarray), "ub must be a numpy array"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import inspect | ||
|
||
import numpy as np | ||
|
||
from libensemble.message_numbers import EVAL_GEN_TAG, FINISHED_PERSISTENT_GEN_TAG, PERSIS_STOP, STOP_TAG | ||
from libensemble.tools.persistent_support import PersistentSupport | ||
from libensemble.utils.misc import np_to_list_dicts | ||
|
||
|
||
def persistent_gen_f(H, persis_info, gen_specs, libE_info):
    """Persistent worker loop that drives an ask/tell generator object.

    Repeatedly asks the generator for a batch, sends it out for evaluation,
    and tells the results back, until a stop tag is received. The batch size
    tracks the number of results most recently returned.
    """
    support = PersistentSupport(libE_info, EVAL_GEN_TAG)
    user = gen_specs["user"]
    batch_size = user.get("initial_batch_size") or user.get("batch_size")

    # Accept either a generator class (instantiate it) or a ready instance.
    gen_source = user["generator"]
    gen = gen_source(H, persis_info, gen_specs, libE_info) if inspect.isclass(gen_source) else gen_source

    tag, calc_in = None, None
    while tag not in [STOP_TAG, PERSIS_STOP]:
        H_o = gen.ask(batch_size)
        if isinstance(H_o, list):
            # Normalize a list-of-dicts batch into a structured numpy array.
            converted = np.zeros(len(H_o), dtype=gen_specs["out"])
            for row, trial in zip(converted, H_o):
                for field in H_o[0].keys():
                    row[field] = trial[field]
            H_o = converted
        tag, Work, calc_in = support.send_recv(H_o)
        gen.tell(np_to_list_dicts(calc_in))

        # Match the next batch to however many results just came back.
        if hasattr(calc_in, "__len__"):
            batch_size = len(calc_in)

    return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to decide on the `__init__` interface. We questioned before whether we keep the same interface - which mirrors the current gen_f, or to rearrange, as H is often not given (basically an H0). So it could be `gen_specs` first. I'm leaning towards keeping the original ordering as it mirrors our user functions, but this should be discussed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough. My opinion/intuition is a user is more likely to prefer either "classical" gens (e.g. Jeff) or ask/tell gens (e.g. other CAMPA folks). With these gens' interfaces and users being so different, I don't think an arguably simpler rearrangement of the input parameters is too confusing.
Similarly to how some people prefer numpy or pandas; they do similar things, but their interfaces being different isn't a point of contention.
I'd also lean towards if someone were to initialize some object, like a gen, themselves, they'd prefer their specifications be provided as early and clearly as possible:
vs.