surrogate model #77
base: main
@@ -28,23 +28,19 @@
from probeye.definition.likelihood_model import GaussianLikelihoodModel
from probeye.inference.emcee.solver import EmceeSolver
from probeye.definition.distribution import Uniform
from probeye.definition.surrogate_model import HarlowModelFactory
from probeye.metamodeling.sampling import LatinHypercubeSampler, HarlowSampler
from probeye.metamodeling.surrogating import HarlowSurrogate

# local imports (inference data post-processing)
from probeye.postprocessing.sampling_plots import create_pair_plot
from probeye.postprocessing.sampling_plots import create_posterior_plot

# Surrogate model imports
from harlow.sampling import LatinHypercube, FuzzyLolaVoronoi
from harlow.sampling import FuzzyLolaVoronoi, ProbabilisticSampler, LatinHypercube
from harlow.surrogating import (
    Surrogate,
    ModelListGaussianProcess,
    VanillaGaussianProcess,
)
from harlow.utils.transforms import (
    ExpandDims,
    TensorTransform,
    ChainTransform,
    Identity,
)
@@ -64,9 +60,10 @@
n_walkers = 20

# Sampler settings
n_init = 250
n_iter = 0
n_point_per_iter = 2
n_init = 100
n_iter = 5
n_point_per_iter = 20
stopping_criterium = -np.inf

# Surrogate settings
N_train_iter = 50
@@ -196,6 +193,7 @@ def generate_data():
n_params = 4
list_params = [[i for i in range(n_params)]] * len(sensor_names) * len(t_vec)

# Kwargs to be passed to the surrogate model
surrogate_kwargs = {
    "training_max_iter": N_train_iter,
    "list_params": list_params,
@@ -206,24 +204,29 @@ def generate_data():
    "output_transform": Identity,
}

# Generate surrogate class using model factory
model_factory = HarlowModelFactory(
    problem,
    forward_model,
    FuzzyLolaVoronoi,
    VanillaGaussianProcess,
    **surrogate_kwargs,
)
# Define the surrogate model
surrogate_model = VanillaGaussianProcess(**surrogate_kwargs)

# Sample
model_factory.sample(
    n_iter=n_iter,
# Probeye's latin hypercube sampler
lhs_sampler = LatinHypercubeSampler(problem)

# An iterative sampler. Here we pass the surrogate model directly to the sampler.
# However, it is also possible to pass a surrogate model that will be included in
# a forward model after fitting.
harlow_sampler = HarlowSampler(problem, forward_model, LatinHypercube, surrogate_model)
Review comment: what is LatinHypercube here? could you document that somehow?
Reply: It is the `LatinHypercube` sampler class from `harlow.sampling` (see the imports above).
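A minimal sketch of the two roles, restating names from this diff (illustrative only, no additional API):

# harlow's LHS sampling strategy, handed to the harlow machinery as a class
from harlow.sampling import LatinHypercube

# probeye-side classes added by this PR
from probeye.metamodeling.sampling import LatinHypercubeSampler, HarlowSampler

lhs_sampler = LatinHypercubeSampler(problem)  # draws training points from the priors
harlow_sampler = HarlowSampler(  # couples problem, forward model, strategy, surrogate
    problem, forward_model, LatinHypercube, surrogate_model
)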
# Sampler and fit
harlow_sampler.sample(
    n_initial_point=n_init,
    n_new_point_per_iteration=n_point_per_iter,
    n_iter=n_iter,
    stopping_criterium=stopping_criterium,
)
harlow_sampler.fit()

# Get forward model instance
forward_surrogate_model = model_factory.get_harlow_model("FastModel")
# Define the surrogate forward model
forward_surrogate_model = HarlowSurrogate(
    "SurrogateModel", surrogate_model, forward_model
)

# =========================================================================
# Add forward models
@@ -1,47 +1,199 @@
# standard library
from typing import Tuple, Callable
import copy

# third party imports
import numpy as np
from typing import Callable
import pandas as pd
from scipy.stats import qmc

# local imports
from probeye.definition.inverse_problem import InverseProblem
from probeye.definition.forward_model import ForwardModelBase
from probeye.inference.scipy.priors import translate_prior
from probeye.subroutines import len_or_one

# Harlow imports
from harlow.sampling import Sampler
from harlow.surrogating import Surrogate
# external imports
from harlow.sampling import Sampler as HarlowSamplerBase
from harlow.surrogating import Surrogate as HarlowSurrogateBase
from harlow.utils.helper_functions import latin_hypercube_sampling

Review comment: I am not very fond of having external imports in the same script where the base class is defined. I would personally keep the base class separated from the derived ones, creating one new script for each group of samplers.
Reply: This is done for convenience initially, and it would be trivial to separate the specific implementations from the base class if necessary later.
||
class SurrogateModelBase(ForwardModelBase): | ||
class SamplerBase: | ||
""" | ||
Base class for probeye samplers | ||
""" | ||
|
||
def __init__(self, problem: InverseProblem): | ||
# the considered inverse problem | ||
self.problem = problem | ||
|
||
# the sampling happens before the problem is given to a solver; this means that | ||
# the priors of the problem are merely descriptive and they have to be | ||
# translated to have their respective computing routines | ||
self.priors = copy.deepcopy(self.problem.priors) | ||
for prior_template in self.problem.priors.values(): | ||
prm_name = prior_template.ref_prm | ||
self.priors[prm_name] = translate_prior(prior_template) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think the sampler should have to treat the priors itself, probably better in the interface. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the challenge with the implementation is that we strictly separated between the problem definition and compute functions (such as evaluating the prior with e.g. a scipy pdf evaluation, or a pytorch pdf evaluation), i.e. the problem definition cannot perform a sampling from the prior a priori. Thus, we would somehow have to create a solver just for sampling the prior using e.g. LHS. Not sure what exactly would be the recommended option, I see two options
|
||
|
||
def sample( | ||
self, forward_model: ForwardModelBase, **kwargs | ||
) -> Tuple[pd.DataFrame, dict]: | ||
raise NotImplementedError | ||
|
||
|
class LatinHypercubeSampler(SamplerBase):
    """
    Base class for a surrogate model, i.e., a forward model that approximates another
    (typically computationally more expensive) forward model.
    Contains functionalities to provide samples of an inverse problem's parameters by
    utilizing latin hypercube sampling. The generated samples are intended for the
    training of a surrogate model.

    Parameters
    ----------
    name
        The name of the surrogate model. Must be unique among all surrogate models'
        names within a considered InverseProblem.
    forward_model
        The forward model object that the surrogate model approximates.
    problem
        The considered inverse problem.
    """

    def __init__(self, name: str, forward_model: ForwardModelBase):
    def __init__(self, problem):
        super().__init__(problem)

    def generate_samples(self, n_samples: int, seed: int = 1) -> np.ndarray:
        """
        Samples the problem's latent parameter vector from the parameters' priors in
        combination with latin hypercube sampling.

        super().__init__(name)
        Parameters
        ----------
        n_samples
            The number of requested samples.
        seed
            Seed for the random number generator.

        # the surrogate model has access to the forward model it approximates; this
        # forward model can be called (evaluating its response) from the surrogate
        # model via self.forward_model.response(inp) where inp contains the input
        # dictionary
        self.forward_model = forward_model
        Returns
        -------
        sample_array
            The sampled latent parameter vectors. Each row corresponds to a single
            latent parameter vector.
        """

        # make sure that all parameters are one-dimensional; it is not straightforward
        # how to do LHS for general multivariate parameters
        for prm_name in self.problem.latent_prms:
            if self.problem.parameters[prm_name].dim > 1:
                raise RuntimeError(
                    f"The given problem has a multivariate parameter ('{prm_name}') "
                    f"for which LHS is not supported."
                )

        # the dimension of the required hypercube is the number of parameters; note
        # that this only holds since all parameters are 1D (see the check above)
        dim = self.problem.n_latent_prms

    def fit(self):
        # create the requested number of latin hypercube samples with the requested dim
        lhd = qmc.LatinHypercube(d=dim, seed=seed).random(n=n_samples)

        # this is going to be the array of the parameter samples; each row will
        # correspond to a theta-vector
        sample_array = np.zeros(lhd.shape)

        # fill the prepared array for the parameter samples using the LHS samples
        for prm_name, parameter in self.problem.parameters.items():
            if parameter.is_latent:
                idx = parameter.index
                prior = self.priors[prm_name]
                prms = self.problem.get_constants(prior.hyperparameters)
                for lt_prm in self.problem.get_latent_prior_hyperparameters(prm_name):
                    idx_lt = self.problem.parameters[lt_prm].index
                    prms[lt_prm] = sample_array[:, idx_lt]
                q = lhd[:, idx]
                sample_array[:, idx] = prior(prms, "ppf", q, use_ref_prm=False)

        return sample_array
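The ppf-based fill above is plain inverse-transform sampling of each 1D prior at LHS quantiles; a self-contained scipy illustration (hypothetical two-parameter setup, not probeye code):

import numpy as np
from scipy.stats import norm, qmc, uniform

n_samples, dim = 100, 2
lhd = qmc.LatinHypercube(d=dim, seed=1).random(n=n_samples)  # quantiles in [0, 1)

sample_array = np.zeros(lhd.shape)
sample_array[:, 0] = norm.ppf(lhd[:, 0], loc=0.0, scale=1.0)      # e.g. normal prior
sample_array[:, 1] = uniform.ppf(lhd[:, 1], loc=-1.0, scale=2.0)  # e.g. U(-1, 1) prior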
    def sample(
        self,
        forward_model: ForwardModelBase,
        n_samples: int,
        seed: int = 1,
    ) -> Tuple[pd.DataFrame, dict]:
        """
        Prepares the surrogate model by approximating the forward model in some way.
        Generates a given number of training data points for fitting a surrogate
        model. The training data contains a number of parameter vectors (sampled
        using LHS) and the corresponding model responses.

        Parameters
        ----------
        forward_model
            The forward model that should be evaluated.
        n_samples
            The number of parameter vectors the forward model should be evaluated for.
        seed
            Seed for the random number generator.

        Returns
        -------
        prm_samples_pd
            The parameter samples the forward model was evaluated at.
        responses_over_experiments
            The keys are the names of the experiments the forward model is associated
            with, while the values are 3D-arrays containing the forward model's
            response. responses_over_experiments[i][j] will contain the forward
            model's response with the ith parameter vector for the jth output sensor.
        """
        pass

        # get the forward model object with the given name and prepare the
        # corresponding experimental in- and output dictionaries
        forward_model.prepare_experimental_inputs_and_outputs()

        # generate the latent parameter samples and convert them to a data frame to
        # have the association between columns and parameter names
        prm_samples = self.generate_samples(n_samples, seed=seed)
        prm_samples_pd = pd.DataFrame(
            prm_samples, columns=self.problem.get_theta_names()
        )

        # this dictionary will contain the forward model responses for each of the
        # experiments associated with the forward model; so the keys will be
        # experiment names while the values will be 3D-arrays with the forward
        # model's responses; responses_over_experiments[i] will correspond to the
        # response of the ith parameter vector; responses_over_experiments[i][j] will
        # contain the forward model's response with the ith parameter vector for the
        # jth output sensor
        responses_over_experiments = {}

        # here, the length of the vectors of the forward model's output sensors is
        # determined; to that end, the forward model is evaluated once
        first_exp_name = forward_model.experiment_names[0]
        exp_inp = forward_model.input_from_experiments[first_exp_name]
        first_theta = prm_samples[0]
        prms_model = self.problem.get_parameters(first_theta, forward_model.prms_def)
        inp = {**exp_inp, **prms_model}
        response_dict = forward_model.response(inp)
        # make sure that the vectors returned by each of the forward model's output
        # sensors have the same length; otherwise an AssertionError is raised
        length_set = set()
        for value in response_dict.values():
            length_set.add(len_or_one(value))
        assert len(length_set) == 1
        n_out_values = list(length_set)[0]

        # evaluate the forward model for each experiment/parameter vector
        for exp_name in forward_model.experiment_names:
            exp_inp = forward_model.input_from_experiments[exp_name]
            response_array = np.zeros(
                (n_samples, forward_model.n_output_sensors, n_out_values)
            )
            for i, theta in enumerate(prm_samples):
                prms_model = self.problem.get_parameters(theta, forward_model.prms_def)
                inp = {**exp_inp, **prms_model}  # merge the two dictionaries
                response_dict = forward_model.response(inp)
                for j, response_vector in enumerate(response_dict.values()):
                    response_array[i, j, :] = response_vector
            responses_over_experiments[exp_name] = response_array

        return prm_samples_pd, responses_over_experiments
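A hedged usage sketch of this method (assuming `problem` and `forward_model` are set up as in the example script above; the argument values are illustrative):

sampler = LatinHypercubeSampler(problem)
prm_samples_pd, responses = sampler.sample(forward_model, n_samples=100, seed=1)

# prm_samples_pd: DataFrame with one column per latent parameter, one row per sample
# responses: dict mapping experiment name -> array of shape
#            (n_samples, n_output_sensors, n_out_values)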
class HarlowModelFactory:

class HarlowSampler(SamplerBase):
    """
    Model factory that returns a `ForwardModel` object, where the response
    is obtained from a fitted surrogate model.

@@ -65,8 +217,8 @@ def __init__(
        self,
        problem: InverseProblem,
        forward_model: ForwardModelBase,
        sampler: Sampler,
        surrogate_model: Surrogate,
        sampler: HarlowSamplerBase,
        surrogate_model: HarlowSurrogateBase,

Review comment: Ideally a harlow sampler should be usable with a non-harlow surrogate, but it is not the case. Probably more a "harlow" thing than a "probeye" one.
Reply: Harlow offers an abstract surrogate class which can be used to define new surrogates that work with the harlow samplers. Since most samplers need access to the surrogate (for fitting and making predictions iteratively), I don't think it's feasible to make them work with any surrogate.
Reply: Why exactly should that not work with other surrogates? But I somewhat agree that the sampler and the surrogate are connected, and maybe we should actually follow a suggestion from Daniel to put them into the same base class.

        fit_points_x: np.ndarray = None,
        fit_points_y: np.ndarray = None,
        test_points_x: np.ndarray = None,

@@ -77,6 +229,8 @@ def __init__(
        **surrogate_kwargs,
    ):

        super().__init__(problem)

        # Initialize
        self.surrogate_kwargs = surrogate_kwargs
        self.problem = problem

@@ -132,8 +286,8 @@ def __init__(
        # Get bounds
        self._get_bounds()

        # Initialize surrogate
        self.surrogate = surrogate_model(**surrogate_kwargs)
        # Surrogate model
        self.surrogate = surrogate_model
        self.func_pred = self.surrogate.predict

        # Initialize sampler
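On the surrogate-portability thread above: a custom surrogate would presumably subclass harlow's abstract base. The sketch below is an assumption-laden illustration; the fit/predict method names are inferred from this diff's `harlow_sampler.fit()` and `self.surrogate.predict`, not confirmed against harlow's actual abstract interface.

import numpy as np
from harlow.surrogating import Surrogate as HarlowSurrogateBase

class NearestNeighborSurrogate(HarlowSurrogateBase):
    """Hypothetical user-defined surrogate intended to work with harlow samplers."""

    def fit(self, X: np.ndarray, y: np.ndarray):
        # store the training data; a real implementation would train a model here
        self.X_train, self.y_train = X, y

    def predict(self, X: np.ndarray) -> np.ndarray:
        # nearest-neighbor lookup as a stand-in for a trained model's prediction
        dists = np.linalg.norm(self.X_train[None, :, :] - X[:, None, :], axis=-1)
        return self.y_train[np.argmin(dists, axis=1)]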