From 4d8e7c6c25d8393c2e07ddb3cc47634bde863b11 Mon Sep 17 00:00:00 2001
From: Faried Abu Zaid
Date: Fri, 29 Sep 2023 19:58:03 +0200
Subject: [PATCH 1/4] rename experiment library

---
 experiments/cfair/config.yaml                |  4 +-
 experiments/fashion/config.yaml              |  6 +-
 experiments/mnist/config.yaml                | 59 +++++++++++++-------
 experiments/mnist/config_best.yaml           |  6 +-
 experiments/mnist/config_lu.yaml             |  6 +-
 experiments/synthetic/config.yaml            |  6 +-
 scripts/run-expreiment.py                    |  2 +-
 src/{experiments => explib}/__init__.py      |  0
 src/{experiments => explib}/base.py          |  0
 src/{experiments => explib}/config_parser.py |  0
 src/{experiments => explib}/datasets.py      |  1 -
 src/{experiments => explib}/hyperopt.py      |  2 +-
 tests/flows_test.py                          |  2 +-
 tests/mnist.yaml                             |  6 +-
 14 files changed, 59 insertions(+), 41 deletions(-)
 rename src/{experiments => explib}/__init__.py (100%)
 rename src/{experiments => explib}/base.py (100%)
 rename src/{experiments => explib}/config_parser.py (100%)
 rename src/{experiments => explib}/datasets.py (99%)
 rename src/{experiments => explib}/hyperopt.py (99%)

diff --git a/experiments/cfair/config.yaml b/experiments/cfair/config.yaml
index 9707459..9c989f1 100644
--- a/experiments/cfair/config.yaml
+++ b/experiments/cfair/config.yaml
@@ -1,9 +1,9 @@
 ---
-__object__: src.experiments.base.ExperimentCollection
+__object__: src.explib.base.ExperimentCollection
 name: cfair10_basedist_comparison
 experiments:
   - &exp_laplace
-    __object__: src.experiments.hyperopt.HyperoptExperiment
+    __object__: src.explib.hyperopt.HyperoptExperiment
     name: mnist_laplace
     scheduler: &scheduler
       __object__: ray.tune.schedulers.ASHAScheduler
diff --git a/experiments/fashion/config.yaml b/experiments/fashion/config.yaml
index 32928cb..bbc0ce2 100644
--- a/experiments/fashion/config.yaml
+++ b/experiments/fashion/config.yaml
@@ -1,9 +1,9 @@
 ---
-__object__: src.experiments.base.ExperimentCollection
+__object__: src.explib.base.ExperimentCollection
 name: fashion_basedist_comparison
 experiments:
   - &exp_laplace
-    __object__: src.experiments.hyperopt.HyperoptExperiment
+    __object__: src.explib.hyperopt.HyperoptExperiment
     name: mnist_laplace
     scheduler: &scheduler
      __object__: ray.tune.schedulers.ASHAScheduler
@@ -18,7 +18,7 @@ experiments:
       mode: min
     trial_config:
       dataset: &dataset
-        __object__: src.experiments.datasets.FashionMnistSplit
+        __object__: src.explib.datasets.FashionMnistSplit
         label: 0
       epochs: &epochs 10000
       patience: &patience 500
diff --git a/experiments/mnist/config.yaml b/experiments/mnist/config.yaml
index 2544eef..177e59d 100644
--- a/experiments/mnist/config.yaml
+++ b/experiments/mnist/config.yaml
@@ -2,15 +2,15 @@
 __object__: src.experiments.base.ExperimentCollection
 name: mnist_basedist_comparison
 experiments:
-  - &exp_laplace
+  - &exp_nice_lu_laplace
     __object__: src.experiments.hyperopt.HyperoptExperiment
-    name: mnist_laplace
+    name: mnist_nice_lu_laplace
     scheduler: &scheduler
       __object__: ray.tune.schedulers.ASHAScheduler
-      max_t: 10000
-      grace_period: 10000
+      max_t: 1000000
+      grace_period: 1000000
       reduction_factor: 2
-    num_hyperopt_samples: &num_hyperopt_samples 25
+    num_hyperopt_samples: &num_hyperopt_samples 50
     gpus_per_trial: &gpus_per_trial 0
     cpus_per_trial: &cpus_per_trial 1
     tuner_params: &tuner_params
@@ -18,18 +18,18 @@ experiments:
       mode: min
     trial_config:
       dataset: &dataset
-        __object__: src.veriflow.experiments.datasets.MnistSplit
+        __object__: src.experiments.datasets.MnistSplit
         digit: 0
-      epochs: &epochs 20000
+      epochs: &epochs 200000
       patience: &patience 50
       batch_size: &batch_size
-        __eval__: tune.choice([8, 16, 32, 64])
+        __eval__: tune.choice([32])
       optim_cfg: &optim
         optimizer:
           __class__: torch.optim.Adam
         params:
           lr:
-            __eval__: tune.loguniform(1e-2, 5e-4)
+            __eval__: tune.loguniform(1e-4, 1e-2)
           weight_decay: 0.0
 
     model_cfg:
@@ -37,9 +37,9 @@ experiments:
         __class__: &model src.veriflow.flows.NiceFlow
         params:
           coupling_layers: &coupling_layers
-            __eval__: tune.choice([ 2, 3, 4, 5])
+            __eval__: tune.choice([2, 3, 4, 5, 6, 7, 8, 9, 10])
           coupling_nn_layers: &coupling_nn_layers
-            __eval__: tune.choice([[w]*l for w in [50, 100] for l in range(1, 3)])
+            __eval__: tune.choice([[w]*l for l in [1, 2, 3] for w in [50, 100, 200]])
           nonlinearity: &nonlinearity
             __eval__: tune.choice([torch.nn.ReLU()])
           split_dim: &split_dim 50
@@ -50,14 +50,33 @@ experiments:
         scale:
           __eval__: torch.ones(100)
         permutation: &permutation LU
-  - &exp_normal
-    __overwrites__: *exp_laplace
-    name: mnist_normal
+  - &exp_nice_lu_normal
+    __overwrites__: *exp_nice_lu_laplace
+    name: mnist_nice_lu_normal
     model_cfg:
+      params:
+        base_distribution:
+          __object__: pyro.distributions.Normal
+          loc:
+            __eval__: torch.zeros(100)
+          scale:
+            __eval__: torch.ones(100)
+  - &exp_nice_rand_laplace
+    __overwrites__: *exp_nice_lu_laplace
+    model_cfg:
+      name: mnist_nice_rand_laplace
       params:
-        base_distribution:
-          __object__: pyro.distributions.Normal
-          loc:
-            __eval__: torch.zeros(100)
-          scale:
-            __eval__: torch.ones(100)
+        permutation: random
+  - &exp_nice_rand_normal
+    __overwrites__: *exp_nice_lu_laplace
+    model_cfg:
+      name: mnist_nice_rand_normal
+      params:
+        permutation: random
+        base_distribution:
+          __object__: pyro.distributions.Normal
+          loc:
+            __eval__: torch.zeros(100)
+          scale:
+            __eval__: torch.ones(100)
+
diff --git a/experiments/mnist/config_best.yaml b/experiments/mnist/config_best.yaml
index ee91994..a633f09 100644
--- a/experiments/mnist/config_best.yaml
+++ b/experiments/mnist/config_best.yaml
@@ -1,9 +1,9 @@
 ---
-__object__: src.experiments.base.ExperimentCollection
+__object__: src.explib.base.ExperimentCollection
 name: mnist_basedist_comparison
 experiments:
   - &exp_laplace_best
-    __object__: src.experiments.hyperopt.HyperoptExperiment
+    __object__: src.explib.hyperopt.HyperoptExperiment
     name: mnist_normal_best
     scheduler: &scheduler
       __object__: ray.tune.schedulers.ASHAScheduler
@@ -18,7 +18,7 @@ experiments:
       mode: min
     trial_config:
       dataset: &dataset
-        __object__: src.experiments.datasets.MnistSplit
+        __object__: src.explib.datasets.MnistSplit
         digit: 0
       epochs: &epochs 10000
       patience: &patience 50
diff --git a/experiments/mnist/config_lu.yaml b/experiments/mnist/config_lu.yaml
index 58b178d..c38f3cc 100644
--- a/experiments/mnist/config_lu.yaml
+++ b/experiments/mnist/config_lu.yaml
@@ -1,9 +1,9 @@
 ---
-__object__: src.experiments.base.ExperimentCollection
+__object__: src.explib.base.ExperimentCollection
 name: mnist_basedist_comparison
 experiments:
   - &exp_laplace
-    __object__: src.experiments.hyperopt.HyperoptExperiment
+    __object__: src.explib.hyperopt.HyperoptExperiment
     name: mnist_laplace
     scheduler: &scheduler
       __object__: ray.tune.schedulers.ASHAScheduler
@@ -18,7 +18,7 @@ experiments:
       mode: min
     trial_config:
       dataset: &dataset
-        __object__: src.experiments.datasets.MnistSplit
+        __object__: src.explib.datasets.MnistSplit
         digit: 0
       epochs: &epochs 20000
       patience: &patience 50
diff --git a/experiments/synthetic/config.yaml b/experiments/synthetic/config.yaml
index 633d963..9b8eb53 100644
--- a/experiments/synthetic/config.yaml
+++ b/experiments/synthetic/config.yaml
@@ -1,9 +1,9 @@
 ---
-__object__: laplace_flows.experiments.base.ExperimentCollection
+__object__: src.explib.base.ExperimentCollection
 name: mnist_basedist_comparison
 experiments:
   - &main
-    __object__: laplace_flows.experiments.hyperopt.HyperoptExperiment
+    __object__: src.explib.hyperopt.HyperoptExperiment
     name: normal_moons
     scheduler: &scheduler
       __object__: ray.tune.schedulers.ASHAScheduler
@@ -18,7 +18,7 @@ experiments:
       mode: min
     trial_config:
       dataset: &dataset
-        __object__: laplace_flows.experiments.datasets.SyntheticSplit
+        __object__: src.explib.datasets.SyntheticSplit
         generator: make_moons
         params_train: &params_train
           n_samples: 100000
diff --git a/scripts/run-expreiment.py b/scripts/run-expreiment.py
index cfe53f1..b3fe034 100644
--- a/scripts/run-expreiment.py
+++ b/scripts/run-expreiment.py
@@ -3,7 +3,7 @@
 
 import click
 
-from src.experiments.config_parser import read_config
+from src.explib.config_parser import read_config
 
 Pathable = T.Union[str, os.PathLike]  # In principle one can cast it to os.path.Path
 
diff --git a/src/experiments/__init__.py b/src/explib/__init__.py
similarity index 100%
rename from src/experiments/__init__.py
rename to src/explib/__init__.py
diff --git a/src/experiments/base.py b/src/explib/base.py
similarity index 100%
rename from src/experiments/base.py
rename to src/explib/base.py
diff --git a/src/experiments/config_parser.py b/src/explib/config_parser.py
similarity index 100%
rename from src/experiments/config_parser.py
rename to src/explib/config_parser.py
diff --git a/src/experiments/datasets.py b/src/explib/datasets.py
similarity index 99%
rename from src/experiments/datasets.py
rename to src/explib/datasets.py
index 6b4407a..c4c9a83 100644
--- a/src/experiments/datasets.py
+++ b/src/explib/datasets.py
@@ -253,7 +253,6 @@ def __getitem__(self, index: int):
             x = self.transform(x)
         return x, 0
 
-
 class DataSplitFromCSV(DataSplit):
     def __init__(self, train: os.PathLike, test: os.PathLike, val: os.PathLike):
         self.train = train
diff --git a/src/experiments/hyperopt.py b/src/explib/hyperopt.py
similarity index 99%
rename from src/experiments/hyperopt.py
rename to src/explib/hyperopt.py
index 3bca779..5f2c344 100644
--- a/src/experiments/hyperopt.py
+++ b/src/explib/hyperopt.py
@@ -14,7 +14,7 @@
 from ray.air import RunConfig, session
 from torch.utils.data import DataLoader
 
-from src.experiments.base import Experiment
+from src.explib.base import Experiment
 from src.veriflow.flows import NiceFlow
 from src.veriflow.networks import AdditiveAffineNN
 from src.veriflow.transforms import ScaleTransform
diff --git a/tests/flows_test.py b/tests/flows_test.py
index 4c14312..c81caf7 100644
--- a/tests/flows_test.py
+++ b/tests/flows_test.py
@@ -1,7 +1,7 @@
 import os
 import typing as T
 
-from src.experiments.config_parser import read_config
+from src.explib.config_parser import read_config
 
 
 def test_mnist():
diff --git a/tests/mnist.yaml b/tests/mnist.yaml
index 0ecfde4..3c99f78 100644
--- a/tests/mnist.yaml
+++ b/tests/mnist.yaml
@@ -1,9 +1,9 @@
 ---
-__object__: src.experiments.base.ExperimentCollection
+__object__: src.explib.base.ExperimentCollection
 name: mnist_basedist_comparison
 experiments:
   - &exp_nice
-    __object__: src.experiments.hyperopt.HyperoptExperiment
+    __object__: src.explib.hyperopt.HyperoptExperiment
     name: mnist_nice
     scheduler: &scheduler
       __object__: ray.tune.schedulers.ASHAScheduler
@@ -18,7 +18,7 @@ experiments:
       mode: min
     trial_config:
       dataset: &dataset
-        __object__: src.experiments.datasets.MnistSplit
+        __object__: src.explib.datasets.MnistSplit
         digit: 0
       epochs: &epochs 20
       patience: &patience 5
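The rename is purely mechanical, but it moves every import and config entry point. A minimal usage sketch of the renamed library (hedged: it assumes the repository root is on sys.path, that experiments/mnist/config.yaml exists, and that the parsed ExperimentCollection exposes the conduct() entry point the run script relies on):

    # Load a config through the renamed package and run it.
    from src.explib.config_parser import read_config

    # read_config resolves the __object__/__eval__ directives, so the result
    # is an instantiated ExperimentCollection rather than a plain dict.
    collection = read_config("experiments/mnist/config.yaml")
    collection.conduct("reports/")  # report directory is illustrative
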
From 4a6a1955b8251ca20cdcf8c45d9d3d324eead047 Mon Sep 17 00:00:00 2001
From: Faried Abu Zaid
Date: Fri, 29 Sep 2023 21:19:02 +0200
Subject: [PATCH 2/4] add epochs to fit routine

---
 src/veriflow/flows.py | 66 +++++++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/src/veriflow/flows.py b/src/veriflow/flows.py
index a81429d..151d868 100644
--- a/src/veriflow/flows.py
+++ b/src/veriflow/flows.py
@@ -62,7 +62,8 @@ def fit(
         shuffe: bool = True,
         gradient_clip: float = None,
         device: torch.device = None,
-        jitter: float = 1e-4,
+        jitter: float = 1e-6,
+        epochs: int = 1
     ) -> float:
         """
         Wrapper function for the fitting procedure. Allows basic configuration of the optimizer and other
@@ -73,6 +74,8 @@ def fit(
             batch_size: number of samples per optimization step.
             optim: optimizer class.
             optimizer_params: optimizer parameter dictionary.
+            jitter: Determines the amount of jitter that is added if the optimization leaves the feasible region.
+            epochs: number of epochs.
 
         Returns:
             Loss curve (negative log-likelihood).
@@ -93,33 +96,36 @@ def fit(
             optim = optim(model.trainable_layers.parameters())
 
         N = len(data_train)
-        losses = []
-
-        if shuffe:
-            perm = np.random.choice(N, N, replace=False)
-            data_train = data_train[perm]
-
-        for idx in range(0, N, batch_size):
-            idx_end = min(idx + batch_size, N)
-            try:
-                sample = torch.Tensor(data_train[idx:idx_end][0]).to(device)
-            except:
-                continue
-            optim.zero_grad()
-            while not self.is_feasible():
-                self.add_jitter(jitter)
-            loss = -model.transform.log_prob(sample).mean()
-            losses.append(float(loss.detach()))
-            loss.backward()
-            if gradient_clip is not None:
-                torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
-            optim.step()
-            while not self.is_feasible():
-                self.add_jitter(jitter)
-
-        model.transform.clear_cache()
-
-        return sum(losses) / len(losses)
+
+        epoch_losses = []
+        for _ in range(epochs):
+            losses = []
+            if shuffe:
+                perm = np.random.choice(N, N, replace=False)
+                data_train = data_train[perm]
+
+            for idx in range(0, N, batch_size):
+                idx_end = min(idx + batch_size, N)
+                try:
+                    sample = torch.Tensor(data_train[idx:idx_end][0]).to(device)
+                except:
+                    continue
+                optim.zero_grad()
+                while not self.is_feasible():
+                    self.add_jitter(jitter)
+                loss = -model.transform.log_prob(sample).mean()
+                losses.append(float(loss.detach()))
+                loss.backward()
+                if gradient_clip is not None:
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
+                optim.step()
+                while not self.is_feasible():
+                    self.add_jitter(jitter)
+
+            model.transform.clear_cache()
+            epoch_losses.append(np.mean(losses))
+
+        return epoch_losses
 
     def to_onnx(self, path: str, export_mode: export_modes = "log_prob") -> None:
         """Saves the model as onnx file
@@ -189,7 +195,7 @@ def __init__(
         split_dim: int,
         scale_every_coupling=False,
         nonlinearity: Optional[torch.nn.Module] = None,
-        permutation: Permutation = "random",
+        permutation: Permutation = "LU",
         *args,
         **kwargs,
     ) -> None:
@@ -199,7 +205,7 @@ def __init__(
             base_distribution: base distribution,
             coupling_layers: number of coupling layers. All coupling layers share the same architecture but not the same weights.
             coupling_nn_layers: number of neurons in the hidden layers of the dense neural network that computes the coupling loc parameter.
-            split_dim: split dimension for the coupling.
+            split_dim: split dimension for the coupling, i.e. input dimension of the conditioner.
             scale_every_coupling: if True, a scale transform is applied after every coupling layer. Otherwise, a single scale transform is applied after all coupling layers.
             nonlinearity: nonlinearity of the coupling network.
             permutation: permutation type. Can be "random" or "LU".
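With this patch, fit() loops over the data `epochs` times and returns one averaged negative log-likelihood per epoch instead of a single scalar. A hedged sketch of the new call, mirroring the call pattern HyperoptExperiment._trial uses; the TensorDataset is a toy stand-in for the repo's DataSplit classes, and the constructor arguments follow NiceFlow's docstring:

    import torch
    import pyro.distributions as dist
    from src.veriflow.flows import NiceFlow

    flow = NiceFlow(
        base_distribution=dist.Laplace(torch.zeros(100), torch.ones(100)),
        coupling_layers=2,
        coupling_nn_layers=[50],
        split_dim=50,
        permutation="LU",   # the new default introduced above
    )
    data_train = torch.utils.data.TensorDataset(torch.rand(512, 100))
    losses = flow.fit(
        data_train,
        torch.optim.Adam,   # optimizer class, as passed by _trial
        {"lr": 1e-3},       # optimizer parameter dictionary
        batch_size=32,
        epochs=5,           # new parameter
        shuffe=False,       # parameter name as spelled in flows.py
    )
    print(losses)           # five per-epoch mean losses
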
From cfd6e1380e64157839fa737f0c321c1db4b4ad68 Mon Sep 17 00:00:00 2001
From: Faried Abu Zaid
Date: Sat, 30 Sep 2023 00:48:25 +0200
Subject: [PATCH 3/4] Extend mnist experiment

---
 .gitignore                    |  2 ++
 experiments/mnist/config.yaml | 15 ++++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2e6e461..699a3be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 *.csv
 *.onnx
+*.pkl
+*.pt
 
 # .idea
 config_local.json
diff --git a/experiments/mnist/config.yaml b/experiments/mnist/config.yaml
index 177e59d..6fb6d27 100644
--- a/experiments/mnist/config.yaml
+++ b/experiments/mnist/config.yaml
@@ -1,9 +1,9 @@
 ---
-__object__: src.experiments.base.ExperimentCollection
+__object__: src.explib.base.ExperimentCollection
 name: mnist_basedist_comparison
 experiments:
   - &exp_nice_lu_laplace
-    __object__: src.experiments.hyperopt.HyperoptExperiment
+    __object__: src.explib.hyperopt.HyperoptExperiment
     name: mnist_nice_lu_laplace
     scheduler: &scheduler
       __object__: ray.tune.schedulers.ASHAScheduler
@@ -18,7 +18,7 @@ experiments:
       mode: min
     trial_config:
       dataset: &dataset
-        __object__: src.experiments.datasets.MnistSplit
+        __object__: src.explib.datasets.MnistSplit
         digit: 0
       epochs: &epochs 200000
       patience: &patience 50
@@ -39,10 +39,11 @@ experiments:
         coupling_layers: &coupling_layers
           __eval__: tune.choice([2, 3, 4, 5, 6, 7, 8, 9, 10])
         coupling_nn_layers: &coupling_nn_layers
-          __eval__: tune.choice([[w]*l for l in [1, 2, 3] for w in [50, 100, 200]])
+          __eval__: tune.choice([[w]*l for l in [1, 2, 3, 4] for w in [10, 20, 50, 100, 200]])
         nonlinearity: &nonlinearity
           __eval__: tune.choice([torch.nn.ReLU()])
-        split_dim: &split_dim 50
+        split_dim:
+          __eval__: tune.choice([i for i in range(1, 51)])
         base_distribution:
           __object__: pyro.distributions.Laplace
           loc:
@@ -63,14 +64,14 @@ experiments:
           __eval__: torch.ones(100)
   - &exp_nice_rand_laplace
     __overwrites__: *exp_nice_lu_laplace
+    name: mnist_nice_rand_laplace
     model_cfg:
-      name: mnist_nice_rand_laplace
       params:
         permutation: random
   - &exp_nice_rand_normal
     __overwrites__: *exp_nice_lu_laplace
+    name: mnist_nice_rand_normal
     model_cfg:
-      name: mnist_nice_rand_normal
       params:
         permutation: random
         base_distribution:
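The widened coupling_nn_layers grid is easier to read with the comprehension expanded; in plain Python it enumerates 20 candidate hidden-layer layouts:

    layer_grids = [[w] * l for l in [1, 2, 3, 4] for w in [10, 20, 50, 100, 200]]
    len(layer_grids)    # 20 layouts
    layer_grids[0]      # [10], one hidden layer of width 10
    layer_grids[-1]     # [200, 200, 200, 200], four hidden layers of width 200
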
From 70ce4dc2625b483828e8cf825c51a4e0bd4260f7 Mon Sep 17 00:00:00 2001
From: Faried Abu Zaid
Date: Sat, 30 Sep 2023 00:48:54 +0200
Subject: [PATCH 4/4] improve hyperopt evaluation

---
 src/explib/config_parser.py | 59 +++++++++++++++++++------
 src/explib/hyperopt.py      | 86 ++++++++++++++++++++++++-------------
 2 files changed, 104 insertions(+), 41 deletions(-)

diff --git a/src/explib/config_parser.py b/src/explib/config_parser.py
index 39fede5..35c9d11 100644
--- a/src/explib/config_parser.py
+++ b/src/explib/config_parser.py
@@ -2,6 +2,7 @@
 from importlib import import_module
 from pathlib import Path
 from typing import Any, Dict, List, Union
+from pickle import load
 
 import yaml
 
@@ -113,17 +114,29 @@ def read_config(yaml_path: Union[str, Path]) -> dict:
     additional functionality:
 
     Special keys:
-        __class__: The value of this key is interpreted as the class name of the object.
-            The class is imported and stored in the result dictionary under the key .
+        __class__: The value is interpreted as a class name and the corresponding class is imported.
+        __object__: The value is interpreted as a class; all other keys are interpreted as constructor arguments.
+            The key indicates that this (sub-)dictionary is interpreted as an object specification.
+        __eval__: The value is evaluated. All other keys in the (sub-)dictionary are ignored.
+            The keyword supports the core Python language. Additionally, tune and torch are already imported for convenience.
 
     Example:
-        entry in yaml: __class__model: laplace_flows.flows.NiceFlow)
-        entry in result: model: __import__("laplace_flows.flows.NiceFlow")
-        __tune__: The value of this key is interpreted as a dictionary that contains the
-            configuration for the hyperparameter optimization using tune sample methods.
-            the directive is evaluated and the result in the result dictionary under the key .
-        Example:
-            entry in yaml: __tune__lr: loguniform(1e-4, 1e-1)
-            entry in result: lr: eval("tune.loguniform(1e-4, 1e-1)")
+        ---
+        entry in yaml:
+            model:
+                __class__: src.veriflow.flows.NiceFlow
+        entry in result: model: <the imported NiceFlow class>
+        ---
+        entry in yaml:
+            model:
+                __object__: src.veriflow.flows.NiceFlow
+                p1: 1
+                p2: 2
+        entry in result: model: <a NiceFlow instance built with p1=1, p2=2>
+        ---
+        entry in yaml:
+            lr:
+                __eval__: tune.loguniform(1e-4, 1e-1)
+        entry in result: lr: <the evaluated tune search space>
 
     :param yaml_path: Path to the yaml file.
     """
@@ -137,7 +150,7 @@ def read_config(yaml_path: Union[str, Path]) -> dict:
     return config
 
 
-def parse_raw_config(d: dict):
+def parse_raw_config(d: dict) -> Any:
     """Parses an unfolded raw config dictionary and returns the corresponding dictionary.
     Parsing includes the following steps:
     - Overwrites are applied (see apply_overwrite)
@@ -145,7 +158,10 @@ def parse_raw_config(d: dict):
     - The "__eval__" key is evaluated.
     - The "__class__" key is interpreted as a class name and the corresponding class is imported.
 
-    :param d: The raw config dictionary.
+    Args:
+        d: The raw config dictionary.
+    Returns:
+        The result after all semantics have been applied.
     """
     if isinstance(d, dict):
         d = apply_overwrite(d, recurse=False)
@@ -172,3 +188,22 @@ def parse_raw_config(d: dict):
         return result
     else:
         return d
+
+def from_checkpoint(params: str, state_dict: str) -> Any:
+    """Loads a model from a checkpoint.
+
+    Args:
+        params: Path to the file containing the model specification.
+        state_dict: Path to the file containing the state dict.
+    Returns:
+        The loaded model.
+    """
+    spec = load(open(params, "rb"))["model_cfg"]
+    model = spec["type"](**spec["params"])
+
+    state_dict = torch.load(state_dict)
+    model.load_state_dict(state_dict)
+
+    return model
+
+    
\ No newline at end of file
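The new from_checkpoint() helper pairs a pickled trial configuration with a saved state dict. A hedged usage sketch; the file names are illustrative and mirror what _test_best_model() in the hyperopt diff below writes into the report directory:

    import torch
    from src.explib.config_parser import from_checkpoint

    model = from_checkpoint(
        "reports/mnist_nice_lu_laplace_best_config.pkl",  # pickle with a "model_cfg" entry
        "reports/mnist_nice_lu_laplace_best_model.pt",    # state dict saved via torch.save
    )
    x = torch.rand(8, 100)  # toy batch matching the 100-dimensional configs
    print(model.log_prob(x).mean())
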
diff --git a/src/explib/hyperopt.py b/src/explib/hyperopt.py
index 5f2c344..98d49d0 100644
--- a/src/explib/hyperopt.py
+++ b/src/explib/hyperopt.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import shutil
 import typing as T
 from datetime import datetime
 from typing import Any, Dict, Iterable, Literal
@@ -14,7 +15,8 @@
 from ray.air import RunConfig, session
 from torch.utils.data import DataLoader
 
 from src.explib.base import Experiment
+from src.explib.config_parser import from_checkpoint
 from src.veriflow.flows import NiceFlow
 from src.veriflow.networks import AdditiveAffineNN
 from src.veriflow.transforms import ScaleTransform
@@ -67,11 +69,14 @@ def __init__(
         self.tuner_params = tuner_params
 
     @classmethod
-    def _trial(cls, config: T.Dict[str, T.Any], device: torch.device = "cpu"):
+    def _trial(cls, config: T.Dict[str, T.Any], device: torch.device = "cpu") -> Dict[str, float]:
         """Worker function for hyperparameter optimization.
-
-        Raises:
-            ValueError: _description_
+
+        Args:
+            config (T.Dict[str, T.Any]): configuration
+            device (torch.device, optional): device. Defaults to "cpu".
+        Returns:
+            Dict[str, float]: trial performance metrics
         """
         if device is None:
             if torch.backends.mps.is_available():
@@ -99,7 +104,7 @@ def _trial(cls, config: T.Dict[str, T.Any], device: torch.device = "cpu"):
                 config["optim_cfg"]["params"],
                 batch_size=config["batch_size"],
                 device=device,
-            )
+            )[-1]
 
             val_loss = 0
             for i in range(0, len(data_val), config["batch_size"]):
@@ -108,27 +113,19 @@ def _trial(cls, config: T.Dict[str, T.Any], device: torch.device = "cpu"):
             val_loss /= len(data_val)
 
             session.report(
-                {"test_loss": "?", "train_loss": train_loss, "val_loss": val_loss},
+                {"train_loss": train_loss, "val_loss": val_loss},
                 checkpoint=None,
             )
             if val_loss < best_loss:
                 strikes = 0
                 best_loss = val_loss
                 torch.save(flow.state_dict(), "./checkpoint.pt")
-                test_loss = 0
-                for i in range(0, len(data_test), config["batch_size"]):
-                    j = min([len(data_test), i + config["batch_size"]])
-                    test_loss += float(
-                        -flow.log_prob(data_test[i:j][0].to(device)).sum()
-                    )
-                test_loss /= len(data_test)
             else:
                 strikes += 1
                 if strikes >= config["patience"]:
                     break
 
         return {
-            "test_loss_best": test_loss,
             "val_loss_best": best_loss,
             "val_loss": val_loss,
         }
@@ -177,23 +174,53 @@ def conduct(self, report_dir: os.PathLike, storage_path: os.PathLike = None):
         report_file = os.path.join(
             report_dir, f"report_{self.name}_" + exptime + ".csv"
         )
-        self._build_report(exppath, report_file=report_file)
-        # best_result = results.get_best_result("val_loss", "min")
+        results = self._build_report(exppath, report_file=report_file, config_prefix="param_")
+        best_result = results.iloc[results["val_loss_best"].argmin()]
 
-        # print("Best trial config: {}".format(best_result.config))
-        # print("Best trial final validation loss: {}".format(
-        #     best_result.metrics["val_loss"]))
-
-        # test_best_model(best_result)
-
-    def _build_report(self, expdir: str, report_file: str, config_prefix: str = ""):
+        self._test_best_model(best_result, exppath, report_dir)
+
+    def _test_best_model(self, best_result: pd.Series, expdir: str, report_dir: str, device: torch.device = "cpu") -> pd.Series:
+        trial_id = best_result.trial_id
+        for d in os.listdir(expdir):
+            if trial_id in d:
+                shutil.copyfile(
+                    os.path.join(expdir, d, "checkpoint.pt"),
+                    os.path.join(report_dir, f"{self.name}_best_model.pt")
+                )
+                shutil.copyfile(
+                    os.path.join(expdir, d, "params.pkl"),
+                    os.path.join(report_dir, f"{self.name}_best_config.pkl")
+                )
+                break
+
+        best_model = from_checkpoint(
+            os.path.join(report_dir, f"{self.name}_best_config.pkl"),
+            os.path.join(report_dir, f"{self.name}_best_model.pt")
+        )
+
+        data_test = self.trial_config["dataset"].get_test()
+        test_loss = 0
+        for i in range(0, len(data_test), 100):
+            j = min([len(data_test), i + 100])
+            test_loss += float(
+                -best_model.log_prob(data_test[i:j][0].to(device)).sum()
+            )
+        test_loss /= len(data_test)
+
+        best_result["test_loss"] = test_loss
+        best_result.to_csv(
+            os.path.join(report_dir, f"{self.name}_best_result.csv")
+        )
+
+        return best_result
+
+    def _build_report(self, expdir: str, report_file: str, config_prefix: str = "") -> pd.DataFrame:
         """Builds a report of the hyperopt experiment.
 
-        :param expdir: The expdir parameter is the path to the experiment directory (ray results folder).
-        :type expdir: str
-        :param report_file: The report_file parameter is the path to the report file.
- :type report_file: str - :param config_prefix: The config_prefix parameter is the prefix for the config items. + Args: + expdir (str): The expdir parameter is the path to the experiment directory (ray results folder). + report_file (str): The report_file parameter is the path to the report file. + config_prefix: The config_prefix parameter is the prefix for the config items. """ report = None print(os.listdir(expdir)) @@ -201,7 +228,7 @@ def _build_report(self, expdir: str, report_file: str, config_prefix: str = ""): if os.path.isdir(expdir + "/" + d): try: with open(expdir + "/" + d + "/result.json", "r") as f: - result = json.loads('{"test_' + f.read().split('{"test_')[-1]) + result = json.loads('{"val_loss_best' + f.read().split('{"val_loss_best')[-1]) except: print(f"error at {expdir + '/' + d}") continue @@ -224,3 +251,4 @@ def _build_report(self, expdir: str, report_file: str, config_prefix: str = ""): os.makedirs(os.path.dirname(report_file), exist_ok=True) report.to_csv(report_file, index=False) + return report
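The report CSV written by _build_report() can be consumed directly. Since val_loss_best is a negative log-likelihood, the winning trial is the row-wise minimum, which is how conduct() selects the model to test. A short sketch using the column names produced above (report_file is whatever path conduct() assigned):

    import pandas as pd

    report = pd.read_csv(report_file)
    best = report.iloc[report["val_loss_best"].argmin()]
    print(best["trial_id"], best["val_loss_best"])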