diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 028db20..167d5a4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -109,3 +109,4 @@ repos:
         files: src
         additional_dependencies:
           - types-pkg-resources==0.1.3
+          - types-PyYAML==6.0.12.9
diff --git a/src/utils/loggers.py b/src/utils/loggers.py
new file mode 100644
index 0000000..02d8d12
--- /dev/null
+++ b/src/utils/loggers.py
@@ -0,0 +1,183 @@
+import logging
+import math
+import os
+from importlib.util import find_spec
+from typing import List, Optional, Tuple
+
+import pandas as pd
+import yaml
+
+logger = logging.getLogger(__name__)
+
+
+def index_to_multiindex(
+    index: pd.Index, sep: str, max_levels: int, replace: Optional[Tuple[str, str]] = None
+) -> pd.MultiIndex:
+    if replace is None:
+        # dummy replace
+        replace = ("x", "x")
+    multi_index_str = [
+        c.replace(replace[0], replace[1]).split(sep, maxsplit=max_levels - 1) for c in index
+    ]
+    # same length
+    multi_index_str = [levels + [None] * (max_levels - len(levels)) for levels in multi_index_str]
+    names = index.name.split(sep) if index.name is not None else None
+    return pd.MultiIndex.from_tuples(tuples=multi_index_str, names=names)
+
+
+def multiindex_to_index(multiindex: pd.MultiIndex, sep: str) -> pd.Index:
+    index_str = [
+        sep.join([v for v in values if not (isinstance(v, float) and math.isnan(v))])
+        for values in multiindex
+    ]
+    name = sep.join([name for name in multiindex.names if name is not None]) or None
+    return pd.Index(index_str, name=name)
+
+
+def load_csv_run(
+    path: str, metric_prefix_whitelist: Optional[List[str]] = None
+) -> Optional[pd.DataFrame]:
+    # get metrics
+    metric_df = pd.read_csv(os.path.join(path, "metrics.csv"))
+    if metric_prefix_whitelist is not None:
+        if not isinstance(metric_prefix_whitelist, (list, tuple)):
+            metric_prefix_whitelist = [metric_prefix_whitelist]
+        cols = [
+            col
+            for col in metric_df.columns
+            if any(col.startswith(prefix) for prefix in metric_prefix_whitelist)
+        ]
+        metric_df = metric_df[cols]
+        # drop empty rows (e.g. train metrics when we filtered with "test/")
+        metric_df = metric_df.dropna()
+    if len(metric_df.columns) == 0:
+        logger.warning(f"no metric data available after filtering. path={path}")
+        return None
+    metric_df.columns = index_to_multiindex(
+        metric_df.columns, replace=("-", "/"), sep="/", max_levels=3
+    )
+
+    # get hyperparameters
+    with open(os.path.join(path, "hparams.yaml")) as f:
+        hparams = yaml.safe_load(f)
+    hparams_df = pd.json_normalize(hparams, sep="/")
+    hparams_df.columns = index_to_multiindex(hparams_df.columns, sep="/", max_levels=3)
+
+    # combine
+    # repeat to create a row for each row in metrics
+    hparams_repeated = pd.concat([hparams_df] * len(metric_df), axis="index", ignore_index=True)
+    # set index to join correctly (we can not use ignore_index=True because we want to keep the column labels)
+    hparams_repeated.index = metric_df.index
+    combined = pd.concat([metric_df, hparams_repeated], axis=1, keys=["metrics", "hparams"])
+    combined.index.name = "entry"
+
+    return combined
+
+
+def load_csv_experiment(
+    path: str, reduce_index_levels: bool = False, **kwargs
+) -> Optional[pd.DataFrame]:
+    if not os.path.isdir(path):
+        raise ValueError(f"experiment path={path} does not point to a directory")
+
+    subdirs = os.listdir(path)
+    data_dict_with_empty_entries = {
+        subdir: load_csv_run(path=os.path.join(path, subdir), **kwargs)
+        for subdir in subdirs
+        if os.path.isdir(os.path.join(path, subdir))
+    }
+    data_dict = {k: v for k, v in data_dict_with_empty_entries.items() if v is not None}
+    if len(data_dict) == 0:
+        logger.warning(f"no experiment data found in path={path}")
+        return None
+    run_index_names = list(data_dict.values())[0].index.names
+    combined = pd.concat(
+        data_dict.values(), keys=data_dict.keys(), names=["run"] + run_index_names
+    )
+    if reduce_index_levels:
+        if len(data_dict) != len(combined):
+            with_multiple_entries = [
+                run_id for run_id, run_data in data_dict.items() if len(run_data) > 1
+            ]
+            raise Exception(
+                f"can not reduce index levels, because there are multiple entries for a some runs: "
+                f"{with_multiple_entries}"
+            )
+        combined.index = combined.index.droplevel(run_index_names)
+    return combined
+
+
+def load_csv_data(path: str, **kwargs) -> Optional[pd.DataFrame]:
+    if not os.path.isdir(path):
+        raise ValueError(f"path={path} does not point to a directory")
+
+    subdirs = os.listdir(path)
+    data_dict_with_empty_entries = {
+        subdir: load_csv_experiment(path=os.path.join(path, subdir), **kwargs)
+        for subdir in subdirs
+        if os.path.isdir(os.path.join(path, subdir))
+    }
+    data_dict = {k: v for k, v in data_dict_with_empty_entries.items() if v is not None}
+    if len(data_dict) == 0:
+        logger.warning(f"no csv data found in path={path}")
+        return None
+    run_index_names = list(data_dict.values())[0].index.names
+    combined = pd.concat(
+        data_dict.values(), keys=data_dict.keys(), names=["experiment"] + run_index_names
+    )
+    return combined
+
+
+if __name__ == "__main__":
+    # result = load_csv_run(
+    #    path="logs/logger/csv/my-experiment/version_0",
+    #    #metric_prefix_whitelist=["test/"]
+    # )
+
+    # result = load_csv_experiment(
+    #    path="logs/logger/csv/my-experiment",
+    #    #metric_prefix_whitelist=["test/"],
+    # )
+
+    result = load_csv_data(
+        path="logs/logger/csv",
+        metric_prefix_whitelist="test/",
+        reduce_index_levels=True,
+    )
+
+    # Show bar plot for F1 values of a certain experiment (folder in the csv output directory),
+    # here "my-experiment".
+    #
+    # Note: the following may be required if running via PyCharm:
+    # import matplotlib as mpl
+    # mpl.use('TkAgg')
+
+    if result is None:
+        raise ValueError("result does not contain any entries")
+
+    experiment_name = "my-experiment"
+
+    # select the subset of relevant data
+    data_selected = result.xs(
+        key=experiment_name,
+        level="experiment",
+    )
+    # bring the data into the required format for bar plotting
+    data_plot = data_selected[("metrics", "test", "f1")].T
+    # replace the columns with some hyperparameters for better readability, here dataset.select_n.stop
+    data_plot.columns = data_selected[("hparams", "dataset", "select_n", "stop")]
+    # rename the column index name, this will be used as caption for the legend
+    data_plot.columns.name = "number of training documents"
+    # plot
+    # use plotly, if available
+    if find_spec("plotly"):
+        pd.options.plotting.backend = "plotly"
+        fig = data_plot.plot.bar(title=experiment_name.replace("-", "\n"), barmode="group")
+        fig.show()
+    else:
+        import matplotlib.pyplot as plt
+
+        fig = data_plot.plot.bar(title=experiment_name.replace("-", "\n"))
+        plt.show()
+
+    print("done")