From d09cd3cc7920c00e6cc9756d9be3a9b8900a0262 Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Thu, 21 Mar 2024 16:12:44 +0100 Subject: [PATCH] Add transform submodule, parameter compression transform (#124) Compressing parameters in a dedicated transform is better than doing it directly in the benchmark runner, which would take over the responsibility of the transforms that we just introduced. Refactors `nnbench.io.transforms` into `nnbench.transforms`, the latter being its own submodule. This is useful when adding new builtin transforms, so that they do not all have to go into a single file. --- docs/guides/transforms.md | 2 +- examples/transforms/transforms.py | 2 +- src/nnbench/transforms/__init__.py | 1 + .../{io/transforms.py => transforms/base.py} | 0 src/nnbench/transforms/params.py | 48 +++++++++++++++++++ 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 src/nnbench/transforms/__init__.py rename src/nnbench/{io/transforms.py => transforms/base.py} (100%) create mode 100644 src/nnbench/transforms/params.py diff --git a/docs/guides/transforms.md b/docs/guides/transforms.md index 200b084..381c9ef 100644 --- a/docs/guides/transforms.md +++ b/docs/guides/transforms.md @@ -4,7 +4,7 @@ After a successful benchmark run execution, you end up with your metrics, contex In general, this data is a best-effort representation of the environment and configuration the benchmarks are run in. However, in some situations, manual editing and transformation of these records is required. -nnbench exposes the `nnbench.io.transforms` module to facilitate these transforms. +nnbench exposes the `nnbench.transforms` module to facilitate these transforms. ## Types of transforms: 1->1 vs. N->1 vs. 
N->N diff --git a/examples/transforms/transforms.py b/examples/transforms/transforms.py index be4a711..c03f62c 100644 --- a/examples/transforms/transforms.py +++ b/examples/transforms/transforms.py @@ -4,8 +4,8 @@ import numpy as np import nnbench -from nnbench.io.transforms import OneToOneTransform from nnbench.reporter.file import FileIO +from nnbench.transforms import OneToOneTransform from nnbench.types import BenchmarkRecord diff --git a/src/nnbench/transforms/__init__.py b/src/nnbench/transforms/__init__.py new file mode 100644 index 0000000..45d380f --- /dev/null +++ b/src/nnbench/transforms/__init__.py @@ -0,0 +1 @@ +from .base import ManyToManyTransform, ManyToOneTransform, OneToOneTransform diff --git a/src/nnbench/io/transforms.py b/src/nnbench/transforms/base.py similarity index 100% rename from src/nnbench/io/transforms.py rename to src/nnbench/transforms/base.py diff --git a/src/nnbench/transforms/params.py b/src/nnbench/transforms/params.py new file mode 100644 index 0000000..6c8c70c --- /dev/null +++ b/src/nnbench/transforms/params.py @@ -0,0 +1,85 @@
+from typing import Any, Sequence
+
+from nnbench.transforms import ManyToManyTransform, OneToOneTransform
+from nnbench.types import BenchmarkRecord
+
+
+class CompressionMixin:
+    """Mixin that compresses benchmark parameters into natively typed values."""
+
+    def compress(self, params: dict[str, Any]) -> dict[str, Any]:
+        """
+        Return a compressed copy of a benchmark parameter mapping.
+
+        Values of native types are kept as they are, the elements of a container
+        are compressed one by one (native elements are kept, all others become
+        their ``repr()``), dictionaries are compressed recursively, and every
+        other value is replaced by its ``repr()``.
+
+        Parameters
+        ----------
+        params: dict[str, Any]
+            The parameters to compress. The mapping itself is not modified.
+
+        Returns
+        -------
+        dict[str, Any]
+            A new dictionary holding the compressed value for each key.
+        """
+        containers = (tuple, list, set, frozenset)
+        natives = (float, int, str, bool, bytes, complex)
+        compressed: dict[str, Any] = {}
+
+        def _compress_impl(val):
+            if isinstance(val, natives):
+                # save native types without modification...
+                return val
+            else:
+                # ... or return the string repr.
+                # TODO: Allow custom representations for types with formatters.
+                return repr(val)
+
+        def _rebuild(original, items):
+            # Wrap the compressed items in the same container type as `original`.
+            container_type = type(original)
+            if isinstance(original, tuple) and hasattr(container_type, "_make"):
+                # Named tuples take one argument per field, not a single iterable.
+                return container_type._make(items)
+            try:
+                return container_type(items)
+            except TypeError:
+                # Subclass with a custom constructor: fall back to the builtin base.
+                return next(t for t in containers if isinstance(original, t))(items)
+
+        for k, v in params.items():
+            if isinstance(v, containers):
+                compressed[k] = _rebuild(v, [_compress_impl(vv) for vv in v])
+            elif isinstance(v, dict):
+                compressed[k] = self.compress(v)
+            else:
+                compressed[k] = _compress_impl(v)
+
+        return compressed
+
+
+class ParameterCompression1to1(OneToOneTransform, CompressionMixin):
+    """Transform compressing the parameters of each benchmark in one record."""
+
+    def apply(self, record: BenchmarkRecord) -> BenchmarkRecord:
+        """Compress every benchmark's ``params`` in place and return the record."""
+        for bm in record.benchmarks:
+            bm["params"] = self.compress(bm["params"])
+
+        return record
+
+
+class ParameterCompressionNtoN(ManyToManyTransform, CompressionMixin):
+    """Transform compressing the parameters of each benchmark in many records."""
+
+    def apply(self, record: Sequence[BenchmarkRecord]) -> Sequence[BenchmarkRecord]:
+        """Compress every benchmark's ``params`` in place and return the records."""
+        for rec in record:
+            for bm in rec.benchmarks:
+                bm["params"] = self.compress(bm["params"])
+
+        return record