Skip to content

Commit b528931

Browse files
committed
chore(resolver): compress cache files with zstd
Tests on some systems show that the cache can get quite big (50 MB or more) if packages that have multiple references on the snapshot mirrors are processed (e.g. firefox-esr). To reduce the impact, we compress the cache with zstd. This library is available on almost all systems anyway and will be part of the Python standard library from 3.14 on. Signed-off-by: Felix Moessbauer <[email protected]>
1 parent 7d84aa9 commit b528931

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ dev = [
4444
]
4545
download = [
4646
"requests>=2.25.1",
47+
"zstandard>=0.20",
4748
]
4849
# only distributed in Debian (not pip).
4950
# only needed to speedup apt parsing

src/debsbom/download/resolver.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
from collections.abc import Iterable
77
import dataclasses
88
import hashlib
9+
import io
910
import json
1011
import logging
1112
from pathlib import Path
1213
from packageurl import PackageURL
14+
from zstandard import ZstdCompressor, ZstdDecompressor
1315

1416
from ..dpkg import package
1517
from ..snapshot import client as sdlclient
@@ -44,6 +46,8 @@ class PersistentResolverCache(PackageResolverCache):
4446

4547
def __init__(self, cachedir: Path):
4648
self.cachedir = cachedir
49+
self.cctx = ZstdCompressor(level=10)
50+
self.dctx = ZstdDecompressor()
4751
cachedir.mkdir(exist_ok=True)
4852

4953
@staticmethod
@@ -53,15 +57,19 @@ def _package_hash(p: package.SourcePackage | package.BinaryPackage) -> str:
5357
).hexdigest()
5458

5559
def _entry_path(self, hash: str) -> Path:
56-
return self.cachedir / f"{hash}.json"
60+
return self.cachedir / f"{hash}.json.zst"
5761

5862
def lookup(self, p: package.SourcePackage | package.BinaryPackage) -> list["RemoteFile"] | None:
5963
hash = self._package_hash(p)
6064
entry = self._entry_path(hash)
6165
if not entry.is_file():
6266
logger.debug(f"Package '{p.name}' is not cached")
6367
return None
64-
with open(entry, "r") as f:
68+
with (
69+
open(entry, "rb") as _f,
70+
self.dctx.stream_reader(_f) as cf,
71+
io.TextIOWrapper(cf, encoding="utf-8") as f,
72+
):
6573
try:
6674
data = json.load(f)
6775
except json.decoder.JSONDecodeError:
@@ -75,7 +83,11 @@ def insert(
7583
) -> None:
7684
hash = self._package_hash(p)
7785
entry = self._entry_path(hash)
78-
with open(entry.with_suffix(".tmp"), "w") as f:
86+
with (
87+
open(entry.with_suffix(".tmp"), "wb") as _f,
88+
self.cctx.stream_writer(_f) as cf,
89+
io.TextIOWrapper(cf, encoding="utf-8") as f,
90+
):
7991
json.dump([dataclasses.asdict(rf) for rf in files], f)
8092
entry.with_suffix(".tmp").rename(entry)
8193

0 commit comments

Comments
 (0)