Skip to content

Commit 3f61201

Browse files
committed
feat: compare SBOMs and show components only present in target SBOM
Signed-off-by: badrikesh prusty <[email protected]>
1 parent 0337d3a commit 3f61201

File tree

2 files changed

+243
-0
lines changed

2 files changed

+243
-0
lines changed

src/debsbom/cli.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from .commands.source_merge import SourceMergeCmd
1919
from .commands.repack import RepackCmd
2020
from .commands.export import ExportCmd
21+
from .commands.compare import CompareCmd
2122

2223
# Attempt to import optional download dependencies to check their availability.
2324
# The success or failure of these imports determines if download features are enabled.
@@ -64,6 +65,7 @@ def setup_parser():
6465
)
6566
RepackCmd.setup_parser(subparser.add_parser("repack", help="repack sources and sbom"))
6667
ExportCmd.setup_parser(subparser.add_parser("export", help="export SBOM as graph"))
68+
CompareCmd.setup_parser(subparser.add_parser("compare", help="compare SBOMs and list new components"))
6769

6870
return parser
6971

@@ -97,6 +99,8 @@ def main():
9799
ExportCmd.run(args)
98100
elif args.cmd == "merge":
99101
MergeCmd.run(args)
102+
elif args.cmd == "compare":
103+
CompareCmd.run(args)
100104
except DistroArchUnknownError as e:
101105
logger.error(f"debsbom: error: {e}. Set --distro-arch to dpkg architecture (e.g. amd64)")
102106
sys.exit(-2)

src/debsbom/commands/compare.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
# Copyright (C) 2025 Siemens
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
import json
6+
import os
7+
import shutil
8+
import sys
9+
import tempfile
10+
import urllib.request
11+
import urllib.error
12+
import uuid
13+
from .input import SbomInput
14+
from datetime import datetime
15+
16+
import logging
17+
import sys
18+
19+
20+
logger = logging.getLogger(__name__)
21+
22+
23+
class CompareCmd(SbomInput):
24+
"""
25+
Compare two SBOMs and generate a new SBOM containing only the additional components found in the target
26+
"""
27+
28+
@classmethod
29+
def run(cls, args):
30+
with tempfile.TemporaryDirectory() as tmp_dir:
31+
base_sbom_tmp = os.path.join(tmp_dir, "base_sbom_tmp.json")
32+
target_sbom_tmp = os.path.join(tmp_dir, "target_sbom_tmp.json")
33+
34+
cls.get_sbom_info(args.base_sbom, base_sbom_tmp)
35+
cls.get_sbom_info(args.target_sbom, target_sbom_tmp)
36+
37+
with open(base_sbom_tmp) as f:
38+
base_sbom_data = json.load(f)
39+
with open(target_sbom_tmp) as f:
40+
target_sbom_data = json.load(f)
41+
42+
base_sbom_fmt = cls.detect_sbom_format(base_sbom_data)
43+
target_sbom_fmt = cls.detect_sbom_format(target_sbom_data)
44+
45+
if not base_sbom_fmt or not target_sbom_fmt:
46+
raise ValueError("can not detect SBOM format for one or both files")
47+
48+
if base_sbom_fmt != target_sbom_fmt:
49+
raise ValueError("can not compare mixed SPDX and CycloneDX documents")
50+
51+
if target_sbom_fmt == "spdx":
52+
base_sbom_comp, base_sbom = cls.load_spdx_sbom(base_sbom_tmp)
53+
target_sbom_comp, target_sbom = cls.load_spdx_sbom(target_sbom_tmp)
54+
extra_pkgs = cls.compare_items(base_sbom_comp, target_sbom_comp, "pkg")
55+
56+
ref_creation_info = base_sbom.get("creationInfo", {
57+
"creators": ["Tool: sbom-diff-generator 1.0"],
58+
"created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
59+
})
60+
result = cls.build_extra_spdx(extra_pkgs, ref_creation_info)
61+
62+
63+
elif target_sbom_fmt == "cdx":
64+
base_sbom_comp, base_sbom = cls.load_cdx_sbom(base_sbom_tmp)
65+
target_sbom_comp, target_sbom = cls.load_cdx_sbom(target_sbom_tmp)
66+
67+
extra_components = cls.compare_items(base_sbom_comp, target_sbom_comp, "component")
68+
result = cls.build_extra_cdx(extra_components, target_sbom)
69+
70+
71+
else:
72+
raise ValueError(f"Unsupported SBOM format: {new_fmt}")
73+
74+
out_dir = os.path.dirname(args.out_file)
75+
if out_dir:
76+
os.makedirs(out_dir, exist_ok=True)
77+
78+
with open(args.out_file, 'w', encoding='utf-8') as f:
79+
json.dump(result, f, indent=4)
80+
81+
82+
@classmethod
83+
def get_sbom_info(cls, sbom, sbom_tmp):
84+
"""
85+
Retrieve SBOM file content (from local path or URL) and store it in a temporary file.
86+
87+
Args:
88+
sbom (str): Path or URL to the SBOM file.
89+
sbom_tmp (str): Temporary destination path to copy/download the SBOM.
90+
"""
91+
if sbom.startswith(('http://', 'https://', 'ftp://')):
92+
try:
93+
with urllib.request.urlopen(sbom) as response, open(sbom_tmp, 'wb') as out_file:
94+
shutil.copyfileobj(response, out_file)
95+
except (urllib.error.URLError, urllib.error.HTTPError) as e:
96+
error(f"Unable to fetch: {sbom}{e}")
97+
else:
98+
if not os.path.isfile(sbom):
99+
error(f"File not found: {sbom}")
100+
try:
101+
shutil.copy(sbom, sbom_tmp)
102+
except Exception as e:
103+
error(f"Error copying file: {e}")
104+
105+
106+
@classmethod
107+
def detect_sbom_format(cls, data):
108+
"""
109+
Detect SBOM format based on known top-level keys.
110+
Returns 'spdx' or 'cdx' or None.
111+
"""
112+
if "spdxVersion" in data:
113+
return "spdx"
114+
if data.get("bomFormat", "").lower() == "cyclonedx":
115+
return "cdx"
116+
return None
117+
118+
119+
@classmethod
120+
def load_spdx_sbom(cls, path):
121+
"""Return packages keyed by purl or fallback name@version."""
122+
with open(path) as f:
123+
data = json.load(f)
124+
125+
packages = {}
126+
for pkg in data.get("packages", []):
127+
purl = next(
128+
(ref["referenceLocator"]
129+
for ref in pkg.get("externalRefs", [])
130+
if ref.get("referenceType") == "purl"),
131+
None
132+
)
133+
if not purl:
134+
version = pkg.get("versionInfo", "")
135+
purl = f"{pkg.get('name')}@{version}"
136+
137+
sha256 = next(
138+
(c["checksumValue"]
139+
for c in pkg.get("checksums", [])
140+
if c.get("algorithm", "").upper() == "SHA256"),
141+
None
142+
)
143+
144+
packages[purl] = {"pkg": pkg, "sha256": sha256}
145+
146+
return packages, data
147+
148+
149+
@classmethod
150+
def build_extra_spdx(cls, extra_pkgs, ref_creation_info):
151+
"""Build minimal SPDX 2.3 JSON document."""
152+
return {
153+
"spdxVersion": "SPDX-2.3",
154+
"SPDXID": "SPDXRef-DOCUMENT",
155+
"name": "Extra Components SBOM",
156+
"dataLicense": "CC0-1.0",
157+
"documentNamespace": f"https://example.org/spdx/extra-{uuid.uuid4()}",
158+
"creationInfo": ref_creation_info,
159+
"packages": extra_pkgs
160+
}
161+
162+
163+
@classmethod
164+
def load_cdx_sbom(cls, path):
165+
"""Return components keyed by purl or fallback name@version."""
166+
with open(path) as f:
167+
data = json.load(f)
168+
169+
components = {}
170+
for comp in data.get("components", []):
171+
purl = comp.get("purl") or f"{comp.get('name')}@{comp.get('version', '')}"
172+
sha256 = next(
173+
(h["content"]
174+
for h in comp.get("hashes", [])
175+
if h.get("alg", "").upper() == "SHA-256"),
176+
None
177+
)
178+
components[purl] = {"component": comp, "sha256": sha256}
179+
180+
return components, data
181+
182+
183+
@classmethod
184+
def build_extra_cdx(cls, extra_components, new_metadata=None):
185+
"""Build minimal CycloneDX 1.5 JSON SBOM."""
186+
sbom = {
187+
"bomFormat": "CycloneDX",
188+
"specVersion": "1.5",
189+
"version": 1,
190+
"serialNumber": f"urn:uuid:{uuid.uuid4()}",
191+
"components": extra_components
192+
}
193+
if new_metadata and "metadata" in new_metadata:
194+
sbom["metadata"] = new_metadata["metadata"]
195+
else:
196+
sbom["metadata"] = {
197+
"timestamp": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
198+
"tools": [{"name": "sbom-diff-generator", "version": "1.0"}]
199+
}
200+
return sbom
201+
202+
203+
@classmethod
204+
def compare_items(cls, base_sbom_comp, target_sbom_comp, key_name):
205+
"""Generic comparison for SPDX or CDX items keyed by purl."""
206+
extra = []
207+
for key, new_info in target_sbom_comp.items():
208+
base_info = base_sbom_comp.get(key)
209+
new_sha = (new_info["sha256"] or "").lower().strip()
210+
ref_sha = ((base_info or {}).get("sha256") or "").lower().strip()
211+
212+
if base_info is None or (ref_sha and new_sha and ref_sha != new_sha):
213+
extra.append(new_info[key_name])
214+
return extra
215+
216+
217+
@classmethod
def setup_parser(cls, parser):
    """
    Register the command line arguments of the compare command.

    Adds the shared SBOM input arguments first, then the options selecting
    the base SBOM, the target SBOM and the output file.

    Args:
        parser: Argument parser of the "compare" sub-command.
    """
    cls.parser_add_sbom_input_args(parser)

    # (flags, keyword arguments) in the order they are registered.
    option_specs = (
        (
            ("-b", "--base-sbom"),
            {
                "required": True,
                "help": "Path or URL to the base (reference) SBOM file",
            },
        ),
        (
            ("-n", "--target-sbom"),
            {
                "required": True,
                "help": "Path or URL to the target (new) SBOM file",
            },
        ),
        (
            ("-o", "--out-file"),
            {
                "default": "uncleared_components.json",
                "help": "Path to the output JSON file (default: uncleared_components.json)",
            },
        ),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

0 commit comments

Comments
 (0)