Skip to content

Commit 62cc99a

Browse files
authored
Merge pull request #63 from TNG/profiling
Replace Path objects with str
2 parents fc6b2f7 + a8c703f commit 62cc99a

File tree

17 files changed

+361
-306
lines changed

17 files changed

+361
-306
lines changed

.github/workflows/sbom.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ on:
88
inputs:
99
test_archive:
1010
description: Name of the test archive to download from https://fileshare.tngtech.com/d/e69946da808b41f88047/
11-
default: linux.v6.17.tinyconfig.tar.gz
11+
default: linux.v6.17.tinyconfig.x86.tar.gz
1212
output_tree:
1313
description: Path to the output tree relative to the src tree
1414
default: kernel_build

sbom/lib/sbom/cmd/cmd_file_parser.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,24 @@
22
# SPDX-FileCopyrightText: 2025 TNG Technology Consulting GmbH
33

44
import re
5-
from pathlib import Path
65
from dataclasses import dataclass, field
76
import sbom.errors as sbom_errors
7+
from sbom.path_utils import PathStr
88

99
SAVEDCMD_PATTERN = re.compile(r"^(saved)?cmd_.*?:=\s*(?P<full_command>.+)$")
1010
SOURCE_PATTERN = re.compile(r"^source.*?:=\s*(?P<source_file>.+)$")
1111

1212

1313
@dataclass
1414
class CmdFile:
15-
cmd_file_path: Path
15+
cmd_file_path: PathStr
1616
savedcmd: str
17-
source: Path | None = None
17+
source: PathStr | None = None
1818
deps: list[str] = field(default_factory=list[str])
1919
make_rules: list[str] = field(default_factory=list[str])
2020

2121

22-
def parse_cmd_file(cmd_file_path: Path) -> CmdFile | None:
22+
def parse_cmd_file(cmd_file_path: PathStr) -> CmdFile | None:
2323
"""
2424
Parses a .cmd file.
2525
.cmd files can have the following structures:
@@ -70,7 +70,7 @@ def parse_cmd_file(cmd_file_path: Path) -> CmdFile | None:
7070
if line1 is None:
7171
sbom_errors.log(f"Skip parsing '{cmd_file_path}' because no 'source_' entry was found.")
7272
return CmdFile(cmd_file_path, savedcmd)
73-
source = Path(line1.group("source_file"))
73+
source = line1.group("source_file")
7474

7575
# deps
7676
deps: list[str] = []

sbom/lib/sbom/cmd/cmd_graph.py

Lines changed: 50 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import logging
66
import os
7-
from pathlib import Path
87
from dataclasses import dataclass, field
98
import pickle
109
from typing import Iterator
@@ -14,12 +13,13 @@
1413
from sbom.cmd.incbin_parser import parse_incbin
1514
from sbom.cmd.cmd_file_parser import CmdFile, parse_cmd_file
1615
import sbom.errors as sbom_errors
16+
from sbom.path_utils import PathStr, is_relative_to
1717
from .hardcoded_dependencies import get_hardcoded_dependencies
1818

1919

2020
@dataclass
2121
class CmdGraphNode:
22-
absolute_path: Path
22+
absolute_path: PathStr
2323
cmd_file: CmdFile | None = None
2424
children: list["CmdGraphNode"] = field(default_factory=list["CmdGraphNode"])
2525

@@ -29,20 +29,20 @@ class CmdGraph:
2929
roots: list[CmdGraphNode] = field(default_factory=list[CmdGraphNode])
3030

3131

32-
def build_cmd_graph(root_paths: list[Path], output_tree: Path, src_tree: Path, log_depth: int = 0) -> CmdGraph:
33-
node_cache: dict[Path, CmdGraphNode] = {}
32+
def build_cmd_graph(root_paths: list[PathStr], output_tree: PathStr, src_tree: PathStr, log_depth: int = 0) -> CmdGraph:
33+
node_cache: dict[PathStr, CmdGraphNode] = {}
3434
root_nodes = [
35-
build_cmd_graph_node(root_path, output_tree, src_tree, node_cache, log_depth=log_depth)
35+
build_cmd_graph_node(str(root_path), str(output_tree), str(src_tree), node_cache, log_depth=log_depth)
3636
for root_path in root_paths
3737
]
3838
return CmdGraph(root_nodes)
3939

4040

4141
def build_cmd_graph_node(
42-
root_path: Path,
43-
output_tree: Path,
44-
src_tree: Path,
45-
cache: dict[Path, CmdGraphNode] | None = None,
42+
root_path: PathStr,
43+
output_tree: PathStr,
44+
src_tree: PathStr,
45+
cache: dict[PathStr, CmdGraphNode] | None = None,
4646
depth: int = 0,
4747
log_depth: int = 0,
4848
) -> CmdGraphNode:
@@ -64,7 +64,7 @@ def build_cmd_graph_node(
6464
if cache is None:
6565
cache = {}
6666

67-
root_path_absolute = Path(os.path.normpath(output_tree / root_path))
67+
root_path_absolute = os.path.normpath(os.path.join(output_tree, root_path))
6868
if root_path_absolute in cache.keys():
6969
if depth <= log_depth:
7070
logging.info(f"Reuse Node: {' ' * depth}{root_path}")
@@ -73,12 +73,12 @@ def build_cmd_graph_node(
7373
if depth <= log_depth:
7474
logging.info(f"Build Node: {' ' * depth}{root_path}")
7575
cmd_path = _to_cmd_path(root_path_absolute)
76-
cmd_file = parse_cmd_file(cmd_path) if cmd_path.exists() else None
76+
cmd_file = parse_cmd_file(cmd_path) if os.path.exists(cmd_path) else None
7777
node = CmdGraphNode(root_path_absolute, cmd_file)
7878
cache[root_path_absolute] = node
7979

80-
if not root_path_absolute.exists():
81-
if root_path_absolute.is_relative_to(output_tree) or root_path_absolute.is_relative_to(src_tree):
80+
if not os.path.exists(root_path_absolute):
81+
if is_relative_to(root_path_absolute, output_tree) or is_relative_to(root_path_absolute, src_tree):
8282
sbom_errors.log(f"Skip parsing '{root_path_absolute}' because file does not exist")
8383
else:
8484
logging.warning(f"Skip parsing {root_path_absolute} because file does not exist")
@@ -88,7 +88,7 @@ def build_cmd_graph_node(
8888
child_paths = get_hardcoded_dependencies(root_path_absolute, output_tree, src_tree)
8989
if cmd_file is not None:
9090
child_paths += _parse_cmd_file(cmd_file, output_tree, src_tree, root_path)
91-
if node.absolute_path.suffix == ".S":
91+
if node.absolute_path.endswith(".S"):
9292
child_paths += _parse_incbin(node.absolute_path, output_tree, src_tree, root_path)
9393

9494
# Create child nodes
@@ -99,17 +99,19 @@ def build_cmd_graph_node(
9999
return node
100100

101101

102-
def _parse_cmd_file(cmd_file: CmdFile, output_tree: Path, src_tree: Path, root_artifact: Path) -> list[Path]:
103-
input_files = parse_commands(cmd_file.savedcmd)
102+
def _parse_cmd_file(
103+
cmd_file: CmdFile, output_tree: PathStr, src_tree: PathStr, root_artifact: PathStr
104+
) -> list[PathStr]:
105+
input_files: list[PathStr] = [str(p) for p in parse_commands(cmd_file.savedcmd)]
104106
if cmd_file.deps:
105-
input_files += parse_deps(cmd_file.deps)
107+
input_files += [str(p) for p in parse_deps(cmd_file.deps)]
106108
input_files = _expand_resolve_files(input_files, output_tree)
107109

108-
child_paths: list[Path] = []
109-
working_directory: Path | None = None
110+
child_paths: list[PathStr] = []
111+
working_directory: PathStr | None = None
110112
for input_file in input_files:
111113
if os.path.isabs(input_file):
112-
child_paths.append(Path(os.path.relpath(input_file, output_tree)))
114+
child_paths.append(os.path.relpath(input_file, output_tree))
113115
continue
114116

115117
if working_directory is None:
@@ -122,15 +124,17 @@ def _parse_cmd_file(cmd_file: CmdFile, output_tree: Path, src_tree: Path, root_a
122124
)
123125
return []
124126

125-
child_paths.append(Path(os.path.normpath(working_directory / input_file)))
127+
child_paths.append(os.path.normpath(os.path.join(working_directory, input_file)))
126128

127129
# Remove root output from the input_files to prevent cycles.
128130
# Some multi stage commands create an output and pass it as input to the next command, e.g., objcopy.
129131
child_paths = [child_path for child_path in child_paths if child_path != root_artifact]
130132
return child_paths
131133

132134

133-
def _parse_incbin(assembly_path: Path, output_tree: Path, src_tree: Path, root_output_in_tree: Path) -> list[Path]:
135+
def _parse_incbin(
136+
assembly_path: PathStr, output_tree: PathStr, src_tree: PathStr, root_output_in_tree: PathStr
137+
) -> list[PathStr]:
134138
incbin_paths = parse_incbin(assembly_path)
135139
if len(incbin_paths) == 0:
136140
return []
@@ -140,11 +144,11 @@ def _parse_incbin(assembly_path: Path, output_tree: Path, src_tree: Path, root_o
140144
f"Skip children of node {root_output_in_tree} because no working directory for {incbin_paths[0]} could be found"
141145
)
142146
return []
143-
return [Path(os.path.normpath(working_directory / incbin_path)) for incbin_path in incbin_paths]
147+
return [os.path.normpath(os.path.join(working_directory, incbin_path)) for incbin_path in incbin_paths]
144148

145149

146150
def iter_cmd_graph(cmd_graph: CmdGraph | CmdGraphNode) -> Iterator[CmdGraphNode]:
147-
visited: set[Path] = set()
151+
visited: set[PathStr] = set()
148152
node_stack: list[CmdGraphNode] = cmd_graph.roots.copy() if isinstance(cmd_graph, CmdGraph) else [cmd_graph]
149153
while len(node_stack) > 0:
150154
node = node_stack.pop(0)
@@ -156,20 +160,20 @@ def iter_cmd_graph(cmd_graph: CmdGraph | CmdGraphNode) -> Iterator[CmdGraphNode]
156160
yield node
157161

158162

159-
def save_cmd_graph(node: CmdGraph, path: Path) -> None:
163+
def save_cmd_graph(node: CmdGraph, path: PathStr) -> None:
160164
with open(path, "wb") as f:
161165
pickle.dump(node, f)
162166

163167

164-
def load_cmd_graph(path: Path) -> CmdGraph:
168+
def load_cmd_graph(path: PathStr) -> CmdGraph:
165169
with open(path, "rb") as f:
166170
return pickle.load(f)
167171

168172

169173
def build_or_load_cmd_graph(
170-
root_paths: list[Path], output_tree: Path, src_tree: Path, cmd_graph_path: Path
174+
root_paths: list[PathStr], output_tree: PathStr, src_tree: PathStr, cmd_graph_path: PathStr
171175
) -> CmdGraph:
172-
if cmd_graph_path.exists():
176+
if os.path.exists(cmd_graph_path):
173177
logging.info("Load cmd graph")
174178
cmd_graph = load_cmd_graph(cmd_graph_path)
175179
else:
@@ -179,43 +183,46 @@ def build_or_load_cmd_graph(
179183
return cmd_graph
180184

181185

182-
def _to_cmd_path(path: Path) -> Path:
183-
return path.parent / f".{path.name}.cmd"
186+
def _to_cmd_path(path: PathStr) -> PathStr:
187+
name = os.path.basename(path)
188+
return path.removesuffix(name) + f".{name}.cmd"
184189

185190

186-
def _get_working_directory(input_file: Path, output_tree: Path, src_tree: Path, root_artifact: Path) -> Path | None:
191+
def _get_working_directory(
192+
input_file: PathStr, output_tree: PathStr, src_tree: PathStr, root_artifact: PathStr
193+
) -> PathStr | None:
187194
"""
188195
Input paths in .cmd files are often relative paths but it is unclear to which original working directory these paths are relative to.
189196
This function heuristically estimates the working directory for a given input_file and returns the working directory relative to the output tree.
190197
"""
191198

192-
relative_to_cmd_file = (output_tree / root_artifact.parent / input_file).exists()
193-
relative_to_output_tree = (output_tree / input_file).exists()
194-
relative_to_tools_objtool = str(root_artifact).startswith("tools/objtool/arch/x86")
195-
relative_to_tools_lib_subcmd = str(root_artifact).startswith("tools/objtool/libsubcmd")
199+
relative_to_cmd_file = os.path.exists(os.path.join(output_tree, os.path.dirname(root_artifact), input_file))
200+
relative_to_output_tree = os.path.exists(os.path.join(output_tree, input_file))
201+
relative_to_tools_objtool = root_artifact.startswith("tools/objtool/arch/x86")
202+
relative_to_tools_lib_subcmd = root_artifact.startswith("tools/objtool/libsubcmd")
196203

197204
if relative_to_cmd_file:
198-
return root_artifact.parent
205+
return os.path.dirname(root_artifact)
199206
elif relative_to_output_tree:
200-
return Path(".")
207+
return "."
201208
elif relative_to_tools_objtool:
202209
# Input path relative to `tools/objtool` (e.g., `tools/objtool/arch/x86/special.o` has input `arch/x86/special.c`)
203-
return Path(os.path.relpath(src_tree, output_tree)) / "tools/objtool"
210+
return os.path.join(os.path.relpath(src_tree, output_tree), "tools/objtool")
204211
elif relative_to_tools_lib_subcmd:
205212
# Input path relative to `tools/lib/subcmd` (e.g., `tools/objtool/libsubcmd/.sigchain.o` has input `subcmd-util.h` which lives in `tools/lib/subcmd/subcmd-util.h`)
206-
return Path(os.path.relpath(src_tree, output_tree)) / "tools/lib/subcmd"
213+
return os.path.join(os.path.relpath(src_tree, output_tree), "tools/lib/subcmd")
207214

208215
return None
209216

210217

211-
def _expand_resolve_files(input_files: list[Path], output_tree: Path) -> list[Path]:
212-
expanded_input_files: list[Path] = []
218+
def _expand_resolve_files(input_files: list[PathStr], output_tree: PathStr) -> list[PathStr]:
219+
expanded_input_files: list[PathStr] = []
213220
for input_file in input_files:
214221
input_file_str = str(input_file)
215222
if not input_file_str.startswith("@"):
216223
expanded_input_files.append(input_file)
217224
continue
218-
with open(output_tree / input_file_str[1:], "r") as f:
219-
resolve_file_content = [Path(line.strip()) for line in f.readlines() if line.strip()]
225+
with open(os.path.join(output_tree, input_file_str[1:]), "r") as f:
226+
resolve_file_content = [line.strip() for line in f.readlines() if line.strip()]
220227
expanded_input_files += _expand_resolve_files(resolve_file_content, output_tree)
221228
return expanded_input_files

sbom/lib/sbom/cmd/deps_parser.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,35 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22
# SPDX-FileCopyrightText: 2025 TNG Technology Consulting GmbH
33

4-
from pathlib import Path
54
import re
65
import sbom.errors as sbom_errors
6+
from sbom.path_utils import PathStr
77

88
CONFIG_PATTERN = re.compile(r"\$\(wildcard (include/config/[^)]+)\)")
99
WILDCARD_PATTERN = re.compile(r"\$\(wildcard (?P<path>[^)]+)\)")
1010
VALID_PATH_PATTERN = re.compile(r"^(\/)?(([\w\-\., ]*)\/)*[\w\-\., ]+$")
1111

1212

13-
def parse_deps(deps: list[str]) -> list[Path]:
13+
def parse_deps(deps: list[str]) -> list[PathStr]:
1414
"""
1515
Parse dependency strings of a .cmd file and return valid input file paths.
1616
Args:
1717
deps: List of dependency strings as found in `.cmd` files.
1818
Returns:
1919
input_files: List of input file paths
2020
"""
21-
input_files: list[Path] = []
21+
input_files: list[PathStr] = []
2222
for dep in deps:
2323
dep = dep.strip()
2424
match dep:
2525
case _ if _ := CONFIG_PATTERN.match(dep):
2626
# config paths like include/config/<CONFIG_NAME> are not included in the graph
2727
continue
2828
case _ if match := WILDCARD_PATTERN.match(dep):
29-
path = Path(match.group("path"))
29+
path = match.group("path")
3030
input_files.append(path)
3131
case _ if VALID_PATH_PATTERN.match(dep):
32-
input_files.append(Path(dep))
32+
input_files.append(dep)
3333

3434
case _:
3535
sbom_errors.log(f"Skip parsing dependency {dep} because of unrecognized format")

sbom/lib/sbom/cmd/hardcoded_dependencies.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22
# SPDX-FileCopyrightText: 2025 TNG Technology Consulting GmbH
33

4-
from pathlib import Path
54
import os
65
from typing import Callable
76
import sbom.errors as sbom_errors
7+
from sbom.path_utils import PathStr, is_relative_to
88

99
HARDCODED_DEPENDENCIES: dict[str, list[str]] = {
1010
# defined in linux/Kbuild
@@ -15,7 +15,7 @@
1515
}
1616

1717

18-
def get_hardcoded_dependencies(path: Path, output_tree: Path, src_tree: Path) -> list[Path]:
18+
def get_hardcoded_dependencies(path: PathStr, output_tree: PathStr, src_tree: PathStr) -> list[PathStr]:
1919
"""
2020
Some files in the Linux kernel build process are not tracked by the .cmd dependency mechanism.
2121
This function provides a temporary workaround by manually specifying known missing dependencies required to correctly model the build graph.
@@ -29,10 +29,10 @@ def get_hardcoded_dependencies(path: Path, output_tree: Path, src_tree: Path) ->
2929
list[Path]: A list of dependency file paths (relative to the output tree) required to build the file at the given path.
3030
"""
3131
key: str | None = None
32-
if path.is_relative_to(output_tree):
33-
key = str(path.relative_to(output_tree))
34-
elif path.is_relative_to(src_tree):
35-
key = str(path.relative_to(src_tree))
32+
if is_relative_to(path, output_tree):
33+
key = os.path.relpath(path, output_tree)
34+
elif is_relative_to(path, src_tree):
35+
key = os.path.relpath(path, src_tree)
3636

3737
if key is None or key not in HARDCODED_DEPENDENCIES:
3838
return []
@@ -41,15 +41,15 @@ def get_hardcoded_dependencies(path: Path, output_tree: Path, src_tree: Path) ->
4141
"arch": lambda: _get_arch(path),
4242
}
4343

44-
dependencies: list[Path] = []
44+
dependencies: list[PathStr] = []
4545
for template in HARDCODED_DEPENDENCIES[key]:
4646
dependency = _evaluate_template(template, template_variables)
4747
if dependency is None:
4848
continue
49-
if (output_tree / dependency).exists():
50-
dependencies.append(Path(dependency))
51-
elif (src_tree / dependency).exists():
52-
dependencies.append(Path(os.path.relpath(dependency, output_tree)))
49+
if os.path.exists(os.path.join(output_tree, dependency)):
50+
dependencies.append(dependency)
51+
elif os.path.exists(os.path.join(src_tree, dependency)):
52+
dependencies.append(os.path.relpath(dependency, output_tree))
5353
else:
5454
sbom_errors.log(
5555
f"Skip hardcoded dependency '{dependency}' for '{path}' because the dependency lies neither in the src tree nor the output tree."
@@ -69,7 +69,7 @@ def _evaluate_template(template: str, variables: dict[str, Callable[[], str | No
6969
return template
7070

7171

72-
def _get_arch(path: Path):
72+
def _get_arch(path: PathStr):
7373
srcarch = os.environ.get("SRCARCH")
7474
if srcarch is None:
7575
sbom_errors.log(

0 commit comments

Comments
 (0)