|
6 | 6 | from __future__ import annotations |
7 | 7 |
|
8 | 8 | import fnmatch |
| 9 | +import hashlib |
9 | 10 | import io |
| 11 | +import json |
10 | 12 | import logging |
11 | 13 | import os |
12 | 14 | import os.path |
@@ -75,6 +77,82 @@ def patch_file(path: PathLike, old: str, new: str) -> None: |
75 | 77 | fp.write(new_content) |
76 | 78 |
|
77 | 79 |
|
def update_sbom_checksums(
    source_dir: PathLike, files_to_update: MutableMapping[str, PathLike]
) -> None:
    """
    Update checksums in sbom.spdx.json for modified files.

    Python 3.12+ includes an SBOM (Software Bill of Materials) that tracks
    file checksums. When we update files (e.g., expat sources), we need to
    recalculate their checksums.

    The SBOM is read and written as UTF-8 regardless of the platform's
    locale encoding, and it is only rewritten when at least one entry was
    actually changed. Files that do not exist on disk, and requested paths
    that have no matching ``fileName`` entry in the SBOM, are reported with
    a warning and otherwise skipped.

    :param source_dir: Path to the Python source directory
    :type source_dir: PathLike
    :param files_to_update: Mapping of SBOM relative paths to actual file paths
    :type files_to_update: MutableMapping[str, PathLike]
    """
    spdx_json = pathlib.Path(source_dir) / "Misc" / "sbom.spdx.json"

    # SBOM only exists in Python 3.12+
    if not spdx_json.exists():
        log.debug("SBOM file not found, skipping checksum updates")
        return

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(spdx_json, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # Compute the replacement checksum entries for every file that exists.
    checksums = {}
    for relative_path, file_path in files_to_update.items():
        file_path = pathlib.Path(file_path)
        if not file_path.exists():
            log.warning("File not found for checksum: %s", file_path)
            continue

        content = file_path.read_bytes()
        sha1 = hashlib.sha1(content).hexdigest()
        sha256 = hashlib.sha256(content).hexdigest()

        checksums[relative_path] = [
            {
                "algorithm": "SHA1",
                "checksumValue": sha1,
            },
            {
                "algorithm": "SHA256",
                "checksumValue": sha256,
            },
        ]
        log.debug(
            "Computed checksums for %s: SHA1=%s, SHA256=%s",
            relative_path,
            sha1,
            sha256,
        )

    # Update the SBOM entries whose fileName matches a recomputed path.
    updated_count = 0
    matched = set()
    for file_entry in data.get("files", []):
        file_name = file_entry.get("fileName")
        if file_name in checksums:
            file_entry["checksums"] = checksums[file_name]
            matched.add(file_name)
            updated_count += 1
            log.info("Updated SBOM checksums for %s", file_name)

    # A requested path without an SBOM entry would otherwise go unnoticed
    # and leave a stale checksum behind.
    for relative_path in checksums:
        if relative_path not in matched:
            log.warning("No SBOM entry found for %s", relative_path)

    # Only rewrite the file when something changed, so an untouched SBOM is
    # not reformatted for nothing.
    if updated_count:
        with open(spdx_json, "w", encoding="utf-8") as fp:
            json.dump(data, fp, indent=2)

    log.info("Updated %d file checksums in SBOM", updated_count)
| 154 | + |
| 155 | + |
78 | 156 | def patch_shebang(path: PathLike, old: str, new: str) -> bool: |
79 | 157 | """ |
80 | 158 | Replace a file's shebang. |
|
0 commit comments