Skip to content

Commit 974c8eb

Browse files
committed
add basic reproducible build setup
Signed-off-by: Zen <[email protected]>
1 parent 0618c6c commit 974c8eb

File tree

2 files changed

+41
-26
lines changed

2 files changed

+41
-26
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "pycpio"
7-
version = "1.1.7"
7+
version = "1.2.0"
88
authors = [
99
{ name="Desultory", email="[email protected]" },
1010
]

src/pycpio/cpio/archive.py

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from zenlib.logging import loggify
99
from zenlib.util import handle_plural
1010
from .symlink import CPIO_Symlink
11+
from .file import CPIO_File
1112

1213

1314
@loggify
@@ -18,43 +19,57 @@ class CPIOArchive(dict):
1819
def __setitem__(self, name, value):
1920
if name in self:
2021
raise AttributeError("Entry already exists: %s" % name)
22+
# If reproducible mode is enabled, set the inode to 0 so it can be recalculated
23+
if self.reproducible:
24+
value.header.ino = 0
25+
2126
# Check if the inode already exists
22-
# Ignore symlinks, they can have the same inode
23-
# Remove data from hardlinks, to save space
24-
if value.header.ino in self.inodes:
25-
if isinstance(value, CPIO_Symlink):
26-
self.logger.debug("[%s] Symlink inode already exists: %s" % (value.header.name, value.header.ino))
27-
elif self[self.inodes[value.header.ino][0]].data == value.data:
28-
self.logger.info("[%s] New hardlink detected, removing data." % value.header.name)
29-
# Remove the data from the current entry
30-
value.data = b''
31-
elif value.data == b'':
32-
self.logger.debug("[%s] Hardlink detected." % value.header.name)
33-
else:
34-
from .common import get_new_inode
35-
self.logger.warning("[%s] Inode already exists: %s" % (value.header.name, value.header.ino))
36-
value.header.ino = get_new_inode(self.inodes)
37-
self.logger.info("New inode: %s", value.header.ino)
38-
self.inodes[value.header.ino] = []
39-
self.inodes[value.header.ino].append(name)
40-
else:
41-
# Create an inode entry with the name
42-
self.inodes[value.header.ino] = [name]
27+
self._update_inodes(value)
4328

4429
# Check if the hash already exists and the data is not empty
4530
if value.hash in self.hashes and value.data != b'' and not isinstance(value, CPIO_Symlink):
4631
match = self[self.hashes[value.hash]]
4732
self.logger.warning("[%s] Hash matches existing entry: %s" % (value.header.name, match.header.name))
48-
value.header.ino = match.header.ino
49-
# run setitem again to handle the duplicate inode as a hardlink
50-
self[name] = value
33+
if match.data == value.data:
34+
self.logger.info("[%s] New hardlink detected by hash match." % value.header.name)
35+
self.inodes[value.header.ino].remove(value.header.name) # Remove the name from the inode list
36+
value.header.ino = match.header.ino
37+
self._update_inodes(value) # Update the inode list
38+
else:
39+
raise ValueError("[%s] Hash collision detected!" % value.header.name)
5140
else:
5241
# Add the name to the hash table
5342
self.hashes[value.hash] = name
5443

5544
super().__setitem__(name, value)
5645
self._update_nlinks(value.header.ino)
5746

47+
def _update_inodes(self, entry):
48+
"""
49+
Checks if an entry exists with the same inode,
50+
if it's a hardlink, remove the data in the copy.
51+
"""
52+
if entry.header.ino in self.inodes:
53+
if isinstance(entry, CPIO_File) and self[self.inodes[entry.header.ino][0]].data == entry.data:
54+
self.logger.info("[%s] New hardlink detected, removing data." % entry.header.name)
55+
# Remove the data from the current entry
56+
entry.data = b''
57+
elif isinstance(entry, CPIO_File) and entry.data == b'':
58+
self.logger.debug("[%s] Hardlink detected." % entry.header.name)
59+
else:
60+
from .common import get_new_inode
61+
if entry.header.ino == 0 and not self.reproducible:
62+
self.logger.warning("[%s] Inode already exists: %s" % (entry.header.name, entry.header.ino))
63+
entry.header.ino = get_new_inode(self.inodes)
64+
if self.reproducible:
65+
self.logger.debug("[%s] Inode recalculated: %s" % (entry.header.name, entry.header.ino))
66+
else:
67+
self.logger.info("[%s] New inode: %s" % (entry.header.name, entry.header.ino))
68+
self.inodes[entry.header.ino] = []
69+
else:
70+
self.inodes[entry.header.ino] = []
71+
self.inodes[entry.header.ino].append(entry.header.name)
72+
5873
def _update_nlinks(self, inode):
5974
""" Update nlinks for all entries with the same inode """
6075
# Get the number of links based on the number of entries with that inode
@@ -68,11 +83,11 @@ def __contains__(self, name):
6883
return super().__contains__(self._normalize_name(name))
6984

7085
def __init__(self, structure=HEADER_NEW, reproducible=False, *args, **kwargs):
71-
super().__init__(*args, **kwargs)
7286
self.structure = structure
7387
self.reproducible = reproducible
7488
self.inodes = {}
7589
self.hashes = {}
90+
super().__init__(*args, **kwargs)
7691

7792
def update(self, other):
7893
""" Update the archive with the values from another archive. """

0 commit comments

Comments
 (0)