88from zenlib .logging import loggify
99from zenlib .util import handle_plural
1010from .symlink import CPIO_Symlink
11+ from .file import CPIO_File
1112
1213
1314@loggify
@@ -18,43 +19,57 @@ class CPIOArchive(dict):
def __setitem__(self, name, value):
    """
    Add an entry to the archive under ``name``.

    The entry is registered in ``self.inodes`` (via ``_update_inodes``) and
    ``self.hashes``. If its hash matches an existing non-symlink entry with
    identical data, it is re-registered under that entry's inode so it is
    treated as a hardlink.

    Raises:
        AttributeError: if ``name`` is already present in the archive.
        ValueError: if the hash matches an existing entry but the data differs.
    """
    if name in self:
        raise AttributeError("Entry already exists: %s" % name)

    # If reproducible is enabled, set the inode to 0, so it can be recalculated
    if self.reproducible:
        value.header.ino = 0

    # Register the entry under its inode; this may reassign the inode
    # or strip the data if the entry is a hardlink.
    self._update_inodes(value)

    # Check if the hash already exists and the data is not empty
    if value.hash in self.hashes and value.data != b'' and not isinstance(value, CPIO_Symlink):
        match = self[self.hashes[value.hash]]
        self.logger.warning("[%s] Hash matches existing entry: %s" % (value.header.name, match.header.name))
        if match.data != value.data:
            raise ValueError("[%s] Hash collision detected!" % value.header.name)

        self.logger.info("[%s] New hardlink detected by hash match." % value.header.name)
        # Undo the provisional inode registration made above.
        provisional_ino = value.header.ino
        provisional_names = self.inodes[provisional_ino]
        provisional_names.remove(value.header.name)
        if not provisional_names:
            # Drop the emptied slot: a leftover empty list would make a later
            # lookup of self.inodes[ino][0] raise IndexError.
            del self.inodes[provisional_ino]
        # Re-register under the matching entry's inode as a hardlink.
        value.header.ino = match.header.ino
        self._update_inodes(value)
    else:
        # Add the name to the hash table
        self.hashes[value.hash] = name

    super().__setitem__(name, value)
    self._update_nlinks(value.header.ino)
5746
def _update_inodes(self, entry):
    """
    Register ``entry.header.name`` under the entry's inode in ``self.inodes``.

    If the inode is already in use:
      * a CPIO_File whose data equals the first entry on that inode is a new
        hardlink, so its data is cleared;
      * a CPIO_File with empty data is accepted as an existing hardlink;
      * anything else is given a fresh inode from ``get_new_inode``.

    An inode whose name list is empty is treated as unused.
    """
    ino = entry.header.ino
    linked_names = self.inodes.get(ino)

    # An empty list is falsy, so an emptied slot is handled like a free inode
    # instead of raising IndexError on linked_names[0].
    if linked_names:
        is_file = isinstance(entry, CPIO_File)
        if is_file and self[linked_names[0]].data == entry.data:
            self.logger.info("[%s] New hardlink detected, removing data." % entry.header.name)
            # Remove the data from the current entry
            entry.data = b''
        elif is_file and entry.data == b'':
            self.logger.debug("[%s] Hardlink detected." % entry.header.name)
        else:
            # Imported at call time, as in the original code.
            from .common import get_new_inode
            # In reproducible mode every entry arrives with inode 0, so that
            # collision is expected; any other collision is worth a warning.
            if not (self.reproducible and ino == 0):
                self.logger.warning("[%s] Inode already exists: %s" % (entry.header.name, ino))
            entry.header.ino = get_new_inode(self.inodes)
            if self.reproducible:
                self.logger.debug("[%s] Inode recalculated: %s" % (entry.header.name, entry.header.ino))
            else:
                self.logger.info("[%s] New inode: %s" % (entry.header.name, entry.header.ino))

    # Create the name list for the (possibly reassigned) inode if needed, then record the name.
    self.inodes.setdefault(entry.header.ino, []).append(entry.header.name)
72+
5873 def _update_nlinks (self , inode ):
5974 """ Update nlinks for all entries with the same inode """
6075 # Get the number of links based on the number of entries with that inode
@@ -68,11 +83,11 @@ def __contains__(self, name):
6883 return super ().__contains__ (self ._normalize_name (name ))
6984
def __init__(self, structure=HEADER_NEW, reproducible=False, *args, **kwargs):
    """
    Set up the archive's bookkeeping, then initialise the underlying dict.

    structure: stored as ``self.structure`` (defaults to HEADER_NEW).
    reproducible: stored as ``self.reproducible``; when true, ``__setitem__``
        resets each entry's inode to 0 so it can be recalculated.
    Remaining positional/keyword arguments are forwarded to ``dict.__init__``.
    """
    self.structure, self.reproducible = structure, reproducible
    # inodes: inode number -> list of entry names; hashes: entry hash -> entry name
    self.inodes, self.hashes = {}, {}
    # NOTE(review): the dict initialiser runs last, presumably so the attributes
    # above exist before any items are handled - confirm.
    super().__init__(*args, **kwargs)
7691
7792 def update (self , other ):
7893 """ Update the archive with the values from another archive. """
0 commit comments