@@ -720,76 +720,38 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
         pass

     def compare_and_extract_chunks(self, item, fs_path):
-        print(f"Initial fs_path: {fs_path}")
-        print(f"self.cwd: {self.cwd}")
-        if fs_path.startswith(self.cwd):
-            fs_path = fs_path[len(self.cwd) :].lstrip(os.sep)
-            print(f"Relative fs_path: {fs_path}")
-
-        # Construct the final path
-        fs_path = os.path.normpath(os.path.join(self.cwd, fs_path))
-        print(f"Final fs_path: {fs_path}")
-        print(f"File exists at final path: {os.path.isfile(fs_path)}")
-
-        os.makedirs(os.path.dirname(fs_path), exist_ok=True)
+        """Compare file chunks and patch if needed. Returns True if patching succeeded."""
         try:
-            if os.path.isfile(fs_path):
-                with open(fs_path, "rb+") as fs_file:
-                    chunk_offset = 0
-                    for chunk_entry in item.chunks:
-                        chunkid_A = chunk_entry.id
-                        size = chunk_entry.size
-                        print(f"Processing chunk at offset {chunk_offset}")
+            st = os.stat(fs_path, follow_symlinks=False)
+            if not stat.S_ISREG(st.st_mode):
+                return False

-                        fs_file.seek(chunk_offset)
-                        data_F = fs_file.read(size)
-                        print(f"Read {len(data_F)} bytes at offset {chunk_offset}")
-                        print(f"File content: {data_F[:20]}...")  # Show first 20 bytes
-
-                        if len(data_F) == size:
-                            chunkid_F = self.key.id_hash(data_F)
-                            print("Comparing hashes:")  # Debug
-                            print(f"Archive hash: {chunkid_A.hex()}")  # Debug
-                            print(f"File hash: {chunkid_F.hex()}")  # Debug
-                            print(f"Hashes match? {chunkid_A == chunkid_F}")
-                            if chunkid_A != chunkid_F:
-                                print("Hashes don't match, fetching new chunk")  # Debug
-                                fs_file.seek(chunk_offset)  # Go back to the start of the chunk
-                                chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
-                                print(f"Fetched content: {chunk_data[:20]}...")
-                                fs_file.write(chunk_data)
-                                fs_file.flush()
-                                print("Wrote and flushed new chunk data")
-                        else:
-                            print(f"Chunk size mismatch at offset {chunk_offset}")
-                            fs_file.seek(chunk_offset)
-                            chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
-                            fs_file.write(chunk_data)
+            with open(fs_path, "rb+") as fs_file:
+                chunk_offset = 0
+                for chunk_entry in item.chunks:
+                    chunkid_A = chunk_entry.id
+                    size = chunk_entry.size

-                        chunk_offset += size
+                    fs_file.seek(chunk_offset)
+                    data_F = fs_file.read(size)

-                    fs_file.truncate(item.size)
-                    print(f"\nFinal file size: {os.path.getsize(fs_path)}")
-                    with open(fs_path, "rb") as f:
-                        print(f"Final content: {f.read()[:20]}...")
-            else:
-                with open(fs_path, "wb") as fs_file:
-                    for chunk_entry in item.chunks:
-                        chunk_data = b"".join(self.pipeline.fetch_many([chunk_entry.id], ro_type=ROBJ_FILE_STREAM))
+                    needs_update = True
+                    if len(data_F) == size:
+                        chunkid_F = self.key.id_hash(data_F)
+                        needs_update = chunkid_A != chunkid_F
+
+                    if needs_update:
+                        chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
+                        fs_file.seek(chunk_offset)
                         fs_file.write(chunk_data)
-                    fs_file.truncate(item.size)

-            with open(fs_path, "rb") as fs_file:
-                preview = fs_file.read(50)
-                print(f"Final file size: {os.path.getsize(fs_path)}, Expected: {item.size}")
-                print(f"Content preview (text): {preview.decode('utf-8', errors='replace')}")
+                    chunk_offset += size

-        except OSError as e:
-            print(f"IO error processing {fs_path}: {e}")
-            raise
-        except Exception as e:
-            print(f"Error processing {fs_path}: {str(e)}")
-            raise
+                fs_file.truncate(item.size)
+                return True
+
+        except (OSError, Exception):
+            return False

     def extract_item(
         self,
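
Review note: the rewritten `compare_and_extract_chunks` drops the debug prints and the redundant path juggling (the caller already knows the destination path) and reduces to a single pass over `item.chunks`: read each chunk-sized range from the existing file, hash it with `self.key.id_hash`, and only fetch and rewrite the range when the hash differs from the archived chunk id or the read came up short. Here is a minimal standalone sketch of the same idea, using `hashlib.sha256` as a stand-in for borg's keyed id-hash and a plain dict as a stand-in for the repository pipeline; both stand-ins are assumptions for illustration, not borg APIs:

```python
import hashlib


def patch_file_in_place(fs_path, chunks, repo, final_size):
    """Rewrite only the chunk-sized ranges of fs_path whose content no
    longer matches the archived chunk id.

    chunks: list of (chunk_id, size), with chunk_id = sha256(plaintext)
    repo:   dict mapping chunk_id -> chunk bytes (stands in for fetch_many)
    """
    with open(fs_path, "rb+") as f:
        offset = 0
        for chunk_id, size in chunks:
            f.seek(offset)
            data = f.read(size)
            # A short read or a hash mismatch means the on-disk range is stale.
            if len(data) != size or hashlib.sha256(data).digest() != chunk_id:
                f.seek(offset)
                f.write(repo[chunk_id])
            offset += size
        # Drop trailing bytes if the old file was longer than the archived one.
        f.truncate(final_size)
```

The property worth preserving from the diff: unchanged ranges are never written, so re-extracting over a mostly identical file only touches the modified chunks.
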
@@ -802,7 +764,6 @@ def extract_item(
         hlm=None,
         pi=None,
         continue_extraction=False,
-        check_existing=False,
     ):
         """
         Extract archive item.
@@ -815,7 +776,6 @@ def extract_item(
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         :param continue_extraction: continue a previously interrupted extraction of same archive
-        :param check_existing: check against existing file/block device and only retrieve changed data
         """

         def same_item(item, st):
@@ -836,16 +796,6 @@ def same_item(item, st):
             # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
             return True

-        if check_existing:
-            dest = os.path.normpath(self.cwd)
-            fs_path = os.path.join(dest, item.path)
-
-            if not os.path.normpath(fs_path).startswith(dest):
-                raise Exception(f"Path {fs_path} is outside of extraction directory {dest}")
-
-            self.compare_and_extract_chunks(item, fs_path)
-            return
-
         has_damaged_chunks = "chunks_healthy" in item
         if dry_run or stdout:
             with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
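
Review note: the removed `check_existing` branch rebuilt `fs_path` by hand and guarded it with a `startswith` prefix check, which is a weak containment test: `"/tmp/extract-evil".startswith("/tmp/extract")` is true even though that path lies outside the extraction directory. Moving the call into the existing-file handling below lets it reuse the `path` that `extract_item` has already computed. For reference, if a containment guard were still wanted, a stricter version could compare resolved paths; this is a generic sketch, not borg API:

```python
import os


def is_within(base, target):
    """True if target resolves to a location inside base."""
    base = os.path.realpath(base)
    target = os.path.realpath(target)
    return os.path.commonpath([base, target]) == base
```
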
@@ -884,6 +834,10 @@ def same_item(item, st):
                 st = os.stat(path, follow_symlinks=False)
                 if continue_extraction and same_item(item, st):
                     return  # done! we already have fully extracted this file in a previous run.
+
+                elif stat.S_ISREG(item.mode) and stat.S_ISREG(st.st_mode):
+                    if self.compare_and_extract_chunks(item, path):
+                        return
                 elif stat.S_ISDIR(st.st_mode):
                     os.rmdir(path)
                 else:
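
Review note: with this hunk, in-place patching hangs off the normal existing-file handling: when something already exists at `path` and both the archived item and the on-disk file are regular files, `compare_and_extract_chunks` runs first, and a `True` return ends extraction early. A `False` return (wrong file type, I/O failure) falls out of the `elif` chain, so extraction proceeds as if the attempt had not been made. One nit in the new helper: `OSError` is a subclass of `Exception`, so `except (OSError, Exception):` behaves exactly like `except Exception:`; the plainer spelling states the fall-back-on-any-error intent more directly (a suggestion, not part of the diff):

```python
        except Exception:
            # Any failure (I/O error, missing chunk, ...) aborts in-place
            # patching; extract_item falls back to a full re-extract.
            return False
```
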