
Commit 737f618

Change logic for comparison and extraction
1 parent 6ea3ea8 commit 737f618

3 files changed: +155 −189 lines

src/borg/archive.py

+29 −75
@@ -720,76 +720,38 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
             pass

     def compare_and_extract_chunks(self, item, fs_path):
-        print(f"Initial fs_path: {fs_path}")
-        print(f"self.cwd: {self.cwd}")
-        if fs_path.startswith(self.cwd):
-            fs_path = fs_path[len(self.cwd) :].lstrip(os.sep)
-            print(f"Relative fs_path: {fs_path}")
-
-        # Construct the final path
-        fs_path = os.path.normpath(os.path.join(self.cwd, fs_path))
-        print(f"Final fs_path: {fs_path}")
-        print(f"File exists at final path: {os.path.isfile(fs_path)}")
-
-        os.makedirs(os.path.dirname(fs_path), exist_ok=True)
+        """Compare file chunks and patch if needed. Returns True if patching succeeded."""
         try:
-            if os.path.isfile(fs_path):
-                with open(fs_path, "rb+") as fs_file:
-                    chunk_offset = 0
-                    for chunk_entry in item.chunks:
-                        chunkid_A = chunk_entry.id
-                        size = chunk_entry.size
-                        print(f"Processing chunk at offset {chunk_offset}")
+            st = os.stat(fs_path, follow_symlinks=False)
+            if not stat.S_ISREG(st.st_mode):
+                return False

-                        fs_file.seek(chunk_offset)
-                        data_F = fs_file.read(size)
-                        print(f"Read {len(data_F)} bytes at offset {chunk_offset}")
-                        print(f"File content: {data_F[:20]}...")  # Show first 20 bytes
-
-                        if len(data_F) == size:
-                            chunkid_F = self.key.id_hash(data_F)
-                            print("Comparing hashes:")  # Debug
-                            print(f"Archive hash: {chunkid_A.hex()}")  # Debug
-                            print(f"File hash: {chunkid_F.hex()}")  # Debug
-                            print(f"Hashes match? {chunkid_A == chunkid_F}")
-                            if chunkid_A != chunkid_F:
-                                print("Hashes don't match, fetching new chunk")  # Debug
-                                fs_file.seek(chunk_offset)  # Go back to the start of the chunk
-                                chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
-                                print(f"Fetched content: {chunk_data[:20]}...")
-                                fs_file.write(chunk_data)
-                                fs_file.flush()
-                                print("Wrote and flushed new chunk data")
-                        else:
-                            print(f"Chunk size mismatch at offset {chunk_offset}")
-                            fs_file.seek(chunk_offset)
-                            chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
-                            fs_file.write(chunk_data)
+            with open(fs_path, "rb+") as fs_file:
+                chunk_offset = 0
+                for chunk_entry in item.chunks:
+                    chunkid_A = chunk_entry.id
+                    size = chunk_entry.size

-                        chunk_offset += size
+                    fs_file.seek(chunk_offset)
+                    data_F = fs_file.read(size)

-                    fs_file.truncate(item.size)
-                    print(f"\nFinal file size: {os.path.getsize(fs_path)}")
-                    with open(fs_path, "rb") as f:
-                        print(f"Final content: {f.read()[:20]}...")
-            else:
-                with open(fs_path, "wb") as fs_file:
-                    for chunk_entry in item.chunks:
-                        chunk_data = b"".join(self.pipeline.fetch_many([chunk_entry.id], ro_type=ROBJ_FILE_STREAM))
+                    needs_update = True
+                    if len(data_F) == size:
+                        chunkid_F = self.key.id_hash(data_F)
+                        needs_update = chunkid_A != chunkid_F
+
+                    if needs_update:
+                        chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
+                        fs_file.seek(chunk_offset)
                         fs_file.write(chunk_data)
-                    fs_file.truncate(item.size)

-                with open(fs_path, "rb") as fs_file:
-                    preview = fs_file.read(50)
-                    print(f"Final file size: {os.path.getsize(fs_path)}, Expected: {item.size}")
-                    print(f"Content preview (text): {preview.decode('utf-8', errors='replace')}")
+                    chunk_offset += size

-        except OSError as e:
-            print(f"IO error processing {fs_path}: {e}")
-            raise
-        except Exception as e:
-            print(f"Error processing {fs_path}: {str(e)}")
-            raise
+                fs_file.truncate(item.size)
+                return True
+
+        except (OSError, Exception):
+            return False

     def extract_item(
         self,
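
Note on the new approach: the rewritten method is essentially an rsync-style in-place patch at chunk granularity. It walks the archived chunk list, hashes the corresponding byte range of the on-disk file with the repository's id_hash, fetches and rewrites only the ranges whose hash differs, and truncates any leftover tail. A minimal self-contained sketch of the same idea (hashlib.sha256 stands in for borg's keyed id_hash, and a dict-backed fetch callable stands in for pipeline.fetch_many; neither is borg's real API):

import hashlib
import os

def patch_file(fs_path, chunks, fetch):
    """Patch fs_path in place. chunks: list of (chunk_id, size); fetch: chunk_id -> bytes."""
    offset = 0
    with open(fs_path, "rb+") as f:
        for chunk_id, size in chunks:
            f.seek(offset)
            data = f.read(size)
            # Rewrite unless the on-disk bytes hash to the expected chunk id.
            if len(data) != size or hashlib.sha256(data).digest() != chunk_id:
                f.seek(offset)
                f.write(fetch(chunk_id))
            offset += size
        f.truncate(offset)  # drop any trailing bytes past the archived size

# Corrupt the middle chunk of a three-chunk file, then patch it back:
blocks = [os.urandom(64) for _ in range(3)]
store = {hashlib.sha256(b).digest(): b for b in blocks}
with open("demo.bin", "wb") as f:
    f.write(blocks[0] + b"\x00" * 64 + blocks[2])
patch_file("demo.bin", [(hashlib.sha256(b).digest(), len(b)) for b in blocks], store.__getitem__)
assert open("demo.bin", "rb").read() == b"".join(blocks)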
@@ -802,7 +764,6 @@ def extract_item(
         hlm=None,
         pi=None,
         continue_extraction=False,
-        check_existing=False,
     ):
         """
         Extract archive item.
@@ -815,7 +776,6 @@ def extract_item(
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         :param continue_extraction: continue a previously interrupted extraction of same archive
-        :param check_existing: check against existing file/block device and only retrieve changed data
         """

         def same_item(item, st):
@@ -836,16 +796,6 @@ def same_item(item, st):
             # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
             return True

-        if check_existing:
-            dest = os.path.normpath(self.cwd)
-            fs_path = os.path.join(dest, item.path)
-
-            if not os.path.normpath(fs_path).startswith(dest):
-                raise Exception(f"Path {fs_path} is outside of extraction directory {dest}")
-
-            self.compare_and_extract_chunks(item, fs_path)
-            return
-
         has_damaged_chunks = "chunks_healthy" in item
         if dry_run or stdout:
             with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
@@ -884,6 +834,10 @@ def same_item(item, st):
             st = os.stat(path, follow_symlinks=False)
             if continue_extraction and same_item(item, st):
                 return  # done! we already have fully extracted this file in a previous run.
+
+            elif stat.S_ISREG(item.mode) and stat.S_ISREG(st.st_mode):
+                if self.compare_and_extract_chunks(item, path):
+                    return
             elif stat.S_ISDIR(st.st_mode):
                 os.rmdir(path)
             else:
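
Taken together, the archive.py changes drop the opt-in pre-extraction path and fold chunk comparison into extract_item's normal handling of an existing destination: when both the archived item and the on-disk entry are regular files, compare_and_extract_chunks patches the file in place and extract_item returns early; when it returns False (not a regular file, or an OSError during patching), extract_item simply proceeds with a normal full extraction. Roughly, the dispatch now reads as follows (context paraphrased from the hunk above; the final else body is not shown in this diff):

st = os.stat(path, follow_symlinks=False)
if continue_extraction and same_item(item, st):
    return  # already fully extracted in a previous run
elif stat.S_ISREG(item.mode) and stat.S_ISREG(st.st_mode):
    if self.compare_and_extract_chunks(item, path):
        return  # existing regular file patched in place
elif stat.S_ISDIR(st.st_mode):
    os.rmdir(path)
else:
    ...  # elided in this hunk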

src/borg/archiver/extract_cmd.py

+0 −8

@@ -44,7 +44,6 @@ def do_extract(self, args, repository, manifest, archive):
         sparse = args.sparse
         strip_components = args.strip_components
         continue_extraction = args.continue_extraction
-        check_existing = args.check_existing
         dirs = []
         hlm = HardLinkManager(id_type=bytes, info_type=str)  # hlid -> path

@@ -97,7 +96,6 @@ def do_extract(self, args, repository, manifest, archive):
                         hlm=hlm,
                         pi=pi,
                         continue_extraction=continue_extraction,
-                        check_existing=check_existing,
                     )
                 except BackupError as e:
                     self.print_warning_instance(BackupWarning(remove_surrogates(orig_path), e))
@@ -194,12 +192,6 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
             action="store_true",
             help="continue a previously interrupted extraction of same archive",
         )
-        subparser.add_argument(
-            "--check-existing",
-            dest="check_existing",
-            action="store_true",
-            help="check against existing file/block device and only retrieve changed data",
-        )
         subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
         subparser.add_argument(
             "paths", metavar="PATH", nargs="*", type=PathSpec, help="paths to extract; patterns are supported"
