From 41f5093099e80f0f5daad0c3e629e6364392844b Mon Sep 17 00:00:00 2001 From: rxpha3l Date: Wed, 26 Mar 2025 14:41:29 +0100 Subject: [PATCH 1/3] fix(utils): add get_endian_short to support 2 bytes magic get_endian was limited to 32 bit integer magic values and partclone required checking a 16 bit integer magic value. We therefore created get_endian_short and expose it from file_utils. --- python/unblob/file_utils.py | 18 +++++++++++-- tests/test_file_utils.py | 50 +++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/python/unblob/file_utils.py b/python/unblob/file_utils.py index 1bd9774698..d0e64d54db 100644 --- a/python/unblob/file_utils.py +++ b/python/unblob/file_utils.py @@ -358,8 +358,8 @@ def parse( def get_endian(file: File, big_endian_magic: int) -> Endian: """Read a four bytes magic and derive endianness from it. - It compares the read data with the big endian magic. It reads - four bytes and seeks back after that. + It compares the read data with the big endian magic and then seeks back + the amount of read bytes. """ if big_endian_magic > 0xFF_FF_FF_FF: raise ValueError("big_endian_magic is larger than a 32 bit integer.") @@ -369,6 +369,20 @@ def get_endian(file: File, big_endian_magic: int) -> Endian: return Endian.BIG if magic == big_endian_magic else Endian.LITTLE +def get_endian_short(file: File, big_endian_magic: int) -> Endian: + """Read a two bytes magic and derive endianness from it. + + It compares the read data with the big endian magic and then seeks back + the amount of read bytes. 
+ """ + if big_endian_magic > 0xFF_FF: + raise ValueError("big_endian_magic is larger than a 16 bit integer.") + magic_bytes = file.read(2) + file.seek(-len(magic_bytes), io.SEEK_CUR) + magic = convert_int16(magic_bytes, Endian.BIG) + return Endian.BIG if magic == big_endian_magic else Endian.LITTLE + + def get_endian_multi(file: File, big_endian_magics: list[int]) -> Endian: """Read a four bytes magic and derive endianness from it. diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py index 9f85bca530..6d6773c59c 100644 --- a/tests/test_file_utils.py +++ b/tests/test_file_utils.py @@ -17,6 +17,7 @@ convert_int64, decode_multibyte_integer, get_endian, + get_endian_short, is_safe_path, iterate_file, iterate_patterns, @@ -345,7 +346,10 @@ class TestGetEndian: "content, big_endian_magic, expected", [ pytest.param( - b"\xff\x00\x00\x10", 0x100000FF, Endian.LITTLE, id="valid_little_endian" + b"\xff\x00\x00\x10", + 0x100000FF, + Endian.LITTLE, + id="valid_little_endian", ), pytest.param( b"\x10\x00\x00\xff", 0x100000FF, Endian.BIG, id="valid_big_endian" @@ -356,10 +360,27 @@ def test_get_endian(self, content: bytes, big_endian_magic: int, expected: Endia file = File.from_bytes(content) assert get_endian(file, big_endian_magic) == expected + @pytest.mark.parametrize( + "content, big_endian_magic, expected", + [ + pytest.param(b"\xff\x00", 0x00FF, Endian.LITTLE, id="valid_little_endian"), + pytest.param(b"\x10\x00", 0x1000, Endian.BIG, id="valid_big_endian"), + ], + ) + def test_get_endian_short( + self, content: bytes, big_endian_magic: int, expected: Endian + ): + file = File.from_bytes(content) + assert get_endian_short(file, big_endian_magic) == expected + @pytest.mark.parametrize( "content, big_endian_magic", [ - pytest.param(b"\x00\x00\x00\x01", 0xFF_FF_FF_FF_FF, id="larger_than_32bit"), + pytest.param( + b"\x00\x00\x00\x01", + 0xFF_FF_FF_FF_FF, + id="larger_than_32bit", + ), ], ) def test_get_endian_errors(self, content: bytes, big_endian_magic: int): @@ 
-369,6 +390,23 @@ def test_get_endian_errors(self, content: bytes, big_endian_magic: int): ): get_endian(file, big_endian_magic) + @pytest.mark.parametrize( + "content, big_endian_magic", + [ + pytest.param( + b"\x00\x00\x00\x01", + 0xFF_FF_FF, + id="larger_than_16bit", + ), + ], + ) + def test_get_endian_short_errors(self, content: bytes, big_endian_magic: int): + file = File.from_bytes(content) + with pytest.raises( + ValueError, match="big_endian_magic is larger than a 16 bit integer" + ): + get_endian_short(file, big_endian_magic) + def test_get_endian_resets_the_file_pointer(self): file = File.from_bytes(bytes.fromhex("FFFF 0000")) file.seek(-1, io.SEEK_END) @@ -377,6 +415,14 @@ def test_get_endian_resets_the_file_pointer(self): get_endian(file, 0xFFFF_0000) assert file.tell() == pos + def test_get_endian_short_resets_the_file_pointer(self): + file = File.from_bytes(bytes.fromhex("FFFF")) + file.seek(-1, io.SEEK_END) + pos = file.tell() + with pytest.raises(InvalidInputFormat): + get_endian_short(file, 0xFFFF) + assert file.tell() == pos + @pytest.mark.parametrize( "input_path, expected", From c300801ce3a0f326058ebbf739c8881ebefe775a Mon Sep 17 00:00:00 2001 From: rxpha3l Date: Wed, 26 Mar 2025 14:39:13 +0100 Subject: [PATCH 2/3] feat(handler): add partclone handler Partclone is a utility used for backing up and restoring partitions. Many cloning tools (such as Clonezilla) rely on it to create block-level images that include filesystem metadata. Right now only partclone version 2 is supported. end offset is computed from data available in partclone's super block. Extraction is performed by `partclone.restore`, which is part of `partclone` package on Debian based systems and Nix. 
--- docs/formats.md | 7 +- install-deps.sh | 1 + package.nix | 4 +- python/unblob/handlers/__init__.py | 2 + python/unblob/handlers/archive/partclone.py | 81 +++++++++++++++++++ .../partclone/__input__/floppy-144m.img | 3 + .../partclone/__input__/fs_dev0.partclone.img | 3 + .../partclone.restored | 3 + .../lost+found/.gitkeep | 0 .../partclone.restored | 3 + .../lost+found/.gitkeep | 0 11 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 python/unblob/handlers/archive/partclone.py create mode 100755 tests/integration/archive/partclone/__input__/floppy-144m.img create mode 100755 tests/integration/archive/partclone/__input__/fs_dev0.partclone.img create mode 100644 tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored create mode 100644 tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored_extract/lost+found/.gitkeep create mode 100644 tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored create mode 100644 tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored_extract/lost+found/.gitkeep diff --git a/docs/formats.md b/docs/formats.md index 463d68810a..5e280ad883 100644 --- a/docs/formats.md +++ b/docs/formats.md @@ -8,7 +8,7 @@ hide: unblob supports more than 30 formats. You can see their code in [`unblob/handlers/`](https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/__init__.py). -✅: Some or all metadata is preserved for the format. +✅: Some or all metadata is preserved for the format. ❌: Metadata is not preserved (limitation of the format). ## Archives @@ -22,6 +22,7 @@ unblob supports more than 30 formats. 
You can see their code in | CAB | ❌ | ❌ | ❌ | [archive/cab.py][cab-handler] | [`7z`][cab-extractor] | | CPIO | ✅ | ✅ | ✅ | [archive/cpio.py][cpio-handler] | unblob extractor | | DMG | ❌ | ❌ | ❌ | [archive/dmg.py][dmg-handler] | [`7z`][dmg-extractor] | +| PARTCLONE | ✅ | ❌ | ❌ | [archive/partclone.py][partclone-handler] | [`partclone`][partclone-extractor] | | RAR | ❌ | ❌ | ❌ | [archive/rar.py][rar-handler] | [`unar`][rar-extractor] | | 7ZIP | ❌ | ❌ | ❌ | [archive/sevenzip.py][7zip-handler] | [`7z`][7zip-extractor] | | StuffIt | ❌ | ❌ | ❌ | [archive/stuffit.py][stuffit-handler] | [`unar`][stuffit-extractor] | @@ -39,6 +40,8 @@ unblob supports more than 30 formats. You can see their code in [cpio-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/cpio.py [dmg-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/dmg.py [dmg-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/dmg.py#L67-L69 +[partclone-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/partclone.py +[partclone-extractor]: https://github.com/onekey-sec/unblob/blob/b21b6dc291583af6b7ec9b7c3d63ee8302328841/python/unblob/handlers/archive/partclone.py#L44 [rar-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/rar.py [rar-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/rar.py#L32 [7zip-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/sevenzip.py @@ -97,7 +100,7 @@ For compression formats, metadata cannot be preserved, as this information in mo | ---------------------- | ---------------------------------- | ----------------------------------------------- | ----------------------------------------------- | | Android sparse image | ❌ | [filesystem/android/sparse.py][android-handler] | [`simg2img`][android-extractor] | | CRAMFS 
| ✅ | [filesystem/cramfs.py][cramfs-handler] | [`7z`][cramfs-extractor] | -| EROFS | ✅ | [filesystem/android/erofs.py][erofs-handler] | [`fsck.erfos`][erofs-extractor] | +| EROFS | ✅ | [filesystem/android/erofs.py][erofs-handler] | [`fsck.erofs`][erofs-extractor] | | ExtFS | ✅ | [filesystem/extfs.py][extfs-handler] | [`debugfs`][extfs-extractor] | | FAT | ✅ | [filesystem/fat.py][fat-handler] | [`7z`][fat-extractor] | | ISO9660 | ✅ | [filesystem/iso9660.py][iso9660-handler] | [`7z`][iso9660-extractor] | diff --git a/install-deps.sh b/install-deps.sh index 0b819f4593..c5bb9751f2 100755 --- a/install-deps.sh +++ b/install-deps.sh @@ -10,6 +10,7 @@ apt-get install --no-install-recommends -y \ lziprecover \ lzop \ p7zip-full \ + partclone \ unar \ xz-utils \ libmagic1 \ diff --git a/package.nix b/package.nix index 7c5dc60e90..7507bf5cc4 100644 --- a/package.nix +++ b/package.nix @@ -1,5 +1,6 @@ { lib, + stdenv, python3, makeWrapper, e2fsprogs-nofortify, @@ -9,6 +10,7 @@ lziprecover, lzop, p7zip16, + partclone, nix-filter, sasquatch, sasquatch-v4be, @@ -22,7 +24,7 @@ let # These dependencies are only added to PATH - runtimeDeps = [ + runtimeDeps = lib.optional stdenv.isLinux partclone ++ [ e2fsprogs-nofortify erofs-utils jefferson diff --git a/python/unblob/handlers/__init__.py b/python/unblob/handlers/__init__.py index 163dfc9c32..98a0accba6 100644 --- a/python/unblob/handlers/__init__.py +++ b/python/unblob/handlers/__init__.py @@ -6,6 +6,7 @@ cab, cpio, dmg, + partclone, rar, sevenzip, stuffit, @@ -119,6 +120,7 @@ ecc.AutelECCHandler, uzip.UZIPHandler, erofs.EROFSHandler, + partclone.PartcloneHandler, ) BUILTIN_DIR_HANDLERS: DirectoryHandlers = ( diff --git a/python/unblob/handlers/archive/partclone.py b/python/unblob/handlers/archive/partclone.py new file mode 100644 index 0000000000..58af530386 --- /dev/null +++ b/python/unblob/handlers/archive/partclone.py @@ -0,0 +1,81 @@ +import binascii +import io +from math import ceil +from typing import Optional + +from
unblob.extractors import Command +from unblob.file_utils import File, InvalidInputFormat, get_endian_short +from unblob.models import Regex, StructHandler, ValidChunk + +C_DEFINITIONS = r""" + typedef struct partclone_header{ + char magic[16]; + char partclone_version[14]; + char image_version_txt[4]; + char endian[2]; + char fs_type[16]; + uint64 fs_size; + uint64 fs_total_block_count; + uint64 fs_used_block_count_superblock; + uint64 fs_used_block_count_bitmap; + uint32 fs_block_size; + uint32 feature_size; + uint16 image_version; + uint16 number_of_bits_for_CPU; + uint16 checksum_mode; + uint16 checksum_size; + uint32 blocks_per_checksum; + uint8 reseed_checksum; + uint8 bitmap_mode; + uint32 crc32; + } partclone_header_t; +""" + +HEADER_STRUCT = "partclone_header_t" +BIG_ENDIAN_MAGIC = 0xC0DE +ENDIAN_OFFSET = 34 + + +class PartcloneHandler(StructHandler): + NAME = "partclone" + PATTERNS = [Regex(r"partclone-image\x00\d+\.\d+\.\d+.*?0002(\xde\xc0|\xc0\xde)")] + HEADER_STRUCT = HEADER_STRUCT + C_DEFINITIONS = C_DEFINITIONS + EXTRACTOR = Command( + "partclone.restore", + "-W", + "-s", + "{inpath}", + "-o", + "{outdir}/partclone.restored", + "-L", + "/dev/stdout", + ) + + def is_valid_header(self, header) -> bool: + calculated_crc = binascii.crc32(header.dumps()[0:-4]) + return ( + header.crc32 ^ 0xFFFFFFFF + ) == calculated_crc # partclone does not final XOR + + def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]: + file.seek(start_offset + ENDIAN_OFFSET, io.SEEK_SET) # go to endian + endian = get_endian_short(file, BIG_ENDIAN_MAGIC) + file.seek(start_offset, io.SEEK_SET) # go to beginning of file + header = self.parse_header(file, endian) + + if not self.is_valid_header(header): + raise InvalidInputFormat("Invalid partclone header.") + + end_offset = start_offset + len(header) # header + end_offset += header.checksum_size # checksum size + end_offset += ceil(header.fs_total_block_count / 8) # bitmap, as bytes + + if 
header.checksum_mode != 0: + checksum_blocks = ceil( + header.fs_used_block_count_bitmap / header.blocks_per_checksum + ) + end_offset += checksum_blocks * header.checksum_size + + end_offset += header.fs_used_block_count_bitmap * header.fs_block_size # Data + return ValidChunk(start_offset=start_offset, end_offset=end_offset) diff --git a/tests/integration/archive/partclone/__input__/floppy-144m.img b/tests/integration/archive/partclone/__input__/floppy-144m.img new file mode 100755 index 0000000000..4a01bc782e --- /dev/null +++ b/tests/integration/archive/partclone/__input__/floppy-144m.img @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e63b5b8ec0ab6dfc4a4254d72e26b8c1b7ee8b6ceb61fe67bea1105b0d60156 +size 69930 diff --git a/tests/integration/archive/partclone/__input__/fs_dev0.partclone.img b/tests/integration/archive/partclone/__input__/fs_dev0.partclone.img new file mode 100755 index 0000000000..657491385b --- /dev/null +++ b/tests/integration/archive/partclone/__input__/fs_dev0.partclone.img @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8fb4fbc359454b017521504eddf0e2955c5808280337b73ad9f897a5f501285 +size 40123 diff --git a/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored new file mode 100644 index 0000000000..e21579b62c --- /dev/null +++ b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be413ccd078c706d4f7dd64d4e29fe917fd188f22202becf906b0b79aa9d645 +size 1474560 diff --git a/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored_extract/lost+found/.gitkeep b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored_extract/lost+found/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff 
--git a/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored new file mode 100644 index 0000000000..c149e9f285 --- /dev/null +++ b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557ad6d9db9ea8ed1f749d8da063d661c78951e318f3d5f23e517b8b93a205d6 +size 565248 diff --git a/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored_extract/lost+found/.gitkeep b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored_extract/lost+found/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 From 01d739adab74d90a22882e02ccc02a8b7aeba44a Mon Sep 17 00:00:00 2001 From: Quentin Kaiser Date: Thu, 8 May 2025 13:34:05 +0200 Subject: [PATCH 3/3] fix(tests): skip partclone handler test on darwin partclone is not available on Darwin (OSX), so we need to skip that test --- tests/test_handlers.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index acc764b190..e8d5e0f3bb 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -8,6 +8,7 @@ """ import inspect +import sys from pathlib import Path import pytest @@ -35,6 +36,12 @@ def test_all_handlers( extraction_config: ExtractionConfig, request: pytest.FixtureRequest, ): + handler_name = input_dir.parent.name + if (sys.platform, handler_name) == ("darwin", "partclone"): + pytest.skip( + f"Handler '{handler_name}' not supported on platform '{sys.platform}'" + ) + log_path = Path("/dev/null") # no logging report_file = None # no reporting