diff --git a/docs/formats.md b/docs/formats.md index 463d68810a..3887c62579 100644 --- a/docs/formats.md +++ b/docs/formats.md @@ -22,6 +22,7 @@ unblob supports more than 30 formats. You can see their code in | CAB | ❌ | ❌ | ❌ | [archive/cab.py][cab-handler] | [`7z`][cab-extractor] | | CPIO | ✅ | ✅ | ✅ | [archive/cpio.py][cpio-handler] | unblob extractor | | DMG | ❌ | ❌ | ❌ | [archive/dmg.py][dmg-handler] | [`7z`][dmg-extractor] | +| PARTCLONE | ✅ | ❌ | ❌ | [archive/partclone.py][partclone-handler] | [`partclone`][partclone-extractor] | | RAR | ❌ | ❌ | ❌ | [archive/rar.py][rar-handler] | [`unar`][rar-extractor] | | 7ZIP | ❌ | ❌ | ❌ | [archive/sevenzip.py][7zip-handler] | [`7z`][7zip-extractor] | | StuffIt | ❌ | ❌ | ❌ | [archive/stuffit.py][stuffit-handler] | [`unar`][stuffit-extractor] | @@ -39,6 +40,8 @@ unblob supports more than 30 formats. You can see their code in [cpio-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/cpio.py [dmg-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/dmg.py [dmg-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/dmg.py#L67-L69 +[partclone-handler]: https://github.com/onekey-sec/unblob/blob/main/python/unblob/handlers/archive/partclone.py +[partclone-extractor]: https://github.com/onekey-sec/unblob/blob/b21b6dc291583af6b7ec9b7c3d63ee8302328841/python/unblob/handlers/archive/partclone.py#L44 [rar-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/rar.py [rar-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/rar.py#L32 [7zip-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/sevenzip.py diff --git a/install-deps.sh b/install-deps.sh index 0b819f4593..c5bb9751f2 100755 --- a/install-deps.sh +++ b/install-deps.sh @@ -10,6 +10,7 @@ apt-get install --no-install-recommends -y \ lziprecover \ 
def get_endian(
    file: File, big_endian_magic: int, endian_len: Literal[2, 4] = 4
) -> Endian:
    """Read a two or four byte magic and derive endianness from it.

    ``endian_len`` bytes are read at the current file position and the
    position is restored right after the read, before the bytes are
    interpreted.  The bytes are decoded as a big endian integer and compared
    with ``big_endian_magic``.

    Args:
        file: File positioned at the first byte of the magic.
        big_endian_magic: Value the magic has when decoded as big endian.
        endian_len: Width of the magic in bytes, either 2 or 4.

    Returns:
        ``Endian.BIG`` when the decoded magic equals ``big_endian_magic``,
        ``Endian.LITTLE`` otherwise.

    Raises:
        ValueError: If ``endian_len`` is neither 2 nor 4, or if
            ``big_endian_magic`` does not fit into ``endian_len`` bytes.
    """
    # Without this guard any unsupported width (e.g. 3 or 8) would silently be
    # decoded with the 16 bit converter.
    if endian_len not in (2, 4):
        raise ValueError(f"endian_len must be 2 or 4, got {endian_len}.")
    if big_endian_magic > (1 << (endian_len * 8)) - 1:
        raise ValueError(
            f"big_endian_magic is larger than a {endian_len * 8} bit integer."
        )

    magic_bytes = file.read(endian_len)
    # Rewind by what was actually read (may be short near EOF) *before*
    # converting, so the position is restored even if the conversion fails.
    file.seek(-len(magic_bytes), io.SEEK_CUR)

    convert = convert_int32 if endian_len == 4 else convert_int16
    magic = convert(magic_bytes, Endian.BIG)
    return Endian.BIG if magic == big_endian_magic else Endian.LITTLE
import binascii
import io
from typing import Optional

from unblob.extractors import Command
from unblob.file_utils import File, InvalidInputFormat, get_endian
from unblob.models import Regex, StructHandler, ValidChunk

C_DEFINITIONS = r"""
    typedef struct partclone_header{
        char magic[16];
        char partclone_version[14];
        char image_version_txt[4];
        char endian[2];
        char fs_type[16];
        uint64 fs_size;
        uint64 fs_total_block_count;
        uint64 fs_used_block_count_superblock;
        uint64 fs_used_block_count_bitmap;
        uint32 fs_block_size;
        uint32 feature_size;
        uint16 image_version;
        uint16 number_of_bits_for_CPU;
        uint16 checksum_mode;
        uint16 checksum_size;
        uint32 blocks_per_checksum;
        uint8 reseed_checksum;
        uint8 bitmap_mode;
        uint32 crc32;
    } partclone_header_t;
"""

HEADER_STRUCT = "partclone_header_t"
# Value of the two byte ``endian`` field when decoded as big endian.
BIG_ENDIAN_MAGIC = 0xC0DE
# Offset of ``endian``: magic[16] + partclone_version[14] + image_version_txt[4].
ENDIAN_OFFSET = 34


def _ceil_div(numerator: int, denominator: int) -> int:
    """Return ``ceil(numerator / denominator)`` using exact integer arithmetic."""
    return -(-numerator // denominator)


class PartcloneHandler(StructHandler):
    """Detect partclone images and restore them with ``partclone.restore``."""

    NAME = "partclone"
    PATTERNS = [Regex(r"partclone-image\x00\d+\.\d+\.\d+.*?0002(\xde\xc0|\xc0\xde)")]
    HEADER_STRUCT = HEADER_STRUCT
    C_DEFINITIONS = C_DEFINITIONS
    EXTRACTOR = Command(
        "partclone.restore",
        "-W",
        "-s",
        "{inpath}",
        "-o",
        "{outdir}/partclone.restored",
        "-L",
        "/dev/stdout",
    )

    def is_valid_header(self, header) -> bool:
        """Return True when the stored header CRC matches the computed one.

        The CRC covers every header byte except the trailing four byte
        ``crc32`` field itself.
        """
        calculated_crc = binascii.crc32(header.dumps()[0:-4])
        # partclone does not apply the final XOR, binascii.crc32 does, so the
        # stored value is flipped before comparing.
        return (header.crc32 ^ 0xFFFFFFFF) == calculated_crc

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Compute the extent of the partclone image starting at ``start_offset``.

        The size is the sum of the header, one checksum, the block bitmap
        (one bit per filesystem block), the per-group data checksums (only
        when ``checksum_mode`` is non-zero) and the used data blocks.

        Raises:
            InvalidInputFormat: If the header CRC does not match, or if the
                header enables checksums with ``blocks_per_checksum`` of zero.
        """
        # The endianness marker must be read first: it decides how every
        # other header field is decoded.
        file.seek(start_offset + ENDIAN_OFFSET, io.SEEK_SET)
        endian = get_endian(file, BIG_ENDIAN_MAGIC, endian_len=2)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)

        if not self.is_valid_header(header):
            raise InvalidInputFormat("Invalid partclone header.")

        end_offset = start_offset + len(header)  # header
        end_offset += header.checksum_size  # checksum size
        # Integer ceiling division: the counts are uint64 and float division
        # would lose precision above 2**53.
        end_offset += _ceil_div(header.fs_total_block_count, 8)  # bitmap, as bytes

        if header.checksum_mode != 0:
            # The divisor comes from untrusted input; reject it explicitly
            # instead of letting a ZeroDivisionError escape.
            if header.blocks_per_checksum == 0:
                raise InvalidInputFormat(
                    "Invalid partclone header: blocks_per_checksum is zero."
                )
            checksum_blocks = _ceil_div(
                header.fs_used_block_count_bitmap, header.blocks_per_checksum
            )
            end_offset += checksum_blocks * header.checksum_size

        end_offset += header.fs_used_block_count_bitmap * header.fs_block_size  # Data
        return ValidChunk(start_offset=start_offset, end_offset=end_offset)
100755 index 0000000000..657491385b --- /dev/null +++ b/tests/integration/archive/partclone/__input__/fs_dev0.partclone.img @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8fb4fbc359454b017521504eddf0e2955c5808280337b73ad9f897a5f501285 +size 40123 diff --git a/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored new file mode 100644 index 0000000000..e21579b62c --- /dev/null +++ b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be413ccd078c706d4f7dd64d4e29fe917fd188f22202becf906b0b79aa9d645 +size 1474560 diff --git a/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored_extract/lost+found/.gitkeep b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored_extract/lost+found/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored new file mode 100644 index 0000000000..c149e9f285 --- /dev/null +++ b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557ad6d9db9ea8ed1f749d8da063d661c78951e318f3d5f23e517b8b93a205d6 +size 565248 diff --git a/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored_extract/lost+found/.gitkeep b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored_extract/lost+found/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py index 
9f85bca530..8fdeb04d73 100644 --- a/tests/test_file_utils.py +++ b/tests/test_file_utils.py @@ -345,29 +345,49 @@ class TestGetEndian: "content, big_endian_magic, expected", [ pytest.param( - b"\xff\x00\x00\x10", 0x100000FF, Endian.LITTLE, id="valid_little_endian" + b"\xff\x00\x00\x10", + 0x100000FF, + Endian.LITTLE, + id="valid_little_endian_4", ), pytest.param( - b"\x10\x00\x00\xff", 0x100000FF, Endian.BIG, id="valid_big_endian" + b"\x10\x00\x00\xff", 0x100000FF, Endian.BIG, id="valid_big_endian_4" ), + pytest.param( + b"\xff\x00", 0x00FF, Endian.LITTLE, id="valid_little_endian_2" + ), + pytest.param(b"\x10\x00", 0x1000, Endian.BIG, id="valid_big_endian_2"), ], ) def test_get_endian(self, content: bytes, big_endian_magic: int, expected: Endian): file = File.from_bytes(content) - assert get_endian(file, big_endian_magic) == expected + assert get_endian(file, big_endian_magic, endian_len=len(content)) == expected @pytest.mark.parametrize( - "content, big_endian_magic", + "content, big_endian_magic, endian_len, msg", [ - pytest.param(b"\x00\x00\x00\x01", 0xFF_FF_FF_FF_FF, id="larger_than_32bit"), + pytest.param( + b"\x00\x00\x00\x01", + 0xFF_FF_FF_FF_FF, + 4, + "big_endian_magic is larger than a 32 bit integer", + id="larger_than_32bit", + ), + pytest.param( + b"\x00\x00\x00\x01", + 0xFF_FF_FF_FF_FF, + 2, + "big_endian_magic is larger than a 16 bit integer", + id="larger_than_16bit", + ), ], ) - def test_get_endian_errors(self, content: bytes, big_endian_magic: int): + def test_get_endian_errors( + self, content: bytes, big_endian_magic: int, endian_len: int, msg: str + ): file = File.from_bytes(content) - with pytest.raises( - ValueError, match="big_endian_magic is larger than a 32 bit integer" - ): - get_endian(file, big_endian_magic) + with pytest.raises(ValueError, match=msg): + get_endian(file, big_endian_magic, endian_len) def test_get_endian_resets_the_file_pointer(self): file = File.from_bytes(bytes.fromhex("FFFF 0000")) @@ -376,6 +396,9 @@ def 
test_get_endian_resets_the_file_pointer(self): with pytest.raises(InvalidInputFormat): get_endian(file, 0xFFFF_0000) assert file.tell() == pos + with pytest.raises(InvalidInputFormat): + get_endian(file, 0xFFFF) + assert file.tell() == pos @pytest.mark.parametrize( diff --git a/tests/test_handlers.py b/tests/test_handlers.py index acc764b190..e8d5e0f3bb 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -8,6 +8,7 @@ """ import inspect +import sys from pathlib import Path import pytest @@ -35,6 +36,12 @@ def test_all_handlers( extraction_config: ExtractionConfig, request: pytest.FixtureRequest, ): + handler_name = input_dir.parent.name + if (sys.platform, handler_name) == ("darwin", "partclone"): + pytest.skip( + f"Handler '{handler_name}' not supported on platform '{sys.platform}'" + ) + log_path = Path("/dev/null") # no logging report_file = None # no reporting