Skip to content

feat(handler): add partclone handler #1155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/formats.md
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@ unblob supports more than 30 formats. You can see their code in
| CAB |||| [archive/cab.py][cab-handler] | [`7z`][cab-extractor] |
| CPIO |||| [archive/cpio.py][cpio-handler] | unblob extractor |
| DMG |||| [archive/dmg.py][dmg-handler] | [`7z`][dmg-extractor] |
| PARTCLONE |||| [archive/partclone.py][partclone-hanlder] | [`partclone`][partclone-extractor] |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo in partclone-hanlder -> partclone-handler (as a result the reference was also broken).

| RAR |||| [archive/rar.py][rar-handler] | [`unar`][rar-extractor] |
| 7ZIP |||| [archive/sevenzip.py][7zip-handler] | [`7z`][7zip-extractor] |
| StuffIt |||| [archive/stuffit.py][stuffit-handler] | [`unar`][stuffit-extractor] |
@@ -39,6 +40,8 @@ unblob supports more than 30 formats. You can see their code in
[cpio-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/cpio.py
[dmg-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/dmg.py
[dmg-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/dmg.py#L67-L69
[partclone-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/partclone.py
[partclone-extractor]: https://github.com/onekey-sec/unblob/blob/b21b6dc291583af6b7ec9b7c3d63ee8302328841/python/unblob/handlers/archive/partclone.py#L44
[rar-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/rar.py
[rar-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/rar.py#L32
[7zip-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/sevenzip.py
1 change: 1 addition & 0 deletions install-deps.sh
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@ apt-get install --no-install-recommends -y \
lziprecover \
lzop \
p7zip-full \
partclone \
unar \
xz-utils \
libmagic1 \
4 changes: 3 additions & 1 deletion package.nix
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
lib,
stdenv,
python3,
makeWrapper,
e2fsprogs-nofortify,
@@ -9,6 +10,7 @@
lziprecover,
lzop,
p7zip16,
partclone,
nix-filter,
sasquatch,
sasquatch-v4be,
@@ -22,7 +24,7 @@

let
# These dependencies are only added to PATH
runtimeDeps = [
runtimeDeps = lib.optional stdenv.isLinux partclone ++ [
e2fsprogs-nofortify
erofs-utils
jefferson
24 changes: 16 additions & 8 deletions python/unblob/file_utils.py
Original file line number Diff line number Diff line change
@@ -355,17 +355,25 @@ def parse(
return struct_parser(file)


def get_endian(file: File, big_endian_magic: int) -> Endian:
"""Read a four bytes magic and derive endianness from it.
def get_endian(
file: File, big_endian_magic: int, endian_len: Literal[2] | Literal[4] = 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fails the type check on earlier Python versions; `Literal[2, 4]` should work and is even shorter.

) -> Endian:
"""Read a two or four bytes magic and derive endianness from it.

It compares the read data with the big endian magic. It reads
four bytes and seeks back after that.
It compares the read data with the big endian magic and then seeks back
the amount of read bytes.
"""
if big_endian_magic > 0xFF_FF_FF_FF:
raise ValueError("big_endian_magic is larger than a 32 bit integer.")
magic_bytes = file.read(4)
if big_endian_magic > (1 << (endian_len * 8)) - 1:
raise ValueError(
f"big_endian_magic is larger than a {endian_len * 8} bit integer."
)
magic_bytes = file.read(endian_len)
file.seek(-len(magic_bytes), io.SEEK_CUR)
magic = convert_int32(magic_bytes, Endian.BIG)
magic = (
convert_int32(magic_bytes, Endian.BIG)
if endian_len == 4
else convert_int16(magic_bytes, Endian.BIG)
)
return Endian.BIG if magic == big_endian_magic else Endian.LITTLE


2 changes: 2 additions & 0 deletions python/unblob/handlers/__init__.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
cab,
cpio,
dmg,
partclone,
rar,
sevenzip,
stuffit,
@@ -119,6 +120,7 @@
ecc.AutelECCHandler,
uzip.UZIPHandler,
erofs.EROFSHandler,
partclone.PartcloneHandler,
)

BUILTIN_DIR_HANDLERS: DirectoryHandlers = (
81 changes: 81 additions & 0 deletions python/unblob/handlers/archive/partclone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import binascii
import io
from math import ceil
from typing import Optional

from unblob.extractors import Command
from unblob.file_utils import File, InvalidInputFormat, get_endian
from unblob.models import Regex, StructHandler, ValidChunk

# C-style layout of the partclone image header. PartcloneHandler exposes it
# through its C_DEFINITIONS attribute and parses it with parse_header().
# The trailing crc32 field is checked against a CRC of all preceding header
# bytes in PartcloneHandler.is_valid_header().
C_DEFINITIONS = r"""
typedef struct partclone_header{
char magic[16];
char partclone_version[14];
char image_version_txt[4];
char endian[2];
char fs_type[16];
uint64 fs_size;
uint64 fs_total_block_count;
uint64 fs_used_block_count_superblock;
uint64 fs_used_block_count_bitmap;
uint32 fs_block_size;
uint32 feature_size;
uint16 image_version;
uint16 number_of_bits_for_CPU;
uint16 checksum_mode;
uint16 checksum_size;
uint32 blocks_per_checksum;
uint8 reseed_checksum;
uint8 bitmap_mode;
uint32 crc32;
} partclone_header_t;
"""

# Name of the struct from C_DEFINITIONS that describes the header.
HEADER_STRUCT = "partclone_header_t"
# Value the 2-byte `endian` field has when it is read as big endian from a
# big-endian image; handed to get_endian() to choose the byte order.
BIG_ENDIAN_MAGIC = 0xC0DE
# Byte offset of the `endian` field inside the header:
# magic (16) + partclone_version (14) + image_version_txt (4).
ENDIAN_OFFSET = 34


class PartcloneHandler(StructHandler):
    """Recognize partclone images and compute the extent of each image.

    The chunk size is derived entirely from the image header: header,
    bitmap, optional per-block checksums and the used data blocks.
    Extraction is delegated to the external ``partclone.restore`` command.
    """

    NAME = "partclone"
    PATTERNS = [Regex(r"partclone-image\x00\d+\.\d+\.\d+.*?0002(\xde\xc0|\xc0\xde)")]
    HEADER_STRUCT = HEADER_STRUCT
    C_DEFINITIONS = C_DEFINITIONS
    EXTRACTOR = Command(
        "partclone.restore",
        "-W",
        "-s",
        "{inpath}",
        "-o",
        "{outdir}/partclone.restored",
        "-L",
        "/dev/stdout",
    )

    def is_valid_header(self, header) -> bool:
        """Return True when the CRC stored in *header* matches its content."""
        # The CRC covers every header byte except the trailing 4-byte crc32 field.
        calculated_crc = binascii.crc32(header.dumps()[0:-4])
        # partclone does not apply the final XOR, while binascii.crc32 does,
        # so the stored value is flipped before comparing.
        return (header.crc32 ^ 0xFFFFFFFF) == calculated_crc

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Parse the header at *start_offset* and return the image's chunk.

        Raises:
            InvalidInputFormat: if the header CRC does not match, or if
                checksums are enabled but ``blocks_per_checksum`` is zero.
        """
        # The byte order has to be known before the header can be parsed, so
        # peek at the endian marker first and then rewind to the start.
        file.seek(start_offset + ENDIAN_OFFSET, io.SEEK_SET)
        endian = get_endian(file, BIG_ENDIAN_MAGIC, endian_len=2)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)

        if not self.is_valid_header(header):
            raise InvalidInputFormat("Invalid partclone header.")

        # Plain ints with integer ceiling division: float division loses
        # precision for 64-bit counters larger than 2**53.
        total_blocks = int(header.fs_total_block_count)
        used_blocks = int(header.fs_used_block_count_bitmap)
        checksum_size = int(header.checksum_size)

        end_offset = start_offset + len(header)  # header
        end_offset += checksum_size  # checksum size
        end_offset += (total_blocks + 7) // 8  # bitmap, one bit per block

        if header.checksum_mode != 0:
            blocks_per_checksum = int(header.blocks_per_checksum)
            if blocks_per_checksum == 0:
                # Would otherwise surface as a ZeroDivisionError.
                raise InvalidInputFormat(
                    "Invalid partclone header: blocks_per_checksum is zero."
                )
            checksum_blocks = -(-used_blocks // blocks_per_checksum)  # ceil
            end_offset += checksum_blocks * checksum_size

        end_offset += used_blocks * int(header.fs_block_size)  # data
        return ValidChunk(start_offset=start_offset, end_offset=end_offset)
3 changes: 3 additions & 0 deletions tests/integration/archive/partclone/__input__/floppy-144m.img
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
43 changes: 33 additions & 10 deletions tests/test_file_utils.py
Original file line number Diff line number Diff line change
@@ -345,29 +345,49 @@ class TestGetEndian:
"content, big_endian_magic, expected",
[
pytest.param(
b"\xff\x00\x00\x10", 0x100000FF, Endian.LITTLE, id="valid_little_endian"
b"\xff\x00\x00\x10",
0x100000FF,
Endian.LITTLE,
id="valid_little_endian_4",
),
pytest.param(
b"\x10\x00\x00\xff", 0x100000FF, Endian.BIG, id="valid_big_endian"
b"\x10\x00\x00\xff", 0x100000FF, Endian.BIG, id="valid_big_endian_4"
),
pytest.param(
b"\xff\x00", 0x00FF, Endian.LITTLE, id="valid_little_endian_2"
),
pytest.param(b"\x10\x00", 0x1000, Endian.BIG, id="valid_big_endian_2"),
],
)
def test_get_endian(self, content: bytes, big_endian_magic: int, expected: Endian):
file = File.from_bytes(content)
assert get_endian(file, big_endian_magic) == expected
assert get_endian(file, big_endian_magic, endian_len=len(content)) == expected
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fails the type check because the type checker cannot know that len(content) is always either 2 or 4.

This test needs to be split in two: one test for a magic of length 2 and one for a magic of length 4.

(This also shows that get_endian() should not be extended: keep the original as is and define a new function for the 2-byte magic.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about leaving the original get_endian() as is, and introducing a new get_endian_short()?

I prefer two smaller functions to one bigger function, which is also harder to use and understand.


@pytest.mark.parametrize(
"content, big_endian_magic",
"content, big_endian_magic, endian_len, msg",
[
pytest.param(b"\x00\x00\x00\x01", 0xFF_FF_FF_FF_FF, id="larger_than_32bit"),
pytest.param(
b"\x00\x00\x00\x01",
0xFF_FF_FF_FF_FF,
4,
"big_endian_magic is larger than a 32 bit integer",
id="larger_than_32bit",
),
pytest.param(
b"\x00\x00\x00\x01",
0xFF_FF_FF_FF_FF,
2,
"big_endian_magic is larger than a 16 bit integer",
id="larger_than_16bit",
),
],
)
def test_get_endian_errors(self, content: bytes, big_endian_magic: int):
def test_get_endian_errors(
self, content: bytes, big_endian_magic: int, endian_len: int, msg: str
):
file = File.from_bytes(content)
with pytest.raises(
ValueError, match="big_endian_magic is larger than a 32 bit integer"
):
get_endian(file, big_endian_magic)
with pytest.raises(ValueError, match=msg):
get_endian(file, big_endian_magic, endian_len)

def test_get_endian_resets_the_file_pointer(self):
file = File.from_bytes(bytes.fromhex("FFFF 0000"))
@@ -376,6 +396,9 @@ def test_get_endian_resets_the_file_pointer(self):
with pytest.raises(InvalidInputFormat):
get_endian(file, 0xFFFF_0000)
assert file.tell() == pos
with pytest.raises(InvalidInputFormat):
get_endian(file, 0xFFFF)
assert file.tell() == pos


@pytest.mark.parametrize(
7 changes: 7 additions & 0 deletions tests/test_handlers.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
"""

import inspect
import sys
from pathlib import Path

import pytest
@@ -35,6 +36,12 @@ def test_all_handlers(
extraction_config: ExtractionConfig,
request: pytest.FixtureRequest,
):
handler_name = input_dir.parent.name
if (sys.platform, handler_name) == ("darwin", "partclone"):
pytest.skip(
f"Handler '{handler_name}' not supported on platform '{sys.platform}'"
)

log_path = Path("/dev/null") # no logging
report_file = None # no reporting

Loading