From f7f32fa7c8bb4894d095b38402cc91af2cecff2c Mon Sep 17 00:00:00 2001 From: Antoine Pecoraro Date: Mon, 3 Apr 2023 14:01:25 +0200 Subject: [PATCH] feat(handler): Add support for HP IPKG format The IPKG files are children of a BDL file. The header contains some information such as the name, version, revision, etc. of the IPKG package. After that header is a table of contents that has the name, the offset, the size and a crc32 of each file it contains. This repo https://github.com/tylerwhall/hpbdl contains information that was useful. --- .../archive/hp/ipkg/__input__/sample.ipkg | 3 + .../__output__/sample.ipkg_extract/sample.txt | 3 + unblob/handlers/__init__.py | 3 +- unblob/handlers/archive/hp/ipkg.py | 112 ++++++++++++++++++ 4 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 tests/integration/archive/hp/ipkg/__input__/sample.ipkg create mode 100644 tests/integration/archive/hp/ipkg/__output__/sample.ipkg_extract/sample.txt create mode 100644 unblob/handlers/archive/hp/ipkg.py diff --git a/tests/integration/archive/hp/ipkg/__input__/sample.ipkg b/tests/integration/archive/hp/ipkg/__input__/sample.ipkg new file mode 100644 index 0000000000..1b2c0e272d --- /dev/null +++ b/tests/integration/archive/hp/ipkg/__input__/sample.ipkg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c99dd4e215b508395aed42ebfee1e87d64b6bb5551c165934fe1a759e40093ee +size 1345 diff --git a/tests/integration/archive/hp/ipkg/__output__/sample.ipkg_extract/sample.txt b/tests/integration/archive/hp/ipkg/__output__/sample.ipkg_extract/sample.txt new file mode 100644 index 0000000000..0df0e9aa42 --- /dev/null +++ b/tests/integration/archive/hp/ipkg/__output__/sample.ipkg_extract/sample.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6478ea19e2122817e1986151847d3590e114ffd7c5d65d8d91cccea316c1cf2 +size 17 diff --git a/unblob/handlers/__init__.py b/unblob/handlers/__init__.py index 35479a86bc..f7472486a8 100644 --- 
a/unblob/handlers/__init__.py
+++ b/unblob/handlers/__init__.py
@@ -2,7 +2,7 @@
 from .archive import ar, arc, arj, cab, cpio, dmg, rar, sevenzip, stuffit, tar, zip
 from .archive.dlink import encrpted_img, shrs
 from .archive.engeniustech import engenius
-from .archive.hp import bdl
+from .archive.hp import bdl, ipkg
 from .archive.instar import bneg
 from .archive.netgear import chk, trx
 from .archive.qnap import qnap_nas
@@ -64,6 +64,7 @@
     qnap_nas.QnapHandler,
     bneg.BNEGHandler,
     bdl.HPBDLHandler,
+    ipkg.HPIPKGHandler,
     sparse.SparseHandler,
     ar.ARHandler,
     arc.ARCHandler,
diff --git a/unblob/handlers/archive/hp/ipkg.py b/unblob/handlers/archive/hp/ipkg.py
new file mode 100644
index 0000000000..520b999d55
--- /dev/null
+++ b/unblob/handlers/archive/hp/ipkg.py
@@ -0,0 +1,141 @@
+import io
+from pathlib import Path
+from typing import Optional
+
+from dissect.cstruct import Instance
+from structlog import get_logger
+
+from unblob.extractor import carve_chunk_to_file, is_safe_path
+from unblob.file_utils import Endian, File, InvalidInputFormat, StructParser, snull
+from unblob.models import Chunk, Extractor, HexString, StructHandler, ValidChunk
+
+logger = get_logger()
+
+# The header points (via toc_offset) at a table of toc_entries records, each
+# giving the name, offset, size and crc32 of one contained file.
+C_DEFINITIONS = r"""
+    typedef struct ipkg_file_entry {
+        char name[256];
+        uint64 offset;
+        uint64 size;
+        uint32 crc32;
+    } ipkg_toc_entry_t;
+
+    typedef struct ipkg_header {
+        char magic[4];
+        uint16 major;
+        uint16 minor;
+        uint32 toc_offset;
+        uint32 unknown_1;
+        uint32 toc_entries;
+        uint32 unknown_2[2];
+        uint32 always_null;
+        char file_version[256];
+        char product_name[256];
+        char ipkg_name[256];
+        char signature[256];
+    } ipkg_header_t;
+"""
+
+
+def is_valid_header(header: Instance) -> bool:
+    """Return True when the parsed IPKG header looks plausible.
+
+    A header is rejected when it has no table of contents (zero offset or
+    zero entries) or when any of its name/version strings is not valid UTF-8.
+    """
+    if header.toc_offset == 0 or header.toc_entries == 0:
+        return False
+    try:
+        snull(header.ipkg_name).decode("utf-8")
+        snull(header.file_version).decode("utf-8")
+        snull(header.product_name).decode("utf-8")
+    except UnicodeDecodeError:
+        return False
+    return True
+
+
+class HPIPKGExtractor(Extractor):
+    """Carve every file listed in an IPKG table of contents into outdir."""
+
+    def __init__(self):
+        self._struct_parser = StructParser(C_DEFINITIONS)
+
+    def extract(self, inpath: Path, outdir: Path):
+        """Extract the files contained in the IPKG at inpath into outdir.
+
+        Raises InvalidInputFormat when an entry name is empty or contains
+        directory components.
+        """
+        entries = []
+        with File.from_path(inpath) as file:
+            header = self._struct_parser.parse("ipkg_header_t", file, Endian.LITTLE)
+            file.seek(header.toc_offset, io.SEEK_SET)
+            for _ in range(header.toc_entries):
+                entry = self._struct_parser.parse(
+                    "ipkg_toc_entry_t", file, Endian.LITTLE
+                )
+                entry_path = Path(snull(entry.name).decode("utf-8"))
+                # An empty name would make the carve target outdir itself.
+                if not entry_path.name:
+                    raise InvalidInputFormat("Entry name is empty.")
+                if entry_path.parent.name:
+                    raise InvalidInputFormat("Entry name contains directories.")
+                if not is_safe_path(outdir, entry_path):
+                    logger.warning(
+                        "Path traversal attempt, discarding.",
+                        outdir=outdir,
+                        path=entry_path,
+                    )
+                    continue
+                entries.append(
+                    (
+                        # Join once: joinpath(outdir / name) would duplicate a
+                        # relative outdir (outdir/outdir/name).
+                        outdir / entry_path.name,
+                        Chunk(
+                            start_offset=entry.offset,
+                            end_offset=entry.offset + entry.size,
+                        ),
+                    )
+                )
+
+            # Carve only after the whole table has been parsed and validated.
+            for carve_path, chunk in entries:
+                carve_chunk_to_file(
+                    file=file,
+                    chunk=chunk,
+                    carve_path=carve_path,
+                )
+
+
+class HPIPKGHandler(StructHandler):
+    """Identify HP IPKG chunks by magic and compute their extent from the TOC."""
+
+    NAME = "ipkg"
+
+    # "ipkg" magic followed by major=1 and minor=3 (little-endian uint16s).
+    PATTERNS = [HexString("69 70 6B 67 01 00 03 00")]
+
+    C_DEFINITIONS = C_DEFINITIONS
+    HEADER_STRUCT = "ipkg_header_t"
+    EXTRACTOR = HPIPKGExtractor()
+
+    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
+        """Return the chunk spanning from start_offset to the furthest entry end.
+
+        Raises InvalidInputFormat when the header fails is_valid_header.
+        """
+        header = self.parse_header(file, endian=Endian.LITTLE)
+
+        if not is_valid_header(header):
+            raise InvalidInputFormat("Invalid IPKG header.")
+
+        # TOC and entry offsets are relative to the start of the package.
+        file.seek(start_offset + header.toc_offset, io.SEEK_SET)
+        end_offset = -1
+        for _ in range(header.toc_entries):
+            entry = self._struct_parser.parse("ipkg_toc_entry_t", file, Endian.LITTLE)
+            end_offset = max(end_offset, start_offset + entry.offset + entry.size)
+
+        return ValidChunk(start_offset=start_offset, end_offset=end_offset)