From 3c3cafba9bf3193d1a83252bfbeaecfaa5c831fc Mon Sep 17 00:00:00 2001 From: Quentin Kaiser Date: Sun, 24 Dec 2023 11:07:48 +0100 Subject: [PATCH] fix(cli): add ability to extend default skip magic rather than overwrite it. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unblob has a decent default skip magic list that gets overwritten if a user provides their own, which means unblob users need to redefine all of unblob's default skip magic through the CLI whenever they provide their own. Changed the logic so that user provided skip magic values are simply appended to unblob's default list unless the user explicitly provides the "--clear-skip-magics" flag. Co-authored-by: Krisztián Fekete <1246751+e3krisztian@users.noreply.github.com> --- tests/test_cli.py | 57 ++++++++++++++++++++++++++++++++++++++++++-- unblob/cli.py | 20 +++++++++++++--- unblob/processing.py | 38 ++++++++++++++--------------- 3 files changed, 91 insertions(+), 24 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index e38c58486b..720015859a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List, Optional, Type +from typing import Iterable, List, Optional, Type from unittest import mock import pytest @@ -10,7 +10,12 @@ from unblob.extractors.command import MultiFileCommand from unblob.handlers import BUILTIN_HANDLERS from unblob.models import DirectoryHandler, Glob, Handler, HexString, MultiFile -from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, ExtractionConfig +from unblob.processing import ( + DEFAULT_DEPTH, + DEFAULT_PROCESS_NUM, + DEFAULT_SKIP_MAGIC, + ExtractionConfig, +) from unblob.ui import ( NullProgressReporter, ProgressReporter, @@ -367,3 +372,51 @@ def test_skip_extraction( assert ( process_file_mock.call_args.args[0].skip_extraction == skip_extraction ), fail_message + + +@pytest.mark.parametrize( + "args, skip_magic, fail_message", + [ + ([], 
DEFAULT_SKIP_MAGIC, "Should have kept default skip magics"), + ( + ["--skip-magic", "SUPERMAGIC"], + (*DEFAULT_SKIP_MAGIC, "SUPERMAGIC"), + "Should have kept default skip magics", + ), + (["--clear-skip-magics"], [], "Should have cleared default skip magics"), + ( + ["--clear-skip-magics", "--skip-magic", "SUPERMAGIC"], + ["SUPERMAGIC"], + "Should have cleared default skip magics", + ), + ( + ["--clear-skip-magics", "--skip-magic", DEFAULT_SKIP_MAGIC[1]], + [DEFAULT_SKIP_MAGIC[1]], + "Should allow user specified and remove the rest", + ), + ], +) +def test_clear_skip_magics( + args: List[str], skip_magic: Iterable[str], fail_message: str, tmp_path: Path +): + runner = CliRunner() + in_path = ( + Path(__file__).parent + / "integration" + / "archive" + / "zip" + / "regular" + / "__input__" + / "apple.zip" + ) + params = [*args, "--extract-dir", str(tmp_path), str(in_path)] + + process_file_mock = mock.MagicMock() + with mock.patch.object(unblob.cli, "process_file", process_file_mock): + result = runner.invoke(unblob.cli.cli, params) + + assert result.exit_code == 0 + process_file_mock.assert_called_once() + assert sorted(process_file_mock.call_args.args[0].skip_magic) == sorted( + skip_magic + ), fail_message diff --git a/unblob/cli.py b/unblob/cli.py index 9a117c86e1..fd65675052 100755 --- a/unblob/cli.py +++ b/unblob/cli.py @@ -168,9 +168,11 @@ def __init__( "--skip-magic", "skip_magic", type=click.STRING, - default=DEFAULT_SKIP_MAGIC, - help="Skip processing files with given magic prefix", - show_default=True, + help=f"""Skip processing files with given magic prefix. + The provided values are appended to unblob's own skip magic list unless + --clear-skip-magics is provided. 
+ [default: {', '.join(DEFAULT_SKIP_MAGIC)}] + """, multiple=True, ) @click.option( @@ -182,6 +184,14 @@ def __init__( show_default=True, multiple=True, ) +@click.option( + "--clear-skip-magics", + "clear_skip_magics", + is_flag=True, + show_default=True, + default=False, + help="Clear unblob's own skip magic list.", +) @click.option( "-p", "--process-num", @@ -246,6 +256,7 @@ def cli( entropy_depth: int, skip_magic: Iterable[str], skip_extension: Iterable[str], + clear_skip_magics: bool, # noqa: FBT001 skip_extraction: bool, # noqa: FBT001 keep_extracted_chunks: bool, # noqa: FBT001 handlers: Handlers, @@ -263,6 +274,9 @@ def cli( extra_dir_handlers = plugin_manager.load_dir_handlers_from_plugins() dir_handlers += tuple(extra_dir_handlers) + extra_magics_to_skip = () if clear_skip_magics else DEFAULT_SKIP_MAGIC + skip_magic = tuple(sorted(set(skip_magic).union(extra_magics_to_skip))) + config = ExtractionConfig( extract_root=extract_root, force_extract=force, diff --git a/unblob/processing.py b/unblob/processing.py index fd38c8fd2c..83eb9dc64d 100644 --- a/unblob/processing.py +++ b/unblob/processing.py @@ -52,31 +52,31 @@ DEFAULT_PROCESS_NUM = multiprocessing.cpu_count() DEFAULT_SKIP_MAGIC = ( "BFLT", - "JPEG", + "Composite Document File V2 Document", + "Erlang BEAM file", "GIF", - "PNG", - "SQLite", - "compiled Java class", - "TrueType Font data", - "PDF document", - "magic binary file", - "MS Windows icon resource", - "Web Open Font Format", "GNU message catalog", - "Xilinx BIT data", + "HP Printer Job Language", + "Intel serial flash for PCH ROM", + "JPEG", + "MPEG", + "MS Windows icon resource", + "Macromedia Flash data", "Microsoft Excel", - "Microsoft Word", - "Microsoft PowerPoint", "Microsoft OOXML", + "Microsoft PowerPoint", + "Microsoft Word", "OpenDocument", - "Macromedia Flash data", - "MPEG", - "HP Printer Job Language", - "Erlang BEAM file", - "python", # (e.g. 
python 2.7 byte-compiled) - "Composite Document File V2 Document", + "PDF document", + "PNG", + "SQLite", + "TrueType Font data", + "Web Open Font Format", "Windows Embedded CE binary image", - "Intel serial flash for PCH ROM", + "Xilinx BIT data", + "compiled Java class", + "magic binary file", + "python", # (e.g. python 2.7 byte-compiled) ) DEFAULT_SKIP_EXTENSION = (".rlib",)