-
Notifications
You must be signed in to change notification settings - Fork 75
Add support for --direct access for cim plugins #1437
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 28 commits
b5bcc7c
df955f1
c36698b
f183164
340ce17
1000f34
db71419
28b5a4a
5ea70ae
1afdbb9
25b8d17
4d60ab1
6bc3c16
715c249
1e6a00d
6b076a4
5bc1b42
076f512
21a6aee
4ed0bcd
867dbc0
8140f64
88cfc2f
10f348b
9f2a012
880e20d
4710aba
808e028
f1e01c4
310632a
48719bd
f413c9a
8b2fd4a
f2d7ab1
0a1efce
5815c12
c674034
c6033b7
745e5d9
e9386bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -1,31 +1,95 @@ | ||||
| from __future__ import annotations | ||||
|
|
||||
| import sys | ||||
| from itertools import chain | ||||
| from pathlib import Path | ||||
| from typing import TYPE_CHECKING | ||||
|
|
||||
| from dissect.target.filesystem import VirtualFilesystem | ||||
| from dissect.target.helpers.logging import get_logger | ||||
| from dissect.target.loader import Loader | ||||
| from dissect.target.plugins.os.default._os import DefaultOSPlugin | ||||
|
|
||||
| if TYPE_CHECKING: | ||||
| from collections.abc import Iterator | ||||
|
|
||||
| from dissect.target.target import Target | ||||
|
|
||||
| log = get_logger(__name__) | ||||
|
|
||||
|
|
||||
| class DirectLoader(Loader): | ||||
| def __init__(self, paths: list[str | Path]): | ||||
| def __init__(self, paths: list[str | Path], case_sensitive: bool = False): | ||||
| self.case_sensitive = case_sensitive | ||||
| self.paths = [(Path(path) if not isinstance(path, Path) else path).resolve() for path in paths] | ||||
|
|
||||
| @staticmethod | ||||
| def detect(path: Path) -> bool: | ||||
| return False | ||||
|
|
||||
| def map(self, target: Target) -> None: | ||||
| vfs = VirtualFilesystem() | ||||
| if not self.case_sensitive and self.check_case_insensitive_overlap(): | ||||
| log.warning( | ||||
| "Direct mode used in case insensitive mode, but this will cause files overlap, " | ||||
| "consider using --direct-sensitive" | ||||
| ) | ||||
| vfs = VirtualFilesystem(case_sensitive=self.case_sensitive) | ||||
| for path in self.paths: | ||||
| if path.is_file(): | ||||
| vfs.map_file(str(path), str(path)) | ||||
| vfs.map_file(str(path), path) | ||||
| elif path.is_dir(): | ||||
| vfs.map_dir(str(path), str(path)) | ||||
| vfs.map_dir(str(path), path) | ||||
|
|
||||
| target.filesystems.add(vfs) | ||||
| target._os_plugin = DefaultOSPlugin | ||||
|
|
||||
| def check_case_insensitive_overlap(self) -> bool: | ||||
| """Verify if two differents files will have the same path in a case-insensitive fs""" | ||||
william-billaud marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||
| if sys.version_info >= (3, 12) or sys.platform != "win32": | ||||
|
|
||||
| def get_files(path: Path) -> Iterator[Path]: | ||||
| """Return list of all files recursively,""" | ||||
william-billaud marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||
| if not path.exists(): | ||||
| return | ||||
| if path.is_file(): | ||||
| yield path | ||||
| # Recursively find all files in the directory | ||||
| yield from path.rglob("*") | ||||
| else: | ||||
|
|
||||
| def get_files(path: Path, max_depth: int = 7) -> Iterator[Path]: | ||||
| """ | ||||
william-billaud marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| rglob seems to have issue on windows with python <3.12 when working on a case sensitive FS. Thus | ||||
| we use another implemntation without using rglob. | ||||
| Probably related to https://github.com/python/cpython/issues/94537 | ||||
|
|
||||
| :param path: | ||||
| :param max_depth: | ||||
| :return: | ||||
| """ | ||||
| if max_depth == 0: | ||||
| return | ||||
| if not path.exists(): | ||||
| return | ||||
| if path.is_file(): | ||||
| yield path | ||||
| for f in path.iterdir(): | ||||
| if f.is_dir(): | ||||
| yield from get_files(f, max_depth=max_depth - 1) | ||||
| else: | ||||
| yield f | ||||
|
|
||||
| # Create a flat list of all file paths from all input directories | ||||
| all_paths = chain.from_iterable(get_files(p) for p in self.paths) | ||||
| # Filter out directories, keeping only files | ||||
| all_files = [p for p in all_paths if p.is_file()] | ||||
| # Compare the count of all files with the count of unique, lowercased file paths | ||||
|
|
||||
| return len({str(p).lower() for p in all_files}) != len(all_files) | ||||
|
|
||||
| def __repr__(self) -> str: | ||||
william-billaud marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||
| """ | ||||
| As DirectLoader does not call super().__init__() self.path is not defined, we need to redefine the __repr__ func | ||||
|
||||
| def __init__( |
Calling super().init(path=paths) won't work. We can still call super().init(path=paths[0]) but it's look like more like a hack to me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
with super.init()
uv run --python 3.10 --refresh --extra dev --extra full target-query --direct -f example_namespace o
Traceback (most recent call last):
File "/home/myusername/Documents/tools/github/dissect.target/.venv/bin/target-query", line 10, in <module>
sys.exit(main())
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/utils/cli.py", line 477, in wrapper
return func(*args, **kwargs)
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/query.py", line 153, in main
for target in open_targets(args):
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/utils/cli.py", line 246, in open_targets
[Target.open_direct(args.targets, getattr(args, "direct_sensitive", False))]
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/target.py", line 454, in open_direct
return cls._load("direct", DirectLoader(paths, case_sensitive))
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/loaders/direct.py", line 23, in __init__
super().__init__()
TypeError: Loader.__init__() missing 1 required positional argument: 'path'
with super.init(paths)
uv run --python 3.10 --refresh --extra dev --extra full target-query --direct -f example_namespace o
Traceback (most recent call last):
File "/home/myusername/Documents/tools/github/dissect.target/.venv/bin/target-query", line 10, in <module>
sys.exit(main())
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/utils/cli.py", line 477, in wrapper
return func(*args, **kwargs)
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/query.py", line 153, in main
for target in open_targets(args):
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/utils/cli.py", line 246, in open_targets
[Target.open_direct(args.targets, getattr(args, "direct_sensitive", False))]
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/target.py", line 454, in open_direct
return cls._load("direct", DirectLoader(paths, case_sensitive))
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/loaders/direct.py", line 23, in __init__
super().__init__(paths)
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/loader.py", line 79, in __init__
self.base_path = self.absolute_path.parent
AttributeError: 'list' object has no attribute 'parent'
with super().init(paths[0])
uv run --python 3.10 --refresh --extra dev --extra full target-query --direct -f example_namespace o
Traceback (most recent call last):
File "/home/myusername/Documents/tools/github/dissect.target/.venv/bin/target-query", line 10, in <module>
sys.exit(main())
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/utils/cli.py", line 477, in wrapper
return func(*args, **kwargs)
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/query.py", line 153, in main
for target in open_targets(args):
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/tools/utils/cli.py", line 246, in open_targets
[Target.open_direct(args.targets, getattr(args, "direct_sensitive", False))]
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/target.py", line 454, in open_direct
return cls._load("direct", DirectLoader(paths, case_sensitive))
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/loaders/direct.py", line 23, in __init__
super().__init__(paths[0])
File "/home/myusername/Documents/tools/github/dissect.target/dissect/target/loader.py", line 79, in __init__
self.base_path = self.absolute_path.parent
AttributeError: 'str' object has no attribute 'parent'
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah this is possibly a design problem with the DirectLoader class, since in python the constructor is considered to be part of the public-facing contract, while in other languages this is typically not the case. Thus, this situation violates the substitution principle, i.e. the expectation that all subclasses of Loader can be created in the same way.
I think it is out of scope of this PR to tackle this.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import io | ||
william-billaud marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| import logging | ||
| import typing | ||
|
|
||
| from dissect.target import Target | ||
| from dissect.target.filesystem import VirtualFilesystem | ||
|
|
||
| if typing.TYPE_CHECKING: | ||
| from pathlib import Path | ||
|
|
||
| import pytest | ||
|
|
||
|
|
||
| def test_direct_overlap_warning( | ||
| tmp_path: Path, caplog: pytest.LogCaptureFixture, monkeypatch: pytest.MonkeyPatch | ||
| ) -> None: | ||
| """Assert direct raise warning in case sensitive mode if some files overlap | ||
william-billaud marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| We must uncompress files in a temporary directory as having two files with same name | ||
| would cause issue with git on case insensitive fs. | ||
| """ | ||
| source_vfs = VirtualFilesystem(case_sensitive=True) | ||
| source_vfs.map_file_fh("file.txt", io.BytesIO(b"a")) | ||
| source_vfs.map_file_fh("File.txt", io.BytesIO(b"b")) | ||
| with caplog.at_level(logging.WARNING): | ||
| _ = Target.open_direct([source_vfs.path("/")], case_sensitive=True) | ||
| assert ( | ||
| "Direct mode used in case insensitive mode, but this will cause files overlap, " | ||
| "consider using --direct-sensitive" not in caplog.text | ||
| ) | ||
| with caplog.at_level(logging.WARNING): | ||
| _ = Target.open_direct([source_vfs.path("/")], case_sensitive=False) | ||
| assert ( | ||
| "Direct mode used in case insensitive mode, but this will cause files overlap, " | ||
| "consider using --direct-sensitive" in caplog.text | ||
| ) | ||
| with caplog.at_level(logging.WARNING): | ||
| _ = Target.open_direct([source_vfs.path("/file.txt"), source_vfs.path("/File.txt")], case_sensitive=False) | ||
| assert ( | ||
| "Direct mode used in case insensitive mode, but this will cause files overlap, " | ||
| "consider using --direct-sensitive" in caplog.text | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@twiggler regarding
As the path variable (that containes the VFS) was converted to string, the vfs object was lost and vfs.map_file tried to map the string '/' (thus my root fs)