From 11e67dc8c21fb5e61e96cd027e82f0444e34746c Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 2 Jun 2023 11:24:57 +0200 Subject: [PATCH 1/3] main: Add Analysis.sources to JSON output Increase transparency and debug-friendliness by including our Source objects in the --json output. In order to more easily differentiate the various types of Source objects in this output, we want to include the Source subclass name in its JSON representation. This is accomplished by making a proper Source base class that automatically injects a .source_type member which records the subclass. This is then included in the JSON representation. Without this, a CodeSource, DepsSource, and PyEnvSource would be represented like this, and differentiating them would have to be done by looking at incidental members: "sources": [ { "path": "/some/path/code.py", "base_dir": "/some/path", }, { "path": "/some/path/requirements.txt", "parser_choice": "requirements.txt", }, { "path": "/some/path/.venv", }, ] Instead, we make the entries self-documenting, by including the subclass name: "sources": [ { "source_type": "CodeSource", "path": "/some/path/code.py", "base_dir": "/some/path", }, { "source_type": "DepsSource", "path": "/some/path/requirements.txt", "parser_choice": "requirements.txt", }, { "source_type": "PyEnvSource", "path": "/some/path/.venv", }, ], --- fawltydeps/main.py | 4 +++- fawltydeps/types.py | 28 ++++++++++++++++++++------- tests/test_cmdline.py | 45 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 10 deletions(-) diff --git a/fawltydeps/main.py b/fawltydeps/main.py index 44543a80..7490a039 100644 --- a/fawltydeps/main.py +++ b/fawltydeps/main.py @@ -82,7 +82,7 @@ def __init__(self, settings: Settings, stdin: Optional[TextIO] = None): # The following members are calculated once, on-demand, by the # @property @calculated_once methods below: - self._source: Optional[Set[Source]] = None + self._sources: Optional[Set[Source]] = None self._imports: Optional[List[ParsedImport]] = None self._declared_deps: Optional[List[DeclaredDependency]] = None self._resolved_deps: Optional[Dict[str, Package]] = None @@ -199,6 +199,7 @@ def print_json(self, out: TextIO) -> None: frozenset: partial(sorted, key=str), set: partial(sorted, key=str), type(BasePackageResolver): lambda klass: klass.__name__, + type(Source): lambda klass: klass.__name__, } encoder = partial(custom_pydantic_encoder, custom_type_encoders) json_dict = { @@ -206,6 +207,7 @@ def print_json(self, out: TextIO) -> None: # Using properties with an underscore do not trigger computations. # They are populated only if the computations were already required # by settings.actions. 
+ "sources": self._sources, "imports": self._imports, "declared_deps": self._declared_deps, "resolved_deps": self._resolved_deps, diff --git a/fawltydeps/types.py b/fawltydeps/types.py index 9342bb0c..ade42dd4 100644 --- a/fawltydeps/types.py +++ b/fawltydeps/types.py @@ -1,11 +1,12 @@ """Common types used across FawltyDeps.""" import sys +from abc import ABC from dataclasses import asdict, dataclass, field, replace from enum import Enum from functools import total_ordering from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union from fawltydeps.utils import hide_dataclass_fields @@ -47,7 +48,20 @@ def __str__(self) -> str: @dataclass(frozen=True, eq=True, order=True) -class CodeSource: +class Source(ABC): + """Base class for some source of input to FawltyDeps. + + This exists to inject the class name of the subclass into our JSON output. + """ + + source_type: Type["Source"] = field(init=False) + + def __post_init__(self) -> None: + object.__setattr__(self, "source_type", self.__class__) + + +@dataclass(frozen=True, eq=True, order=True) +class CodeSource(Source): """A Python code source to be parsed for import statements. .path points to the .py or .ipynb file containing Python code, alternatively @@ -63,6 +77,7 @@ class CodeSource: base_dir: Optional[Path] = None def __post_init__(self) -> None: + super().__post_init__() if self.path != "": assert isinstance(self.path, Path) if not self.path.is_file(): @@ -78,7 +93,7 @@ def __post_init__(self) -> None: @dataclass(frozen=True, eq=True, order=True) -class DepsSource: +class DepsSource(Source): """A source to be parsed for declared dependencies. Also include which declared dependencies parser we have chosen to use for @@ -96,11 +111,12 @@ class DepsSource: parser_choice: ParserChoice def __post_init__(self) -> None: + super().__post_init__() assert self.path.is_file() # sanity check @dataclass(frozen=True, eq=True, order=True) -class PyEnvSource: +class PyEnvSource(Source): """A source to be used for looking up installed Python packages. .path points to a directory that directly contains Python packages, e.g. the @@ -114,6 +130,7 @@ class PyEnvSource: path: Path def __post_init__(self) -> None: + super().__post_init__() assert self.path.is_dir() # sanity check # Support vitualenvs, poetry2nix envs, system-wide installs, etc. 
if self.path.match("lib/python?.*/site-packages"): @@ -126,9 +143,6 @@ def __post_init__(self) -> None: raise ValueError(f"{self.path} is not a valid dir for Python packages!") -Source = Union[CodeSource, DepsSource, PyEnvSource] - - @total_ordering @dataclass(frozen=True) class Location: diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index 9fa8b191..40d3a10c 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -7,6 +7,7 @@ import json import logging +import sys from dataclasses import dataclass, field from itertools import dropwhile from textwrap import dedent @@ -45,6 +46,7 @@ def make_json_settings_dict(**kwargs): "verbosity": 0, "custom_mapping_file": [], } + assert all(k in settings for k in kwargs) settings.update(kwargs) return settings @@ -136,6 +138,13 @@ def test_list_imports_json__from_py_file__prints_imports_from_file(write_tmp_fil code=[f"{tmp_path}/myfile.py"], output_format="json", ), + "sources": [ + { + "source_type": "CodeSource", + "path": f"{tmp_path}/myfile.py", + "base_dir": None, + }, + ], "imports": [ { "name": "requests", @@ -298,6 +307,13 @@ def test_list_deps_json__dir__prints_deps_from_requirements_txt(fake_project): "settings": make_json_settings_dict( actions=["list_deps"], deps=[f"{tmp_path}"], output_format="json" ), + "sources": [ + { + "source_type": "DepsSource", + "path": f"{tmp_path}/requirements.txt", + "parser_choice": "requirements.txt", + }, + ], "imports": None, "declared_deps": [ { @@ -516,12 +532,33 @@ def test_check_json__simple_project__can_report_both_undeclared_and_unused( tmp_path = fake_project( imports=["requests"], declared_deps=["pandas"], + fake_venvs={"my_venv": {}}, ) + major, minor = sys.version_info[:2] expect = { "settings": make_json_settings_dict( - code=[f"{tmp_path}"], deps=[f"{tmp_path}"], output_format="json" + code=[f"{tmp_path}"], + deps=[f"{tmp_path}"], + pyenvs=[f"{tmp_path}"], + output_format="json", ), + "sources": [ + { + "source_type": "CodeSource", + "path": f"{tmp_path}/code.py", + "base_dir": f"{tmp_path}", + }, + { + "source_type": "DepsSource", + "path": f"{tmp_path}/requirements.txt", + "parser_choice": "requirements.txt", + }, + { + "source_type": "PyEnvSource", + "path": f"{tmp_path}/my_venv/lib/python{major}.{minor}/site-packages", + }, + ], "imports": [ { "name": "requests", @@ -557,7 +594,11 @@ def test_check_json__simple_project__can_report_both_undeclared_and_unused( "version": version(), } output, returncode = run_fawltydeps_function( - "--check", "--json", f"--code={tmp_path}", f"--deps={tmp_path}" + "--check", + "--json", + f"--code={tmp_path}", + f"--deps={tmp_path}", + f"--pyenv={tmp_path}", ) assert json.loads(output) == expect assert returncode == 3 # --json does not affect exit code From e56e65204ccc6748a4941e9f1d5aa1eb8e147540 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 2 Jun 2023 18:11:59 +0200 Subject: [PATCH 2/3] Add --list-sources CLI argument This allows FawltyDeps to stop early and print the sources that would be used for finding imports and declared dependencies in the project. This can be used to help find the appropriate options to pass to FawltyDeps in order to have it analyze the desired files. The (default) --summary view lists all file paths that are used as sources. The --detailed view also categorizes sources by type, and adds extra information relevant to the type (for CodeSource, which base path is used to recognize 1st-party imports, and for DepsSource, which parser is used to parse declared dependencies). 
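For example, in a hypothetical project with a single Python file, a requirements.txt and a virtualenv (all paths and the Python version below are illustrative), the detailed view would look roughly like this:

    $ fawltydeps --list-sources --detailed /some/path

    Sources of Python code:
      /some/path/code.py (using /some/path/ as base for 1st-party imports)

    Sources of declared dependencies:
      /some/path/requirements.txt (parsed as a requirements.txt file)

    Python environments:
      /some/path/.venv/lib/python3.11/site-packages (as a source of Python packages)

The default --summary view would instead print only the same paths, one per line.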
--- README.md | 5 ++ fawltydeps/cli_parser.py | 10 ++++ fawltydeps/main.py | 21 ++++++++ fawltydeps/settings.py | 1 + fawltydeps/types.py | 22 +++++++- tests/test_cmdline.py | 110 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 168 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 96d31b5d..0bb9a4d4 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,8 @@ one of these can be used at a time: - `--check-unused`: Report only unused dependencies - `--list-imports`: List third-party imports extracted from the project - `--list-deps`: List declared dependencies extracted from the project +- `--list-sources`: List files/directories from which imports, declared + dependencies and installed packages would be extracted When none of these are specified, the default action is `--check`. @@ -111,6 +113,9 @@ To include both code from stdin (`import foo`) and a file path (`file.py`), use: echo "import foo" | fawltydeps --list-imports --code - file.py ``` +At any time, if you want to see where FawltyDeps is looking for Python code, +you can use the `--list-sources --detailed` options. + #### Where to find declared dependencies The `--deps` option tells FawltyDeps where to look for your project's declared diff --git a/fawltydeps/cli_parser.py b/fawltydeps/cli_parser.py index 4ea49bc8..850a7ef1 100644 --- a/fawltydeps/cli_parser.py +++ b/fawltydeps/cli_parser.py @@ -59,6 +59,16 @@ def populate_parser_actions(parser: argparse._ActionsContainer) -> None: const={Action.REPORT_UNUSED}, help="Report only unused dependencies", ) + parser.add_argument( + "--list-sources", + dest="actions", + action="store_const", + const={Action.LIST_SOURCES}, + help=( + "List input paths used to extract imports, declared dependencies " + "and find installed packages" + ), + ) parser.add_argument( "--list-imports", dest="actions", diff --git a/fawltydeps/main.py b/fawltydeps/main.py index 7490a039..5e767937 100644 --- a/fawltydeps/main.py +++ b/fawltydeps/main.py @@ -99,6 +99,7 @@ def sources(self) -> Set[Source]: """The input sources (code, deps, pyenv) found in this project.""" # What Source types are needed for which action? 
source_types: Dict[Action, Set[Type[Source]]] = { + Action.LIST_SOURCES: {CodeSource, DepsSource, PyEnvSource}, Action.LIST_IMPORTS: {CodeSource}, Action.LIST_DEPS: {DepsSource}, Action.REPORT_UNDECLARED: {CodeSource, DepsSource, PyEnvSource}, @@ -177,6 +178,8 @@ def create(cls, settings: Settings, stdin: Optional[TextIO] = None) -> "Analysis ret = cls(settings, stdin) # Compute only the properties needed to satisfy settings.actions: + if ret.is_enabled(Action.LIST_SOURCES): + ret.sources # pylint: disable=pointless-statement if ret.is_enabled(Action.LIST_IMPORTS): ret.imports # pylint: disable=pointless-statement if ret.is_enabled(Action.LIST_DEPS): @@ -219,6 +222,24 @@ def print_json(self, out: TextIO) -> None: def print_human_readable(self, out: TextIO, details: bool = True) -> None: """Print a human-readable rendering of this analysis to 'out'.""" + if self.is_enabled(Action.LIST_SOURCES): + if details: + # Sort sources by type, then by path + source_types = [ + (CodeSource, "Sources of Python code:"), + (DepsSource, "Sources of declared dependencies:"), + (PyEnvSource, "Python environments:"), + ] + for source_type, heading in source_types: + sources = {s for s in self.sources if s.source_type is source_type} + if sources: + print("\n" + heading, file=out) + lines = [f" {src.render(details)}" for src in sources] + print("\n".join(sorted(lines)), file=out) + else: + unique_paths = {src.render(details) for src in self.sources} + print("\n".join(unique_paths), file=out) + if self.is_enabled(Action.LIST_IMPORTS): if details: # Sort imports by source, then by name diff --git a/fawltydeps/settings.py b/fawltydeps/settings.py index c106dc2e..5481307b 100644 --- a/fawltydeps/settings.py +++ b/fawltydeps/settings.py @@ -63,6 +63,7 @@ def __lt__(self, other: object) -> bool: class Action(OrderedEnum): """Actions provided by the FawltyDeps application.""" + LIST_SOURCES = "list_sources" LIST_IMPORTS = "list_imports" LIST_DEPS = "list_deps" REPORT_UNDECLARED = "check_undeclared" diff --git a/fawltydeps/types.py b/fawltydeps/types.py index ade42dd4..45f6aeb8 100644 --- a/fawltydeps/types.py +++ b/fawltydeps/types.py @@ -1,7 +1,7 @@ """Common types used across FawltyDeps.""" import sys -from abc import ABC +from abc import ABC, abstractmethod from dataclasses import asdict, dataclass, field, replace from enum import Enum from functools import total_ordering @@ -59,6 +59,11 @@ class Source(ABC): def __post_init__(self) -> None: object.__setattr__(self, "source_type", self.__class__) + @abstractmethod + def render(self, detailed: bool) -> str: + """Return a human-readable string representation of this source.""" + raise NotImplementedError + @dataclass(frozen=True, eq=True, order=True) class CodeSource(Source): @@ -91,6 +96,11 @@ def __post_init__(self) -> None: path=self.path, ) + def render(self, detailed: bool) -> str: + if detailed and self.base_dir is not None: + return f"{self.path} (using {self.base_dir}/ as base for 1st-party imports)" + return f"{self.path}" + @dataclass(frozen=True, eq=True, order=True) class DepsSource(Source): @@ -114,6 +124,11 @@ def __post_init__(self) -> None: super().__post_init__() assert self.path.is_file() # sanity check + def render(self, detailed: bool) -> str: + if detailed: + return f"{self.path} (parsed as a {self.parser_choice} file)" + return f"{self.path}" + @dataclass(frozen=True, eq=True, order=True) class PyEnvSource(Source): @@ -142,6 +157,11 @@ def __post_init__(self) -> None: raise ValueError(f"{self.path} is not a valid dir for Python packages!") + 
def render(self, detailed: bool) -> str: + if detailed: + return f"{self.path} (as a source of Python packages)" + return f"{self.path}" + @total_ordering @dataclass(frozen=True) diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index 40d3a10c..a3ca248a 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -396,6 +396,116 @@ def test_list_deps__pick_multiple_listed_files__prints_all_dependencies( assert returncode == 0 + +def test_list_sources__in_empty_project__lists_nothing(tmp_path): + output, returncode = run_fawltydeps_function("--list-sources", f"{tmp_path}") + expect = [] + assert_unordered_equivalence(output.splitlines()[:-2], expect) + assert returncode == 0 + + +def test_list_sources__in_varied_project__lists_all_files(fake_project): + tmp_path = fake_project( + files_with_imports={ + "code.py": ["foo"], + "subdir/other.py": ["foo"], + "subdir/notebook.ipynb": ["foo"], + }, + files_with_declared_deps={ + "requirements.txt": ["foo"], + "pyproject.toml": ["foo"], + "setup.py": ["foo"], + "setup.cfg": ["foo"], + }, + fake_venvs={"my_venv": {}}, + ) + output, returncode = run_fawltydeps_function("--list-sources", f"{tmp_path}") + major, minor = sys.version_info[:2] + expect = [ + str(tmp_path / filename) + for filename in [ + "code.py", + "subdir/other.py", + "subdir/notebook.ipynb", + "requirements.txt", + "pyproject.toml", + "setup.py", + "setup.cfg", + f"my_venv/lib/python{major}.{minor}/site-packages", + ] + ] + assert_unordered_equivalence(output.splitlines()[:-2], expect) + assert returncode == 0 + + +def test_list_sources_detailed__in_varied_project__lists_all_files(fake_project): + tmp_path = fake_project( + files_with_imports={ + "code.py": ["foo"], + "subdir/notebook.ipynb": ["foo"], + "subdir/other.py": ["foo"], + }, + files_with_declared_deps={ + "pyproject.toml": ["foo"], + "requirements.txt": ["foo"], + "setup.cfg": ["foo"], + "setup.py": ["foo"], + }, + fake_venvs={"my_venv": {}}, + ) + output, returncode = run_fawltydeps_function( + "--list-sources", f"{tmp_path}", "--detailed" + ) + expect_code_lines = [ + f" {tmp_path / filename} (using {tmp_path}/ as base for 1st-party imports)" + for filename in [ + "code.py", + "setup.py", # This is both a CodeSource and a DepsSource!
+ "subdir/notebook.ipynb", + "subdir/other.py", + ] + ] + expect_deps_lines = [ + f" {tmp_path / filename} (parsed as a {filename} file)" + for filename in [ + "pyproject.toml", + "requirements.txt", + "setup.cfg", + "setup.py", + ] + ] + major, minor = sys.version_info[:2] + expect_pyenv_lines = [ + f" {tmp_path}/my_venv/lib/python{major}.{minor}/site-packages " + + "(as a source of Python packages)", + ] + expect = [ + "Sources of Python code:", + *expect_code_lines, + "", + "Sources of declared dependencies:", + *expect_deps_lines, + "", + "Python environments:", + *expect_pyenv_lines, + ] + assert output.splitlines() == expect + assert returncode == 0 + + +def test_list_sources_detailed__from_both_python_file_and_stdin(fake_project): + tmp_path = fake_project(files_with_imports={"code.py": ["foo"]}) + output, returncode = run_fawltydeps_function( + "--list-sources", f"{tmp_path}", "--code", f"{tmp_path}", "-", "--detailed" + ) + expect = [ + "Sources of Python code:", + f" {tmp_path}/code.py (using {tmp_path}/ as base for 1st-party imports)", + " ", + ] + assert output.splitlines() == expect + assert returncode == 0 + + @dataclass class ProjectTestVector: """Test vectors for FawltyDeps Settings configuration.""" From 33c67093736b2c6794d26d281bb68171954ae6f7 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Tue, 13 Jun 2023 15:49:44 +0200 Subject: [PATCH 3/3] main: Reformat print_human_readable() to fix pylint's 'Too many branches' --- fawltydeps/main.py | 79 ++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/fawltydeps/main.py b/fawltydeps/main.py index 5e767937..90504a91 100644 --- a/fawltydeps/main.py +++ b/fawltydeps/main.py @@ -15,7 +15,7 @@ import sys from functools import partial from operator import attrgetter -from typing import Dict, List, Optional, Set, TextIO, Type +from typing import Dict, Iterator, List, Optional, Set, TextIO, Type from pydantic.json import custom_pydantic_encoder # pylint: disable=no-name-in-module @@ -220,10 +220,11 @@ def print_json(self, out: TextIO) -> None: } json.dump(json_dict, out, indent=2, default=encoder) - def print_human_readable(self, out: TextIO, details: bool = True) -> None: + def print_human_readable(self, out: TextIO, detailed: bool = True) -> None: """Print a human-readable rendering of this analysis to 'out'.""" - if self.is_enabled(Action.LIST_SOURCES): - if details: + + def render_sources() -> Iterator[str]: + if detailed: # Sort sources by type, then by path source_types = [ (CodeSource, "Sources of Python code:"), @@ -231,45 +232,55 @@ def print_human_readable(self, out: TextIO, details: bool = True) -> None: (PyEnvSource, "Python environments:"), ] for source_type, heading in source_types: - sources = {s for s in self.sources if s.source_type is source_type} - if sources: - print("\n" + heading, file=out) - lines = [f" {src.render(details)}" for src in sources] - print("\n".join(sorted(lines)), file=out) + filtered = {s for s in self.sources if s.source_type is source_type} + if filtered: + yield "\n" + heading + yield from sorted([f" {src.render(True)}" for src in filtered]) else: - unique_paths = {src.render(details) for src in self.sources} - print("\n".join(unique_paths), file=out) + yield from sorted({src.render(False) for src in self.sources}) - if self.is_enabled(Action.LIST_IMPORTS): - if details: + def render_imports() -> Iterator[str]: + if detailed: # Sort imports by source, then by name for imp in sorted(self.imports, key=attrgetter("source", "name")): - 
print(f"{imp.source}: {imp.name}", file=out) + yield f"{imp.source}: {imp.name}" else: unique_imports = {i.name for i in self.imports} - print("\n".join(sorted(unique_imports)), file=out) - - if self.is_enabled(Action.LIST_DEPS): - if details: - # Sort dependencies by location, then by name - for dep in sorted( - set(self.declared_deps), key=attrgetter("source", "name") - ): - print(f"{dep.source}: {dep.name}", file=out) + yield from sorted(unique_imports) + + def render_declared_deps() -> Iterator[str]: + if detailed: + # Sort dependencies by source, then by name + unique_deps = set(self.declared_deps) + for dep in sorted(unique_deps, key=attrgetter("source", "name")): + yield f"{dep.source}: {dep.name}" else: - print( - "\n".join(sorted(set(d.name for d in self.declared_deps))), file=out - ) + yield from sorted(set(d.name for d in self.declared_deps)) - if self.is_enabled(Action.REPORT_UNDECLARED) and self.undeclared_deps: - print("\nThese imports appear to be undeclared dependencies:", file=out) + def render_undeclared() -> Iterator[str]: + yield "\nThese imports appear to be undeclared dependencies:" for undeclared in self.undeclared_deps: - print(f"- {undeclared.render(details)}", file=out) + yield f"- {undeclared.render(detailed)}" - if self.is_enabled(Action.REPORT_UNUSED) and self.unused_deps: - print(f"\n{UNUSED_DEPS_OUTPUT_PREFIX}:", file=out) + def render_unused() -> Iterator[str]: + yield f"\n{UNUSED_DEPS_OUTPUT_PREFIX}:" for unused in sorted(self.unused_deps, key=lambda d: d.name): - print(f"- {unused.render(details)}", file=out) + yield f"- {unused.render(detailed)}" + + def output(lines: Iterator[str]) -> None: + for line in lines: + print(line, file=out) + + if self.is_enabled(Action.LIST_SOURCES): + output(render_sources()) + if self.is_enabled(Action.LIST_IMPORTS): + output(render_imports()) + if self.is_enabled(Action.LIST_DEPS): + output(render_declared_deps()) + if self.is_enabled(Action.REPORT_UNDECLARED) and self.undeclared_deps: + output(render_undeclared()) + if self.is_enabled(Action.REPORT_UNUSED) and self.unused_deps: + output(render_unused()) @staticmethod def success_message(check_undeclared: bool, check_unused: bool) -> Optional[str]: @@ -318,11 +329,11 @@ def print_output( if analysis.settings.output_format == OutputFormat.JSON: analysis.print_json(stdout) elif analysis.settings.output_format == OutputFormat.HUMAN_DETAILED: - analysis.print_human_readable(stdout, details=True) + analysis.print_human_readable(stdout, detailed=True) if exit_code == 0 and success_message: print(f"\n{success_message}", file=stdout) elif analysis.settings.output_format == OutputFormat.HUMAN_SUMMARY: - analysis.print_human_readable(stdout, details=False) + analysis.print_human_readable(stdout, detailed=False) if exit_code == 0 and success_message: print(f"\n{success_message}", file=stdout) else:
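Note: the source_type trick from PATCH 1/3, shown in isolation. The following is a minimal, self-contained sketch rather than FawltyDeps code (class and field names merely mirror the patch, and the json.dumps default= hook stands in for the pydantic custom_type_encoders used in fawltydeps/main.py): a frozen dataclass base class declares an init=False field, __post_init__ records the concrete subclass in it, and the JSON encoder renders that class as its name.

import json
from abc import ABC
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Optional, Type


@dataclass(frozen=True)
class Source(ABC):
    # Excluded from __init__; __post_init__ records the concrete subclass here.
    source_type: Type["Source"] = field(init=False)

    def __post_init__(self) -> None:
        # Frozen dataclasses forbid normal assignment, so bypass __setattr__.
        object.__setattr__(self, "source_type", self.__class__)


@dataclass(frozen=True)
class CodeSource(Source):
    path: Path
    base_dir: Optional[Path] = None


src = CodeSource(path=Path("/some/path/code.py"), base_dir=Path("/some/path"))
print(json.dumps(
    asdict(src),
    indent=2,
    # Render classes as their name ("CodeSource"), anything else (e.g. Path) via str().
    default=lambda o: o.__name__ if isinstance(o, type) else str(o),
))
# Prints roughly:
#   {
#     "source_type": "CodeSource",
#     "path": "/some/path/code.py",
#     "base_dir": "/some/path"
#   }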