From 304a5cb051c2dc56be528ec6899153483f1f1dd4 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 17 Sep 2024 16:54:10 +1000 Subject: [PATCH 1/5] chore: add --local-maven-repo as input option --- scripts/release_scripts/run_macaron.sh | 30 ++++++++++++ src/macaron/__main__.py | 31 +++++++++++++ src/macaron/config/global_config.py | 3 ++ .../test.yaml | 46 +++++++++++++++++++ 4 files changed, 110 insertions(+) create mode 100644 tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index 5201cedb5..e90532b63 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -328,6 +328,10 @@ if [[ $command == "analyze" ]]; then python_venv_path="$2" shift ;; + --local-maven-repo) + local_maven_repo="$2" + shift + ;; *) rest_command+=("$1") ;; @@ -455,6 +459,32 @@ if [[ -n "${python_venv_path:-}" ]]; then mount_dir_ro "--python-venv" "$python_venv_path" "$python_venv_in_container" fi +# Mount the local Maven repo into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. +if [[ -n "${local_maven_repo:-}" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container" +else + # If the user doesn't provide local maven repo, we mount $HOME/.m2 into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. + # If $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly instead. + # This is because we don't want Macaron running within + # the container to use `$HOME/.m2` within the container as it is being used + # by the cyclonedx plugins for dependency resolution. 
+ if [[ -d "$HOME/.m2" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" + else + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" + mkdir -p "$output_local_maven_repo" + + mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container" + fi +fi + # MACARON entrypoint - verify-policy command argvs # This is for macaron verify-policy command. # Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/macaron.db diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index 120d8e0d8..ea228d329 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -57,6 +57,29 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None sys.exit(os.EX_OSFILE) global_config.load_python_venv(analyzer_single_args.python_venv) + # Set Python virtual environment path. + if analyzer_single_args.local_maven_repo is None: + # Load the default user local .m2 directory. + # Exit on error if $HOME is not set or empty. + home_dir = os.getenv("HOME") + if not home_dir: + logger.critical("Environment variable HOME is not set.") + sys.exit(os.EX_USAGE) + + local_maven_repo = os.path.join(home_dir, ".m2") + if not os.path.isdir(local_maven_repo): + logger.debug("The default local Maven repo at %s does not exist. 
Ignore ...") + global_config.local_maven_repo = None + + global_config.local_maven_repo = local_maven_repo + else: + user_provided_local_maven_repo = analyzer_single_args.local_maven_repo + if not os.path.exists(user_provided_local_maven_repo) or not os.path.isdir(user_provided_local_maven_repo): + logger.error("The user provided local Maven repo at %s is not valid.", user_provided_local_maven_repo) + sys.exit(os.EX_USAGE) + + global_config.local_maven_repo = user_provided_local_maven_repo + analyzer = Analyzer(global_config.output_path, global_config.build_log_path) # Initiate reporters. @@ -393,6 +416,14 @@ def main(argv: list[str] | None = None) -> None: help=("The path to the Python virtual environment of the target software component."), ) + single_analyze_parser.add_argument( + "--local-maven-repo", + required=False, + help=( + "The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2" + ), + ) + # Dump the default values. sub_parser.add_parser(name="dump-defaults", description="Dumps the defaults.ini file to the output directory.") diff --git a/src/macaron/config/global_config.py b/src/macaron/config/global_config.py index bfee23d32..d6d113a3a 100644 --- a/src/macaron/config/global_config.py +++ b/src/macaron/config/global_config.py @@ -46,6 +46,9 @@ class GlobalConfig: #: The path to Python virtual environment. python_venv_path: str = "" + #: The path to the local .m2 Maven repository. This attribute is None if there is no available .m2 directory. + local_maven_repo: str | None = None + def load( self, macaron_path: str, diff --git a/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml new file mode 100644 index 000000000..7de49fcac --- /dev/null +++ b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml @@ -0,0 +1,46 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. 
All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Test invalid usecases for --local-maven-repo. + +tags: +- macaron-docker-image +- macaron-python-package + +steps: +- name: HOME environment variable is not set and --local-maven-repo is not used. + kind: analyze + env: + HOME: + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + expect_fail: true +- name: Providing a directory that doesn't exist to --local-maven-repo. + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + - --local-maven-repo + - invalid_dir + expect_fail: true +- name: Create a test file. + kind: shell + options: + cmd: touch test.txt +- name: Providing a file path to --local-maven-repo. + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + - --local-maven-repo + - ./test.txt + expect_fail: true +- name: Clean up the test file. 
+ kind: shell + options: + cmd: rm test.txt From c3ccc083ae514c488add35c087147ce3e2efd25e Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Thu, 19 Sep 2024 10:31:19 +1000 Subject: [PATCH 2/5] chore: improve run_macaron.sh on local maven repo mounting behavior and improve the unit tests for run_macaron.sh --- scripts/release_scripts/run_macaron.sh | 46 +++-- .../test_run_macaron_sh.py | 165 ++++++++++++++---- 2 files changed, 158 insertions(+), 53 deletions(-) diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index e90532b63..457ccd220 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -339,7 +339,7 @@ if [[ $command == "analyze" ]]; then shift done elif [[ $command == "verify-policy" ]]; then - while [[ $# -gt 0 ]]; do + while [[ $# -gt 0 ]]; do case $1 in -d|--database) arg_database="$2" @@ -355,6 +355,15 @@ elif [[ $command == "verify-policy" ]]; then esac shift done +elif [[ $command == "dump-defaults" ]]; then + while [[ $# -gt 0 ]]; do + case $1 in + *) + rest_command+=("$1") + ;; + esac + shift + done fi # MACARON entrypoint - Main argvs @@ -466,22 +475,27 @@ if [[ -n "${local_maven_repo:-}" ]]; then mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container" else - # If the user doesn't provide local maven repo, we mount $HOME/.m2 into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. - # If $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly instead. - # This is because we don't want Macaron running within - # the container to use `$HOME/.m2` within the container as it is being used - # by the cyclonedx plugins for dependency resolution. 
- if [[ -d "$HOME/.m2" ]]; then - local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" - argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") - - mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" - else - local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" - output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" - mkdir -p "$output_local_maven_repo" + # Perform default local maven repo when the user doesn't provide --local-maven-repo and `analyze` command is used. + if [[ "$command" == "analyze" ]]; then + # We mount $HOME/.m2 into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. + if [[ -d "$HOME/.m2" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" + # If $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly + # into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. + # This is because we don't want Macaron running within + # the container to use `$HOME/.m2` within the container as it is being used + # by the cyclonedx plugins for dependency resolution. 
+ else + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" + mkdir -p "$output_local_maven_repo" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") - mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container" + mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container" + fi fi fi diff --git a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py index 985ecaeed..6eaf9a1be 100755 --- a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py +++ b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py @@ -6,13 +6,54 @@ import os import subprocess # nosec B404 import sys +import tempfile from collections import namedtuple +TestCase = namedtuple("TestCase", ["name", "script_args", "expected_macaron_args"]) -def test_macaron_command() -> int: - """Test if the ``macaron`` command in the container receives the correct arguments.""" - TestCase = namedtuple("TestCase", ["name", "script_args", "expected_macaron_args"]) +def run_test_case( + test_case: TestCase, + env: dict[str, str], +) -> int: + """Run a test case in an environment with variables defined by `env` and return the exit code.""" + exit_code = 0 + + name, script_args, expected_macaron_args = test_case + print(f"test_macaron_command[{name}]:", end=" ") + + result = subprocess.run( + [ # nosec B603 + "./output/run_macaron.sh", + *script_args, + ], + capture_output=True, + env=env, + check=False, + ) + if result.returncode != 0: + exit_code = 1 + print(f"FAILED with exit code {exit_code}") + print("stderr:") + print(result.stderr.decode("utf-8")) + return exit_code + + resulting_macaron_args = list(result.stderr.decode("utf-8").split()) + + if 
resulting_macaron_args != expected_macaron_args: + print("FAILED") + print(" script args : %s", str(script_args)) + print(" expected macaron args : %s", str(expected_macaron_args)) + print(" resulting macaron args: %s", str(resulting_macaron_args)) + exit_code = 1 + else: + print("PASSED") + + return exit_code + + +def test_macaron_command_help() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" test_cases = [ TestCase( name="'-h' as main argument", @@ -20,9 +61,9 @@ def test_macaron_command() -> int: expected_macaron_args=["-h"], ), TestCase( - name="'-h' as action argument for 'analyze'", - script_args=["analyze", "-h"], - expected_macaron_args=["analyze", "-h"], + name="'-h' as action argument for 'dump-defaults'", + script_args=["dump-defaults", "-h"], + expected_macaron_args=["dump-defaults", "-h"], ), TestCase( name="'-h' as action argument for 'verify-policy'", @@ -31,47 +72,97 @@ def test_macaron_command() -> int: ), ] + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_command_no_home_m2() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + test_cases = [ + TestCase( + name="no --local-maven-repo and host $HOME/.m2 is not available", + script_args=["analyze"], + expected_macaron_args=["analyze", "--local-maven-repo", "/home/macaron/analyze_local_maven_repo_readonly"], + ), + ] + + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + # We mimick the behavior of $HOME/.m2 not available by making $HOME pointing to a directory that doesn't exist. 
+ env["HOME"] = "./non_exist_dir" + exit_code = 0 + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_command_home_m2_available() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + test_cases = [ + TestCase( + name="no --local-maven-repo and host $HOME/.m2 is available", + script_args=["analyze"], + expected_macaron_args=["analyze", "--local-maven-repo", "/home/macaron/analyze_local_maven_repo_readonly"], + ), + ] + env = dict(os.environ) env["MCN_DEBUG_ARGS"] = "1" + exit_code = 0 + + with tempfile.TemporaryDirectory() as temp_dir: + # We create a temp dir with a .m2 directory and point $HOME to it. + # This .m2 directory contains an empty `repository` directory. + os.mkdir(os.path.join(temp_dir, ".m2")) + os.mkdir(os.path.join(temp_dir, ".m2/repository")) + env["HOME"] = temp_dir + + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_provide_local_maven_repo() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + with tempfile.TemporaryDirectory() as temp_dir: + test_cases = [ + TestCase( + name="with --local-maven-repo pointing to an existing directory", + script_args=["analyze", "--local-maven-repo", f"{temp_dir}"], + expected_macaron_args=[ + "analyze", + "--local-maven-repo", + "/home/macaron/analyze_local_maven_repo_readonly", + ], + ), + ] + + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + exit_code = 0 - for test_case in test_cases: - name, script_args, expected_macaron_args = test_case - print(f"test_macaron_command[{name}]:", end=" ") - - result = subprocess.run( - [ # nosec B603 - "./output/run_macaron.sh", - *script_args, - ], - capture_output=True, - env=env, - check=False, - ) - if result.returncode != 0: - exit_code = 1 - print(f"FAILED with exit code {exit_code}") - print("stderr:") - print(result.stderr.decode("utf-8")) - continue - - 
resulting_macaron_args = list(result.stderr.decode("utf-8").split()) - - if resulting_macaron_args != expected_macaron_args: - print("FAILED") - print(" script args : %s", str(script_args)) - print(" expected macaron args : %s", str(expected_macaron_args)) - print(" resulting macaron args: %s", str(resulting_macaron_args)) - exit_code = 1 - else: - print("PASSED") + for case in test_cases: + exit_code = run_test_case(case, env) return exit_code def main() -> int: """Run all tests.""" - return test_macaron_command() + return ( + test_macaron_command_help() + | test_macaron_command_no_home_m2() + | test_macaron_command_home_m2_available() + | test_macaron_provide_local_maven_repo() + ) if __name__ == "__main__": From 6f65e88e436b04f3d8afd21e4c8a10d6b9f9ea02 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Fri, 20 Sep 2024 14:05:32 +1000 Subject: [PATCH 3/5] chore: add maven local artifact finding --- src/macaron/artifact/local_artifact.py | 80 +++++++++++++ src/macaron/artifact/maven.py | 38 ++++++ src/macaron/slsa_analyzer/analyze_context.py | 4 + src/macaron/slsa_analyzer/analyzer.py | 12 ++ .../package_registry/jfrog_maven_registry.py | 47 +------- tests/artifact/test_local_artifact.py | 108 ++++++++++++++++++ tests/artifact/test_maven.py | 77 ++++++++++++- .../test_jfrog_maven_registry.py | 79 +------------ 8 files changed, 324 insertions(+), 121 deletions(-) create mode 100644 src/macaron/artifact/local_artifact.py create mode 100644 tests/artifact/test_local_artifact.py diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py new file mode 100644 index 000000000..eb862e477 --- /dev/null +++ b/src/macaron/artifact/local_artifact.py @@ -0,0 +1,80 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module declares types and utilities for handling local artifacts.""" + +import os +from collections.abc import Mapping + +from packageurl import PackageURL + +from macaron.artifact.maven import construct_maven_repository_path +from macaron.config.global_config import global_config + + +def get_local_artifact_repo_mapper() -> Mapping[str, str]: + """Get A.""" + local_artifact_mapper: dict[str, str] = {} + + if global_config.local_maven_repo: + local_artifact_mapper["maven"] = global_config.local_maven_repo + + if global_config.python_venv_path: + local_artifact_mapper["pypi"] = global_config.python_venv_path + + return local_artifact_mapper + + +def construct_local_artifact_path_from_purl( + build_purl_type: str, + component_purl: PackageURL, + local_artifact_repo_mapper: Mapping[str, str], +) -> str | None: + """Get B.""" + local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type) + if local_artifact_repo is None: + return None + + artifact_path = None + match build_purl_type: + case "maven": + group = component_purl.namespace + artifact = component_purl.name + version = component_purl.version + + if group is None or version is None: + return None + + artifact_path = os.path.join( + local_artifact_repo, + "repository", + construct_maven_repository_path(group, artifact, version), + ) + case "pypi": + # TODO: implement this. 
+ pass + case _: + return None + + return artifact_path + + +def get_local_artifact_paths( + purl: PackageURL, + build_tool_purl_types: list[str], + local_artifact_repo_mapper: Mapping[str, str], +) -> dict[str, str]: + """Get C.""" + result = {} + + for build_purl_type in build_tool_purl_types: + local_artfiact_path = construct_local_artifact_path_from_purl( + build_purl_type=build_purl_type, + component_purl=purl, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + + if local_artfiact_path and os.path.isdir(local_artfiact_path): + result[build_purl_type] = local_artfiact_path + + return result diff --git a/src/macaron/artifact/maven.py b/src/macaron/artifact/maven.py index 711758c8c..e8cebfa37 100644 --- a/src/macaron/artifact/maven.py +++ b/src/macaron/artifact/maven.py @@ -140,3 +140,41 @@ def create_maven_purl_from_artifact_filename( ) return None + + +def construct_maven_repository_path( + group_id: str, + artifact_id: str | None = None, + version: str | None = None, + asset_name: str | None = None, +) -> str: + """Construct a path to a folder or file on the registry, assuming Maven repository layout. + + For more details regarding Maven repository layout, see the following: + - https://maven.apache.org/repository/layout.html + - https://maven.apache.org/guides/mini/guide-naming-conventions.html + + Parameters + ---------- + group_id : str + The group id of a Maven package. + artifact_id : str + The artifact id of a Maven package. + version : str + The version of a Maven package. + asset_name : str + The asset name. + + Returns + ------- + str + The path to a folder or file on the registry. 
+ """ + path = group_id.replace(".", "/") + if artifact_id: + path = "/".join([path, artifact_id]) + if version: + path = "/".join([path, version]) + if asset_name: + path = "/".join([path, asset_name]) + return path diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index e54363f98..88ef09d7e 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -52,6 +52,9 @@ class ChecksOutputs(TypedDict): """The commit digest extracted from provenance, if applicable.""" provenance_verified: bool """True if the provenance exists and has been verified against a signed companion provenance.""" + local_artifact_paths: dict[str, str] + # TODO this doc string for this variable need more informatino, to be revise later. + """The mapping between build tool types and the directory that contains the corresponding artifacts.""" class AnalyzeContext: @@ -106,6 +109,7 @@ def __init__( provenance_repo_url=None, provenance_commit_digest=None, provenance_verified=False, + local_artifact_paths={}, ) @property diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index d5029b430..cdd27ffd3 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -17,6 +17,7 @@ from sqlalchemy.orm import Session from macaron import __version__ +from macaron.artifact.local_artifact import get_local_artifact_paths, get_local_artifact_repo_mapper from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.config.target_config import Configuration @@ -471,6 +472,17 @@ def run_single( analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest + discovered_build_toosl = ( + analyze_ctx.dynamic_data["build_spec"]["tools"] + analyze_ctx.dynamic_data["build_spec"]["purl_tools"] + ) + 
build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_toosl] + analyze_ctx.dynamic_data["local_artifact_paths"] = get_local_artifact_paths( + # The PURL is definitely valid here. + PackageURL.from_string(analyze_ctx.component.purl), + build_tools_purl_types, + local_artifact_repo_mapper=get_local_artifact_repo_mapper(), + ) + analyze_ctx.check_results = registry.scan(analyze_ctx) return Record( diff --git a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py index 1c78d4409..3b82bd05a 100644 --- a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py @@ -12,6 +12,7 @@ import requests +from macaron.artifact.maven import construct_maven_repository_path from macaron.config.defaults import defaults from macaron.errors import ConfigurationError from macaron.json_tools import JsonType @@ -199,44 +200,6 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool: return True return False - def construct_maven_repository_path( - self, - group_id: str, - artifact_id: str | None = None, - version: str | None = None, - asset_name: str | None = None, - ) -> str: - """Construct a path to a folder or file on the registry, assuming Maven repository layout. - - For more details regarding Maven repository layout, see the following: - - https://maven.apache.org/repository/layout.html - - https://maven.apache.org/guides/mini/guide-naming-conventions.html - - Parameters - ---------- - group_id : str - The group id of a Maven package. - artifact_id : str - The artifact id of a Maven package. - version : str - The version of a Maven package. - asset_name : str - The asset name. - - Returns - ------- - str - The path to a folder or file on the registry. 
- """ - path = group_id.replace(".", "/") - if artifact_id: - path = "/".join([path, artifact_id]) - if version: - path = "/".join([path, version]) - if asset_name: - path = "/".join([path, asset_name]) - return path - def fetch_artifact_ids(self, group_id: str) -> list[str]: """Get all artifact ids under a group id. @@ -253,7 +216,7 @@ def fetch_artifact_ids(self, group_id: str) -> list[str]: The artifacts ids under the group. """ folder_info_url = self.construct_folder_info_url( - folder_path=self.construct_maven_repository_path(group_id), + folder_path=construct_maven_repository_path(group_id), ) try: @@ -442,7 +405,7 @@ def fetch_asset_names( list[str] The list of asset names. """ - folder_path = self.construct_maven_repository_path( + folder_path = construct_maven_repository_path( group_id=group_id, artifact_id=artifact_id, version=version, @@ -617,7 +580,7 @@ def fetch_asset_metadata( JFrogMavenAssetMetadata | None The asset's metadata, or ``None`` if the metadata cannot be retrieved. """ - file_path = self.construct_maven_repository_path( + file_path = construct_maven_repository_path( group_id=group_id, artifact_id=artifact_id, version=version, @@ -800,7 +763,7 @@ def construct_asset_url( str The URL to the asset, which can be use for downloading the asset. """ - group_path = self.construct_maven_repository_path(group_id) + group_path = construct_maven_repository_path(group_id) return urlunsplit( SplitResult( scheme="https", diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py new file mode 100644 index 000000000..3e44bdcb6 --- /dev/null +++ b/tests/artifact/test_local_artifact.py @@ -0,0 +1,108 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Test the local artifact utilities.""" + +import tempfile +from collections.abc import Mapping + +import pytest +from packageurl import PackageURL + +from macaron.artifact.local_artifact import construct_local_artifact_path_from_purl, get_local_artifact_paths + + +@pytest.mark.parametrize( + ("build_purl_type", "purl_str", "local_artifact_repo_mapper", "expectation"), + [ + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + {"maven": "/home/foo/.m2"}, + "/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre", + id="A maven type PURL with available local maven repo", + ), + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + {}, + None, + id="A maven type PURL without an available local maven repo", + ), + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + {"pypi": "/home/foo/.venv"}, + None, + id="A maven type PURL without an available local maven repo but there is a Python venv", + ), + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava", + {"maven": "/home/foo/.m2"}, + None, + id="A maven type PURL with missing version and an available local maven repo", + ), + pytest.param( + "maven", + "pkg:maven/guava", + {"maven": "/home/foo/.m2"}, + None, + id="A maven type PURL with missing groupd Id and an available local maven repo", + ), + pytest.param( + "maven", + "pkg:github/oracle/macaron", + {"maven": "/home/foo/.m2"}, + None, + id="A git type PURL and an available local maven repo", + ), + ], +) +def test_construct_local_artifact_path_from_purl( + build_purl_type: str, + purl_str: str, + local_artifact_repo_mapper: Mapping[str, str], + expectation: str, +) -> None: + """Test constructing a local artifact path from a given purl.""" + component_purl = PackageURL.from_string(purl_str) + assert ( + construct_local_artifact_path_from_purl( + build_purl_type=build_purl_type, + component_purl=component_purl, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + == 
expectation + ) + + +@pytest.mark.parametrize( + ("purl_str", "build_tool_purl_types"), + [ + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + ["maven", "pypi"], + id="A maven type PURL where multiple build tool types are discovered", + ), + ], +) +def test_get_local_artifact_paths_non_existing( + purl_str: str, + build_tool_purl_types: list[str], +) -> None: + """Test getting local artifact paths of non existing artifacts. + + The local artifact repos are available. + """ + purl = PackageURL.from_string(purl_str) + with tempfile.TemporaryDirectory() as temp_dir: + local_artifact_repo_mapper = { + "maven": temp_dir, + "pypi": temp_dir, + } + assert not get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) diff --git a/tests/artifact/test_maven.py b/tests/artifact/test_maven.py index 31e95ba53..6014c20ad 100644 --- a/tests/artifact/test_maven.py +++ b/tests/artifact/test_maven.py @@ -6,7 +6,7 @@ import pytest from packageurl import PackageURL -from macaron.artifact.maven import MavenSubjectPURLMatcher +from macaron.artifact.maven import MavenSubjectPURLMatcher, construct_maven_repository_path from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload @@ -86,3 +86,78 @@ def test_to_maven_artifact_subject( ) == provenance_payload.statement["subject"][subject_index] ) + + +@pytest.mark.parametrize( + ("args", "expected_path"), + [ + pytest.param( + { + "group_id": "io.micronaut", + }, + "io/micronaut", + id="Only group_id 1", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + }, + "com/fasterxml/jackson/core", + id="Only group_id 2", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + "artifact_id": "jackson-annotations", + }, + "com/fasterxml/jackson/core/jackson-annotations", + id="group_id and artifact_id", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + 
"artifact_id": "jackson-annotations", + "version": "2.9.9", + }, + "com/fasterxml/jackson/core/jackson-annotations/2.9.9", + id="group_id and artifact_id and version", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + "artifact_id": "jackson-annotations", + "version": "2.9.9", + "asset_name": "jackson-annotations-2.9.9.jar", + }, + "com/fasterxml/jackson/core/jackson-annotations/2.9.9/jackson-annotations-2.9.9.jar", + id="group_id and artifact_id and version and asset_name,", + ), + ], +) +def test_construct_maven_repository_path( + args: dict, + expected_path: str, +) -> None: + """Test the ``construct_maven_repository_path`` method.""" + assert construct_maven_repository_path(**args) == expected_path + + +@pytest.mark.parametrize( + ("group_id", "expected_group_path"), + [ + ( + "io.micronaut", + "io/micronaut", + ), + ( + "com.fasterxml.jackson.core", + "com/fasterxml/jackson/core", + ), + ], +) +def test_to_group_folder_path( + group_id: str, + expected_group_path: str, +) -> None: + """Test the ``to_gorup_folder_path`` method.""" + assert construct_maven_repository_path(group_id) == expected_group_path diff --git a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py index eca170319..ebb960366 100644 --- a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py +++ b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for the ``JFrogMavenRegistry`` class.""" @@ -137,83 +137,6 @@ def test_is_detected( assert jfrog_maven.is_detected(build_tool) is False -@pytest.mark.parametrize( - ("args", "expected_path"), - [ - pytest.param( - { - "group_id": "io.micronaut", - }, - "io/micronaut", - id="Only group_id 1", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - }, - "com/fasterxml/jackson/core", - id="Only group_id 2", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - }, - "com/fasterxml/jackson/core/jackson-annotations", - id="group_id and artifact_id", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - "version": "2.9.9", - }, - "com/fasterxml/jackson/core/jackson-annotations/2.9.9", - id="group_id and artifact_id and version", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - "version": "2.9.9", - "asset_name": "jackson-annotations-2.9.9.jar", - }, - "com/fasterxml/jackson/core/jackson-annotations/2.9.9/jackson-annotations-2.9.9.jar", - id="group_id and artifact_id and version and asset_name,", - ), - ], -) -def test_construct_maven_repository_path( - jfrog_maven: JFrogMavenRegistry, - args: dict, - expected_path: str, -) -> None: - """Test the ``construct_maven_repository_path`` method.""" - assert jfrog_maven.construct_maven_repository_path(**args) == expected_path - - -@pytest.mark.parametrize( - ("group_id", "expected_group_path"), - [ - ( - "io.micronaut", - "io/micronaut", - ), - ( - "com.fasterxml.jackson.core", - "com/fasterxml/jackson/core", - ), - ], -) -def test_to_group_folder_path( - jfrog_maven: JFrogMavenRegistry, - group_id: str, - expected_group_path: str, -) -> None: - """Test the ``to_gorup_folder_path`` method.""" - assert jfrog_maven.construct_maven_repository_path(group_id) == expected_group_path - - @pytest.mark.parametrize( ("folder_path", 
"expected_url"), [ From 67561c4929861b5ef1fac856cb85c9c1f24fb36c Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 23 Sep 2024 12:00:27 +1000 Subject: [PATCH 4/5] chore: allow multiple artifact paths for a purl type --- src/macaron/artifact/local_artifact.py | 42 ++++++++++++++------ src/macaron/slsa_analyzer/analyze_context.py | 5 +-- tests/artifact/test_local_artifact.py | 25 +++++++----- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index eb862e477..67b70c318 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -25,17 +25,17 @@ def get_local_artifact_repo_mapper() -> Mapping[str, str]: return local_artifact_mapper -def construct_local_artifact_path_from_purl( +def construct_local_artifact_paths_from_purl( build_purl_type: str, component_purl: PackageURL, local_artifact_repo_mapper: Mapping[str, str], -) -> str | None: +) -> list[str] | None: """Get B.""" local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type) if local_artifact_repo is None: return None - artifact_path = None + artifact_path = [] match build_purl_type: case "maven": group = component_purl.namespace @@ -45,10 +45,12 @@ def construct_local_artifact_path_from_purl( if group is None or version is None: return None - artifact_path = os.path.join( - local_artifact_repo, - "repository", - construct_maven_repository_path(group, artifact, version), + artifact_path.append( + os.path.join( + local_artifact_repo, + "repository", + construct_maven_repository_path(group, artifact, version), + ) ) case "pypi": # TODO: implement this. 
@@ -59,22 +61,36 @@ def construct_local_artifact_path_from_purl( return artifact_path +# key: purl type +# value: list of paths +# If a key doesn't exist -> cannot construct the artifact paths for that purl type +# (no local artifact repo found or not enough information from PURL string or simply +# the PURL string is not applicable for that purl type). +# If a value is an empty list -> Can construct the local artifact paths but no paths exist in the local artifact repository. def get_local_artifact_paths( purl: PackageURL, build_tool_purl_types: list[str], local_artifact_repo_mapper: Mapping[str, str], -) -> dict[str, str]: +) -> dict[str, list[str]]: """Get C.""" - result = {} + local_artifact_paths_purl_mapping = {} for build_purl_type in build_tool_purl_types: - local_artfiact_path = construct_local_artifact_path_from_purl( + local_artfiact_paths = construct_local_artifact_paths_from_purl( build_purl_type=build_purl_type, component_purl=purl, local_artifact_repo_mapper=local_artifact_repo_mapper, ) - if local_artfiact_path and os.path.isdir(local_artfiact_path): - result[build_purl_type] = local_artfiact_path + if not local_artfiact_paths: + continue - return result + resolved_local_artifact_paths = [] + + for local_artifact_path in local_artfiact_paths: + if os.path.isdir(local_artifact_path): + resolved_local_artifact_paths.append(local_artifact_path) + + local_artifact_paths_purl_mapping[build_purl_type] = resolved_local_artifact_paths + + return local_artifact_paths_purl_mapping diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index 88ef09d7e..998e1ec29 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -52,9 +52,8 @@ class ChecksOutputs(TypedDict): """The commit digest extracted from provenance, if applicable.""" provenance_verified: bool """True if the provenance exists and has been verified against a signed companion provenance.""" - 
local_artifact_paths: dict[str, str] - # TODO this doc string for this variable need more informatino, to be revise later. - """The mapping between build tool types and the directory that contains the corresponding artifacts.""" + local_artifact_paths: dict[str, list[str]] + """The mapping between purl types and the local artifact absolute paths.""" class AnalyzeContext: diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index 3e44bdcb6..50157ac5e 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -9,7 +9,7 @@ import pytest from packageurl import PackageURL -from macaron.artifact.local_artifact import construct_local_artifact_path_from_purl, get_local_artifact_paths +from macaron.artifact.local_artifact import construct_local_artifact_paths_from_purl, get_local_artifact_paths @pytest.mark.parametrize( @@ -19,7 +19,7 @@ "maven", "pkg:maven/com.google.guava/guava@33.2.1-jre", {"maven": "/home/foo/.m2"}, - "/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre", + ["/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre"], id="A maven type PURL with available local maven repo", ), pytest.param( @@ -63,12 +63,12 @@ def test_construct_local_artifact_path_from_purl( build_purl_type: str, purl_str: str, local_artifact_repo_mapper: Mapping[str, str], - expectation: str, + expectation: list[str], ) -> None: """Test constructing a local artifact path from a given purl.""" component_purl = PackageURL.from_string(purl_str) assert ( - construct_local_artifact_path_from_purl( + construct_local_artifact_paths_from_purl( build_purl_type=build_purl_type, component_purl=component_purl, local_artifact_repo_mapper=local_artifact_repo_mapper, @@ -78,18 +78,20 @@ def test_construct_local_artifact_path_from_purl( @pytest.mark.parametrize( - ("purl_str", "build_tool_purl_types"), + ("purl_str", "build_tool_purl_types", "expectation"), [ pytest.param( 
"pkg:maven/com.google.guava/guava@33.2.1-jre", ["maven", "pypi"], - id="A maven type PURL where multiple build tool types are discovered", + {"maven": []}, + id="A maven type PURL where multiple build tool types are discovered. But no artifact path is available.", ), ], ) def test_get_local_artifact_paths_non_existing( purl_str: str, build_tool_purl_types: list[str], + expectation: dict[str, list[str]], ) -> None: """Test getting local artifact paths of non existing artifacts. @@ -101,8 +103,11 @@ def test_get_local_artifact_paths_non_existing( "maven": temp_dir, "pypi": temp_dir, } - assert not get_local_artifact_paths( - purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, + assert ( + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + == expectation ) From 7ac9e54b24e6abab3ea89df4617d974024407a59 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 24 Sep 2024 14:50:13 +1000 Subject: [PATCH 5/5] chore: refactor the local artifact finding logic and add pypi support --- src/macaron/artifact/local_artifact.py | 213 +++++++++++++++----- src/macaron/slsa_analyzer/analyzer.py | 36 +++- tests/artifact/test_local_artifact.py | 263 +++++++++++++++++++------ 3 files changed, 392 insertions(+), 120 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 67b70c318..3d23a7e7b 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -3,70 +3,177 @@ """This module declares types and utilities for handling local artifacts.""" +import fnmatch +import glob import os from collections.abc import Mapping from packageurl import PackageURL from macaron.artifact.maven import construct_maven_repository_path -from macaron.config.global_config import global_config -def get_local_artifact_repo_mapper() -> Mapping[str, str]: - 
"""Get A.""" - local_artifact_mapper: dict[str, str] = {} +def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) to be search in a maven layout local repo for artifact directories. - if global_config.local_maven_repo: - local_artifact_mapper["maven"] = global_config.local_maven_repo + Parameters + ---------- + maven_purl : PackageURL + A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) - if global_config.python_venv_path: - local_artifact_mapper["pypi"] = global_config.python_venv_path + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. + """ + if not maven_purl.type == "maven": + return None - return local_artifact_mapper + group = maven_purl.namespace + artifact = maven_purl.name + version = maven_purl.version + if group is None or version is None: + return None -def construct_local_artifact_paths_from_purl( - build_purl_type: str, - component_purl: PackageURL, - local_artifact_repo_mapper: Mapping[str, str], + return [construct_maven_repository_path(group, artifact, version)] + + +def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) to be search in a Python virtual environment for artifact directories. + + Parameters + ---------- + maven_purl : PackageURL + A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) + + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. + """ + if not pypi_purl.type == "pypi": + return None + + name = pypi_purl.name + version = pypi_purl.version + + if version is None: + return None + + # These patterns are from the content of a wheel file, which are extracted into the site-packages + # directory. 
References: + # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents + glob_patterns = [] + glob_patterns.append(name) + glob_patterns.append(f"{name}-{version}.dist-info") + glob_patterns.append(f"{name}-{version}.data") + + return glob_patterns + + +def find_artifact_paths_from_local_maven_repo( + local_maven_repo: str, + glob_patterns: list[str], +) -> list[str] | None: + """Return a list of existing directories within `local_maven_repo`. + + Each directory path has the form ``local_maven_repo``/``found_path``, where ``found_path`` is a glob match relative to ``local_maven_repo``. + + Return None if ``local_maven_repo`` is not a directory. + """ + if not os.path.isdir(local_maven_repo): + return None + + artifact_paths = [] + for pattern in glob_patterns: + found_paths = glob.glob( + root_dir=local_maven_repo, + pathname=pattern, + ) + + for found_path in found_paths: + full_path = os.path.join(local_maven_repo, found_path) + if os.path.isdir(full_path): + artifact_paths.append(full_path) + + return artifact_paths + + +# venv_path is expected to be an existing directory; None is returned otherwise. +# The glob patterns are matched against the directory names case-insensitively. +# We also assume that packages are just one level down from venv_path. +# The returned elements are the matched directory names joined onto venv_path.
+def find_artifact_paths_from_python_venv( + venv_path: str, + glob_patterns: list[str], ) -> list[str] | None: - """Get B.""" - local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type) - if local_artifact_repo is None: + """TBD.""" + if not os.path.isdir(venv_path): + return None + + artifact_paths = [] + + try: + venv_path_entries = os.listdir(venv_path) + except (NotADirectoryError, PermissionError, FileNotFoundError): return None - artifact_path = [] - match build_purl_type: - case "maven": - group = component_purl.namespace - artifact = component_purl.name - version = component_purl.version - - if group is None or version is None: - return None - - artifact_path.append( - os.path.join( - local_artifact_repo, - "repository", - construct_maven_repository_path(group, artifact, version), - ) - ) - case "pypi": - # TODO: implement this. - pass - case _: + all_package_dirs: list[str] = [] + for entry in venv_path_entries: + entry_path = os.path.join(venv_path, entry) + if os.path.isdir(entry_path): + all_package_dirs.append(entry) + + for package_dir in all_package_dirs: + for pattern in glob_patterns: + if fnmatch.fnmatch(package_dir.lower(), pattern.lower()): + full_path = os.path.join(venv_path, package_dir) + artifact_paths.append(full_path) + + return artifact_paths + + +def _get_local_artifact_path_for_build_tool_purl_type( + purl: PackageURL, + build_tool_purl_type: str, + local_artifact_repo: str, +) -> list[str] | None: + """TBD.""" + if build_tool_purl_type == "maven": + maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl) + if not maven_artifact_patterns: return None - return artifact_path + artifact_paths = find_artifact_paths_from_local_maven_repo( + local_maven_repo=local_artifact_repo, + glob_patterns=maven_artifact_patterns, + ) + + if artifact_paths: + return artifact_paths + + if build_tool_purl_type == "pypi": + pypi_artifact_patterns = construct_local_artifact_paths_glob_pattern_pypi_purl(purl) + if 
not pypi_artifact_patterns: + return None + + artifact_paths = find_artifact_paths_from_python_venv( + venv_path=local_artifact_repo, + glob_patterns=pypi_artifact_patterns, + ) + + if artifact_paths: + return artifact_paths + + return None # key: purl type # value: list of paths # If a key doesn't exist -> cannot construct the artifact paths for that purl type -# (no local artifact repo found or not enough information from PURL string or simply -# the PURL string is not applicable for that purl type). -# If a value is an empty list -> Can construct the local artifact paths but no paths exist in the local artifact repository. +# (no local artifact repo found, not enough information in the PURL, or the PURL type is not supported) OR no valid artifact paths were found. +# We assume that the paths in local_artifact_repo_mapper all exist. def get_local_artifact_paths( purl: PackageURL, build_tool_purl_types: list[str], @@ -75,22 +182,20 @@ def get_local_artifact_paths( """Get C.""" local_artifact_paths_purl_mapping = {} - for build_purl_type in build_tool_purl_types: - local_artfiact_paths = construct_local_artifact_paths_from_purl( - build_purl_type=build_purl_type, - component_purl=purl, - local_artifact_repo_mapper=local_artifact_repo_mapper, - ) - - if not local_artfiact_paths: + for build_tool_purl_type in build_tool_purl_types: + local_artifact_repo = local_artifact_repo_mapper.get(build_tool_purl_type) + if not local_artifact_repo: continue - resolved_local_artifact_paths = [] + artifact_paths = _get_local_artifact_path_for_build_tool_purl_type( + purl=purl, + build_tool_purl_type=build_tool_purl_type, + local_artifact_repo=local_artifact_repo, + ) - for local_artifact_path in local_artfiact_paths: - if os.path.isdir(local_artifact_path): - resolved_local_artifact_paths.append(local_artifact_path) + if not artifact_paths: + continue - local_artifact_paths_purl_mapping[build_purl_type] = resolved_local_artifact_paths + local_artifact_paths_purl_mapping[build_tool_purl_type] =
artifact_paths return local_artifact_paths_purl_mapping diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index cdd27ffd3..c707daaea 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -2,10 +2,13 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module handles the cloning and analyzing a Git repo.""" + +import glob import logging import os import re import sys +from collections.abc import Mapping from datetime import datetime, timezone from pathlib import Path from typing import Any, NamedTuple @@ -17,7 +20,7 @@ from sqlalchemy.orm import Session from macaron import __version__ -from macaron.artifact.local_artifact import get_local_artifact_paths, get_local_artifact_repo_mapper +from macaron.artifact.local_artifact import get_local_artifact_paths from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.config.target_config import Configuration @@ -472,15 +475,14 @@ def run_single( analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest - discovered_build_toosl = ( + discovered_build_tools = ( analyze_ctx.dynamic_data["build_spec"]["tools"] + analyze_ctx.dynamic_data["build_spec"]["purl_tools"] ) - build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_toosl] + build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_tools] analyze_ctx.dynamic_data["local_artifact_paths"] = get_local_artifact_paths( - # The PURL is definitely valid here. 
PackageURL.from_string(analyze_ctx.component.purl), build_tools_purl_types, - local_artifact_repo_mapper=get_local_artifact_repo_mapper(), + local_artifact_repo_mapper=self._get_local_artifact_repo_mapper(), ) analyze_ctx.check_results = registry.scan(analyze_ctx) @@ -1131,6 +1133,30 @@ def _determine_package_registries(self, analyze_ctx: AnalyzeContext) -> None: ) ) + @staticmethod + def _get_local_artifact_repo_mapper() -> Mapping[str, str]: + """Return the mapping between purl type and its local artifact repo path if that path exists.""" + local_artifact_mapper: dict[str, str] = {} + + if global_config.local_maven_repo: + m2_repository_dir = os.path.join(global_config.local_maven_repo, "repository") + if os.path.isdir(m2_repository_dir): + local_artifact_mapper["maven"] = m2_repository_dir + + if global_config.python_venv_path: + site_packages_dir_pattern = os.path.join( + global_config.python_venv_path, + "lib", + "python3.*", + "site-packages", + ) + site_packages_dirs = glob.glob(site_packages_dir_pattern) + + if len(site_packages_dirs) == 1: + local_artifact_mapper["pypi"] = site_packages_dirs.pop() + + return local_artifact_mapper + class DuplicateCmpError(DuplicateError): """This class is used for duplicated software component errors.""" diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index 50157ac5e..87e571f5a 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -3,74 +3,138 @@ """Test the local artifact utilities.""" -import tempfile -from collections.abc import Mapping +import os +from pathlib import Path import pytest from packageurl import PackageURL -from macaron.artifact.local_artifact import construct_local_artifact_paths_from_purl, get_local_artifact_paths +from macaron.artifact.local_artifact import ( + construct_local_artifact_paths_glob_pattern_maven_purl, + construct_local_artifact_paths_glob_pattern_pypi_purl, + find_artifact_paths_from_python_venv, + 
get_local_artifact_paths, +) @pytest.mark.parametrize( - ("build_purl_type", "purl_str", "local_artifact_repo_mapper", "expectation"), + ("purl_str", "expectation"), [ pytest.param( - "maven", "pkg:maven/com.google.guava/guava@33.2.1-jre", - {"maven": "/home/foo/.m2"}, - ["/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre"], - id="A maven type PURL with available local maven repo", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group, artifact and version", ), pytest.param( - "maven", - "pkg:maven/com.google.guava/guava@33.2.1-jre", - {}, - None, - id="A maven type PURL without an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre?type=jar", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group artifact, version and type qualifier", ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given maven purl.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param("pkg:pypi/django@5.0.6", id="The purl type is not supported."), + pytest.param("pkg:maven/guava@33.2.1-jre", id="Missing group id in the PURL"), + pytest.param("pkg:maven/guava", id="Missing version"), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given maven purl with error.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is None + + +@pytest.mark.parametrize( + ("purl_str", "expectation"), + [ pytest.param( - "maven", - "pkg:maven/com.google.guava/guava@33.2.1-jre", - 
{"pypi": "/home/foo/.venv"}, - None, - id="A maven type PURL without an available local maven repo but there is a Python venv", + "pkg:pypi/django@5.0.6", + ["django", "django-5.0.6.dist-info", "django-5.0.6.data"], + id="A valid pypi PURL with version", + ) + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given pypi purl.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param( + "pkg:pypi/django", + id="A pypi PURL without version", ), pytest.param( - "maven", - "pkg:maven/com.google.guava/guava", - {"maven": "/home/foo/.m2"}, - None, - id="A maven type PURL with missing version and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + id="The purl type is not supported.", ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given pypi purl with error.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is None + + +def test_find_artifact_paths_from_invalid_python_venv() -> None: + """Test find_artifact_paths_from_python_venv method with invalid venv path""" + assert not find_artifact_paths_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) + + +@pytest.mark.parametrize( + ("purl_str", "build_tool_purl_types", "local_artifact_repo_mapper", "expectation"), + [ pytest.param( - "maven", - "pkg:maven/guava", - {"maven": "/home/foo/.m2"}, - None, - id="A maven type PURL with missing groupd Id and an available local maven repo", + 
"pkg:maven/com.google.guava/guava@33.2.1-jre", + ["maven", "pypi"], + {}, + {}, + id="A maven type PURL where multiple build tool types are discovered. But no local repository is available.", ), pytest.param( - "maven", - "pkg:github/oracle/macaron", - {"maven": "/home/foo/.m2"}, - None, - id="A git type PURL and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + [], + {}, + {}, + id="A maven type PURL where no build tool types are discovered and no local repository is available.", ), ], ) -def test_construct_local_artifact_path_from_purl( - build_purl_type: str, +def test_get_local_artifact_paths_empty( purl_str: str, - local_artifact_repo_mapper: Mapping[str, str], - expectation: list[str], + build_tool_purl_types: list[str], + local_artifact_repo_mapper: dict[str, str], + expectation: dict[str, list[str]], ) -> None: - """Test constructing a local artifact path from a given purl.""" - component_purl = PackageURL.from_string(purl_str) + """Test getting local artifact paths where the result is empty.""" + purl = PackageURL.from_string(purl_str) assert ( - construct_local_artifact_paths_from_purl( - build_purl_type=build_purl_type, - component_purl=component_purl, + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, local_artifact_repo_mapper=local_artifact_repo_mapper, ) == expectation @@ -83,31 +147,108 @@ def test_construct_local_artifact_path_from_purl( pytest.param( "pkg:maven/com.google.guava/guava@33.2.1-jre", ["maven", "pypi"], - {"maven": []}, - id="A maven type PURL where multiple build tool types are discovered. 
But no artifact path is available.", + {}, + id="A maven type PURL where multiple build tool types are discovered", + ), + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + [], + {}, + id="A maven type PURL where no build tool is discovered", + ), + pytest.param( + "pkg:pypi/django@5.0.3", + [], + {}, + id="A maven type PURL where no build tool is discovered", ), ], ) -def test_get_local_artifact_paths_non_existing( +def test_get_local_artifact_paths_not_available( purl_str: str, build_tool_purl_types: list[str], expectation: dict[str, list[str]], + tmp_path: Path, ) -> None: - """Test getting local artifact paths of non existing artifacts. - - The local artifact repos are available. - """ + """Test getting local artifact paths where the artifact paths are not available.""" purl = PackageURL.from_string(purl_str) - with tempfile.TemporaryDirectory() as temp_dir: - local_artifact_repo_mapper = { - "maven": temp_dir, - "pypi": temp_dir, - } - assert ( - get_local_artifact_paths( - purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, - ) - == expectation + local_artifact_repo_mapper = { + "maven": str(tmp_path), + "pypi": str(tmp_path), + } + + assert ( + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, ) + == expectation + ) + + +def test_get_local_artifact_paths_succeeded_maven(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with maven purl.""" + purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") + build_tool_purl_types = ["maven", "pypi"] + + tmp_path_str = str(tmp_path) + + local_artifact_repo_mapper = { + "maven": f"{tmp_path_str}/.m2/repository", + "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", + } + maven_artifact_path = f"{local_artifact_repo_mapper['maven']}/com/oracle/macaron/macaron/0.13.0" + 
os.makedirs(local_artifact_repo_mapper["maven"]) + os.makedirs(local_artifact_repo_mapper["pypi"]) + os.makedirs(maven_artifact_path) + + expectation = { + "maven": [maven_artifact_path], + } + + result = get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + + assert result == expectation + + +def test_get_local_artifact_paths_succeeded_pypi(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with pypi purl.""" + purl = PackageURL.from_string("pkg:pypi/macaron@0.13.0") + build_tool_purl_types = ["maven", "pypi"] + + tmp_path_str = str(tmp_path) + + local_artifact_repo_mapper = { + "maven": f"{tmp_path_str}/.m2/repository", + "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", + } + pypi_artifact_paths = [ + f"{local_artifact_repo_mapper['pypi']}/macaron", + f"{local_artifact_repo_mapper['pypi']}/macaron-0.13.0.dist-info", + f"{local_artifact_repo_mapper['pypi']}/Macaron-0.13.0.dist-info", + ] + + os.makedirs(local_artifact_repo_mapper["maven"]) + os.makedirs(local_artifact_repo_mapper["pypi"]) + + for artifact_path in pypi_artifact_paths: + os.makedirs(artifact_path) + + expectation = { + "pypi": sorted(pypi_artifact_paths), + } + + result = get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + for value in result.values(): + value.sort() + + assert result == expectation