Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: obtain Java and Python artifacts from .m2 or Python virtual environment from input (WIP) #864

Draft
wants to merge 5 commits into
base: staging
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion scripts/release_scripts/run_macaron.sh
Original file line number Diff line number Diff line change
Expand Up @@ -328,14 +328,18 @@ if [[ $command == "analyze" ]]; then
python_venv_path="$2"
shift
;;
--local-maven-repo)
local_maven_repo="$2"
shift
;;
*)
rest_command+=("$1")
;;
esac
shift
done
elif [[ $command == "verify-policy" ]]; then
while [[ $# -gt 0 ]]; do
while [[ $# -gt 0 ]]; do
case $1 in
-d|--database)
arg_database="$2"
Expand All @@ -351,6 +355,15 @@ elif [[ $command == "verify-policy" ]]; then
esac
shift
done
elif [[ $command == "dump-defaults" ]]; then
while [[ $# -gt 0 ]]; do
case $1 in
*)
rest_command+=("$1")
;;
esac
shift
done
fi

# MACARON entrypoint - Main argvs
Expand Down Expand Up @@ -455,6 +468,37 @@ if [[ -n "${python_venv_path:-}" ]]; then
mount_dir_ro "--python-venv" "$python_venv_path" "$python_venv_in_container"
fi

# Mount a local Maven repo into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly.
# The host directory to mount is chosen in this order:
#   1. The directory provided by the user via --local-maven-repo.
#   2. For the `analyze` command only: $HOME/.m2, if that directory exists.
#   3. For the `analyze` command only: an empty directory created at
#      ${output}/analyze_local_maven_repo_readonly. An empty directory is mounted because
#      we don't want Macaron running within the container to use `$HOME/.m2` within the
#      container, as it is being used by the cyclonedx plugins for dependency resolution.
# For any other command without --local-maven-repo, nothing is mounted.
local_maven_repo_on_host=""
if [[ -n "${local_maven_repo:-}" ]]; then
    local_maven_repo_on_host="$local_maven_repo"
elif [[ "$command" == "analyze" ]]; then
    if [[ -d "$HOME/.m2" ]]; then
        local_maven_repo_on_host="$HOME/.m2"
    else
        output_local_maven_repo="${output}/analyze_local_maven_repo_readonly"
        mkdir -p "$output_local_maven_repo"
        local_maven_repo_on_host="$output_local_maven_repo"
    fi
fi

# A single mount step shared by all three cases above.
if [[ -n "$local_maven_repo_on_host" ]]; then
    local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly"
    argv_command+=("--local-maven-repo" "$local_maven_repo_in_container")

    mount_dir_ro "--local-maven-repo" "$local_maven_repo_on_host" "$local_maven_repo_in_container"
fi

# MACARON entrypoint - verify-policy command argvs
# This is for macaron verify-policy command.
# Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/macaron.db
Expand Down
31 changes: 31 additions & 0 deletions src/macaron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,29 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None
sys.exit(os.EX_OSFILE)
global_config.load_python_venv(analyzer_single_args.python_venv)

# Set the local Maven repository path.
if analyzer_single_args.local_maven_repo is None:
    # No --local-maven-repo was given: fall back to the default user local .m2 directory.
    # Exit on error if $HOME is not set or empty.
    home_dir = os.getenv("HOME")
    if not home_dir:
        logger.critical("Environment variable HOME is not set.")
        sys.exit(os.EX_USAGE)

    local_maven_repo = os.path.join(home_dir, ".m2")
    if os.path.isdir(local_maven_repo):
        global_config.local_maven_repo = local_maven_repo
    else:
        # A missing default repo is not an error; None records that no .m2 directory is available.
        logger.debug("The default local Maven repo at %s does not exist. Ignore ...", local_maven_repo)
        global_config.local_maven_repo = None
else:
    user_provided_local_maven_repo = analyzer_single_args.local_maven_repo
    # os.path.isdir is already False for a path that does not exist, so one check covers both cases.
    if not os.path.isdir(user_provided_local_maven_repo):
        logger.error("The user provided local Maven repo at %s is not valid.", user_provided_local_maven_repo)
        sys.exit(os.EX_USAGE)

    global_config.local_maven_repo = user_provided_local_maven_repo

analyzer = Analyzer(global_config.output_path, global_config.build_log_path)

# Initiate reporters.
Expand Down Expand Up @@ -393,6 +416,14 @@ def main(argv: list[str] | None = None) -> None:
help=("The path to the Python virtual environment of the target software component."),
)

single_analyze_parser.add_argument(
"--local-maven-repo",
required=False,
help=(
"The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2"
),
)

# Dump the default values.
sub_parser.add_parser(name="dump-defaults", description="Dumps the defaults.ini file to the output directory.")

Expand Down
201 changes: 201 additions & 0 deletions src/macaron/artifact/local_artifact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module declares types and utilities for handling local artifacts."""

import fnmatch
import glob
import os
from collections.abc import Mapping

from packageurl import PackageURL

from macaron.artifact.maven import construct_maven_repository_path


def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None:
    """Build the glob pattern(s) that locate a Maven artifact's directory in a Maven layout local repo.

    The single pattern returned is the ``<group path>/<artifact>/<version>`` path produced by
    ``construct_maven_repository_path``; it is meant to be resolved relative to the repo root.

    Parameters
    ----------
    maven_purl : PackageURL
        A maven type PackageURL instance
        (e.g. ``PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0")``).

    Returns
    -------
    list[str] | None
        A list of glob patterns, or None if the PURL is not of type ``maven`` or it has no
        namespace (group) or no version.
    """
    if maven_purl.type != "maven":
        return None

    group_id, version = maven_purl.namespace, maven_purl.version
    if group_id is None or version is None:
        return None

    artifact_dir_pattern = construct_maven_repository_path(group_id, maven_purl.name, version)
    return [artifact_dir_pattern]


def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None:
    """Return a list of glob pattern(s) to be searched in a Python virtual environment for artifact directories.

    Parameters
    ----------
    pypi_purl : PackageURL
        A pypi type PackageURL instance (e.g. ``PackageURL.from_string("pkg:pypi/django@5.0.6")``).

    Returns
    -------
    list[str] | None
        A list of glob patterns, or None if the PURL is not of type ``pypi`` or it has no version.
    """
    if pypi_purl.type != "pypi":
        return None

    name = pypi_purl.name
    version = pypi_purl.version

    if version is None:
        return None

    # These patterns are from the content of a wheel file, which are extracted into the site-packages
    # directory. References:
    # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents
    # NOTE(review): wheels name these directories with the escaped distribution name (runs of
    # "-", "_" and "." become "_"), so a PURL name containing such characters may not match as
    # written here - confirm whether the name should be normalized before building the patterns.
    return [
        name,
        f"{name}-{version}.dist-info",
        f"{name}-{version}.data",
    ]


def find_artifact_paths_from_local_maven_repo(
local_maven_repo: str,
glob_patterns: list[str],
) -> list[str] | None:
"""Return a list of existed directories within `local_maven_repo`.

Each directory path has the form ``local_maven_repo``/<artifact_specific_path>.

None means error.
"""
if not os.path.isdir(local_maven_repo):
return None

artifact_paths = []
for pattern in glob_patterns:
found_paths = glob.glob(
root_dir=local_maven_repo,
pathname=pattern,
)

for found_path in found_paths:
full_path = os.path.join(local_maven_repo, found_path)
if os.path.isdir(full_path):
artifact_paths.append(full_path)

return artifact_paths


# Assume that local_python_venv exists.
# In here we need to do it case-insensitively
# We also assume that packages are just one level down from venv_path
# The return element are relative paths from venv.
def find_artifact_paths_from_python_venv(
venv_path: str,
glob_patterns: list[str],
) -> list[str] | None:
"""TBD."""
if not os.path.isdir(venv_path):
return None

artifact_paths = []

try:
venv_path_entries = os.listdir(venv_path)
except (NotADirectoryError, PermissionError, FileNotFoundError):
return None

all_package_dirs: list[str] = []
for entry in venv_path_entries:
entry_path = os.path.join(venv_path, entry)
if os.path.isdir(entry_path):
all_package_dirs.append(entry)

for package_dir in all_package_dirs:
for pattern in glob_patterns:
if fnmatch.fnmatch(package_dir.lower(), pattern.lower()):
full_path = os.path.join(venv_path, package_dir)
artifact_paths.append(full_path)

return artifact_paths


def _get_local_artifact_path_for_build_tool_purl_type(
    purl: PackageURL,
    build_tool_purl_type: str,
    local_artifact_repo: str,
) -> list[str] | None:
    """Find the local artifact directories of ``purl`` using the lookup of one build tool PURL type.

    Parameters
    ----------
    purl : PackageURL
        The PackageURL of the artifact to look for.
    build_tool_purl_type : str
        ``maven`` searches ``local_artifact_repo`` as a Maven layout local repo and ``pypi``
        searches it as a Python virtual environment directory. Any other value yields None.
    local_artifact_repo : str
        The path to the local artifact repository to search in.

    Returns
    -------
    list[str] | None
        The non-empty list of artifact paths found, or None if the glob patterns cannot be
        constructed from ``purl``, the search fails, or nothing is found.
    """
    found_paths: list[str] | None = None

    if build_tool_purl_type == "maven":
        patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl)
        if patterns:
            found_paths = find_artifact_paths_from_local_maven_repo(
                local_maven_repo=local_artifact_repo,
                glob_patterns=patterns,
            )
    elif build_tool_purl_type == "pypi":
        patterns = construct_local_artifact_paths_glob_pattern_pypi_purl(purl)
        if patterns:
            found_paths = find_artifact_paths_from_python_venv(
                venv_path=local_artifact_repo,
                glob_patterns=patterns,
            )

    # An empty result is reported the same way as a failed lookup.
    return found_paths if found_paths else None


def get_local_artifact_paths(
    purl: PackageURL,
    build_tool_purl_types: list[str],
    local_artifact_repo_mapper: Mapping[str, str],
) -> dict[str, list[str]]:
    """Map each build tool PURL type to the local artifact paths found for ``purl``.

    The paths in ``local_artifact_repo_mapper`` are assumed to exist.

    Parameters
    ----------
    purl : PackageURL
        The PackageURL of the artifact to look for.
    build_tool_purl_types : list[str]
        The build tool PURL types to try.
    local_artifact_repo_mapper : Mapping[str, str]
        The mapping from a PURL type to the path of its local artifact repository.

    Returns
    -------
    dict[str, list[str]]
        The mapping from a PURL type to the list of artifact paths found for it. A PURL type is
        absent from the mapping when no local artifact repo is available for it, when the
        artifact paths cannot be constructed for it (not enough information in the PURL or the
        PURL type is not supported), or when no valid artifact path is found.
    """
    paths_by_purl_type = {}

    for purl_type in build_tool_purl_types:
        repo_path = local_artifact_repo_mapper.get(purl_type)
        if repo_path:
            found_paths = _get_local_artifact_path_for_build_tool_purl_type(
                purl=purl,
                build_tool_purl_type=purl_type,
                local_artifact_repo=repo_path,
            )
            if found_paths:
                paths_by_purl_type[purl_type] = found_paths

    return paths_by_purl_type
38 changes: 38 additions & 0 deletions src/macaron/artifact/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,41 @@ def create_maven_purl_from_artifact_filename(
)

return None


def construct_maven_repository_path(
group_id: str,
artifact_id: str | None = None,
version: str | None = None,
asset_name: str | None = None,
) -> str:
"""Construct a path to a folder or file on the registry, assuming Maven repository layout.

For more details regarding Maven repository layout, see the following:
- https://maven.apache.org/repository/layout.html
- https://maven.apache.org/guides/mini/guide-naming-conventions.html

Parameters
----------
group_id : str
The group id of a Maven package.
artifact_id : str
The artifact id of a Maven package.
version : str
The version of a Maven package.
asset_name : str
The asset name.

Returns
-------
str
The path to a folder or file on the registry.
"""
path = group_id.replace(".", "/")
if artifact_id:
path = "/".join([path, artifact_id])
if version:
path = "/".join([path, version])
if asset_name:
path = "/".join([path, asset_name])
return path
3 changes: 3 additions & 0 deletions src/macaron/config/global_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ class GlobalConfig:
#: The path to Python virtual environment.
python_venv_path: str = ""

#: The path to the local .m2 Maven repository. This attribute is None if there is no available .m2 directory.
local_maven_repo: str | None = None

def load(
self,
macaron_path: str,
Expand Down
3 changes: 3 additions & 0 deletions src/macaron/slsa_analyzer/analyze_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class ChecksOutputs(TypedDict):
"""The commit digest extracted from provenance, if applicable."""
provenance_verified: bool
"""True if the provenance exists and has been verified against a signed companion provenance."""
local_artifact_paths: dict[str, list[str]]
"""The mapping between purl types and the local artifact absolute paths."""


class AnalyzeContext:
Expand Down Expand Up @@ -106,6 +108,7 @@ def __init__(
provenance_repo_url=None,
provenance_commit_digest=None,
provenance_verified=False,
local_artifact_paths={},
)

@property
Expand Down
Loading
Loading