Skip to content

Commit

Permalink
chore: refactor the local artifact finding logic and add pypi support
Browse files Browse the repository at this point in the history
  • Loading branch information
tromai committed Sep 24, 2024
1 parent 67561c4 commit 7ac9e54
Show file tree
Hide file tree
Showing 3 changed files with 392 additions and 120 deletions.
213 changes: 159 additions & 54 deletions src/macaron/artifact/local_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,70 +3,177 @@

"""This module declares types and utilities for handling local artifacts."""

import fnmatch
import glob
import os
from collections.abc import Mapping

from packageurl import PackageURL

from macaron.artifact.maven import construct_maven_repository_path
from macaron.config.global_config import global_config


def get_local_artifact_repo_mapper() -> Mapping[str, str]:
"""Get A."""
local_artifact_mapper: dict[str, str] = {}
def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None:
"""Return a list of glob pattern(s) to be search in a maven layout local repo for artifact directories.
if global_config.local_maven_repo:
local_artifact_mapper["maven"] = global_config.local_maven_repo
Parameters
----------
maven_purl : PackageURL
A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/[email protected])`)
if global_config.python_venv_path:
local_artifact_mapper["pypi"] = global_config.python_venv_path
Returns
-------
list[str] | None
A list of glob patterns or None if an error happened.
"""
if not maven_purl.type == "maven":
return None

return local_artifact_mapper
group = maven_purl.namespace
artifact = maven_purl.name
version = maven_purl.version

if group is None or version is None:
return None

def construct_local_artifact_paths_from_purl(
build_purl_type: str,
component_purl: PackageURL,
local_artifact_repo_mapper: Mapping[str, str],
return [construct_maven_repository_path(group, artifact, version)]


def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None:
"""Return a list of glob pattern(s) to be search in a Python virtual environment for artifact directories.
Parameters
----------
maven_purl : PackageURL
A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/[email protected])`)
Returns
-------
list[str] | None
A list of glob patterns or None if an error happened.
"""
if not pypi_purl.type == "pypi":
return None

name = pypi_purl.name
version = pypi_purl.version

if version is None:
return None

# These patterns are from the content of a wheel file, which are extracted into the site-packages
# directory. References:
# https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents
glob_patterns = []
glob_patterns.append(name)
glob_patterns.append(f"{name}-{version}.dist-info")
glob_patterns.append(f"{name}-{version}.data")

return glob_patterns


def find_artifact_paths_from_local_maven_repo(
local_maven_repo: str,
glob_patterns: list[str],
) -> list[str] | None:
"""Return a list of existed directories within `local_maven_repo`.
Each directory path has the form ``local_maven_repo``/<artifact_specific_path>.
None means error.
"""
if not os.path.isdir(local_maven_repo):
return None

artifact_paths = []
for pattern in glob_patterns:
found_paths = glob.glob(
root_dir=local_maven_repo,
pathname=pattern,
)

for found_path in found_paths:
full_path = os.path.join(local_maven_repo, found_path)
if os.path.isdir(full_path):
artifact_paths.append(full_path)

return artifact_paths


# Assume that local_python_venv exists.
# In here we need to do it case-insensitively
# We also assume that packages are just one level down from venv_path
# The return element are relative paths from venv.
def find_artifact_paths_from_python_venv(
venv_path: str,
glob_patterns: list[str],
) -> list[str] | None:
"""Get B."""
local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type)
if local_artifact_repo is None:
"""TBD."""
if not os.path.isdir(venv_path):
return None

artifact_paths = []

try:
venv_path_entries = os.listdir(venv_path)
except (NotADirectoryError, PermissionError, FileNotFoundError):
return None

artifact_path = []
match build_purl_type:
case "maven":
group = component_purl.namespace
artifact = component_purl.name
version = component_purl.version

if group is None or version is None:
return None

artifact_path.append(
os.path.join(
local_artifact_repo,
"repository",
construct_maven_repository_path(group, artifact, version),
)
)
case "pypi":
# TODO: implement this.
pass
case _:
all_package_dirs: list[str] = []
for entry in venv_path_entries:
entry_path = os.path.join(venv_path, entry)
if os.path.isdir(entry_path):
all_package_dirs.append(entry)

for package_dir in all_package_dirs:
for pattern in glob_patterns:
if fnmatch.fnmatch(package_dir.lower(), pattern.lower()):
full_path = os.path.join(venv_path, package_dir)
artifact_paths.append(full_path)

return artifact_paths


def _get_local_artifact_path_for_build_tool_purl_type(
purl: PackageURL,
build_tool_purl_type: str,
local_artifact_repo: str,
) -> list[str] | None:
"""TBD."""
if build_tool_purl_type == "maven":
maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl)
if not maven_artifact_patterns:
return None

return artifact_path
artifact_paths = find_artifact_paths_from_local_maven_repo(
local_maven_repo=local_artifact_repo,
glob_patterns=maven_artifact_patterns,
)

if artifact_paths:
return artifact_paths

if build_tool_purl_type == "pypi":
pypi_artifact_patterns = construct_local_artifact_paths_glob_pattern_pypi_purl(purl)
if not pypi_artifact_patterns:
return None

artifact_paths = find_artifact_paths_from_python_venv(
venv_path=local_artifact_repo,
glob_patterns=pypi_artifact_patterns,
)

if artifact_paths:
return artifact_paths

return None


# key: purl type
# value: list of paths
# If a key doesn't exist -> cannot construct the artifact paths for that purl type
# (no local artifact repo found or not enough information from PURL string or simply
# the PURL string is not applicable for that purl type).
# If a value is an empty list -> Can construct the local artifact paths but no paths exist in the local artifact repository.
# (no local artifact repo found or not enough information from PURL type is not supported) OR no valid artifact paths found.
# We assume that the paths in local_artifact_repo_mapper all exists/
def get_local_artifact_paths(
purl: PackageURL,
build_tool_purl_types: list[str],
Expand All @@ -75,22 +182,20 @@ def get_local_artifact_paths(
"""Get C."""
local_artifact_paths_purl_mapping = {}

for build_purl_type in build_tool_purl_types:
local_artfiact_paths = construct_local_artifact_paths_from_purl(
build_purl_type=build_purl_type,
component_purl=purl,
local_artifact_repo_mapper=local_artifact_repo_mapper,
)

if not local_artfiact_paths:
for build_tool_purl_type in build_tool_purl_types:
local_artifact_repo = local_artifact_repo_mapper.get(build_tool_purl_type)
if not local_artifact_repo:
continue

resolved_local_artifact_paths = []
artifact_paths = _get_local_artifact_path_for_build_tool_purl_type(
purl=purl,
build_tool_purl_type=build_tool_purl_type,
local_artifact_repo=local_artifact_repo,
)

for local_artifact_path in local_artfiact_paths:
if os.path.isdir(local_artifact_path):
resolved_local_artifact_paths.append(local_artifact_path)
if not artifact_paths:
continue

local_artifact_paths_purl_mapping[build_purl_type] = resolved_local_artifact_paths
local_artifact_paths_purl_mapping[build_tool_purl_type] = artifact_paths

return local_artifact_paths_purl_mapping
36 changes: 31 additions & 5 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module handles the cloning and analyzing a Git repo."""

import glob
import logging
import os
import re
import sys
from collections.abc import Mapping
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, NamedTuple
Expand All @@ -17,7 +20,7 @@
from sqlalchemy.orm import Session

from macaron import __version__
from macaron.artifact.local_artifact import get_local_artifact_paths, get_local_artifact_repo_mapper
from macaron.artifact.local_artifact import get_local_artifact_paths
from macaron.config.defaults import defaults
from macaron.config.global_config import global_config
from macaron.config.target_config import Configuration
Expand Down Expand Up @@ -472,15 +475,14 @@ def run_single(
analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url
analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest

discovered_build_toosl = (
discovered_build_tools = (
analyze_ctx.dynamic_data["build_spec"]["tools"] + analyze_ctx.dynamic_data["build_spec"]["purl_tools"]
)
build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_toosl]
build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_tools]
analyze_ctx.dynamic_data["local_artifact_paths"] = get_local_artifact_paths(
# The PURL is definitely valid here.
PackageURL.from_string(analyze_ctx.component.purl),
build_tools_purl_types,
local_artifact_repo_mapper=get_local_artifact_repo_mapper(),
local_artifact_repo_mapper=self._get_local_artifact_repo_mapper(),
)

analyze_ctx.check_results = registry.scan(analyze_ctx)
Expand Down Expand Up @@ -1131,6 +1133,30 @@ def _determine_package_registries(self, analyze_ctx: AnalyzeContext) -> None:
)
)

@staticmethod
def _get_local_artifact_repo_mapper() -> Mapping[str, str]:
"""Return the mapping between purl type and its local artifact repo path if that path exists."""
local_artifact_mapper: dict[str, str] = {}

if global_config.local_maven_repo:
m2_repository_dir = os.path.join(global_config.local_maven_repo, "repository")
if os.path.isdir(m2_repository_dir):
local_artifact_mapper["maven"] = m2_repository_dir

if global_config.python_venv_path:
site_packages_dir_pattern = os.path.join(
global_config.python_venv_path,
"lib",
"python3.*",
"site-packages",
)
site_packages_dirs = glob.glob(site_packages_dir_pattern)

if len(site_packages_dirs) == 1:
local_artifact_mapper["pypi"] = site_packages_dirs.pop()

return local_artifact_mapper


class DuplicateCmpError(DuplicateError):
"""This class is used for duplicated software component errors."""
Expand Down
Loading

0 comments on commit 7ac9e54

Please sign in to comment.