Review-corrected patch (text before the first "diff --git" header is ignored by git apply).

Changes relative to the submitted patch:
  * create_spdx_namespace() is left untouched: it is annotated "-> str" and must keep
    returning the namespace, not a dict of hashes of a random-UUID string.
  * The external-document checksum is again computed over the serialized dependency
    document (as before the patch), now held in a named local.
  * The discarded Algorithm(identifier="SHA-512", ...) statement, the add_algorithm()
    call and the extra imports are dropped; ExternalDocumentRef takes one check_sum.
  * yum.py: the dependency pattern gains the trailing comma that the new
    re.IGNORECASE argument requires (it was a syntax error without it).
  * "index" lines are omitted because the post-image blob hash changed.

diff --git a/code/formatters/spdx.py b/code/formatters/spdx.py
--- a/code/formatters/spdx.py
+++ b/code/formatters/spdx.py
@@ -80,7 +80,7 @@ def format_dependencies(cls, dependencies: List[ExtractedDependency]):
     @classmethod
     def format_report(cls, report: DocumentReport):
         SPDX_document = create_base_spdx_document('LogFileSource')
-        SPDX_document.package = Package(name=SPDX_document.name,spdx_id=SPDX_document.spdx_id)
+        SPDX_document.package = Package(name=SPDX_document.name, spdx_id=SPDX_document.spdx_id)
         for finding in report.findings:
             finding_annotation = Annotation()
             finding_annotation.annotator = SPDX_TOOL
@@ -89,19 +89,29 @@ def format_report(cls, report: DocumentReport):
             finding_annotation.annotation_type = "OTHER"
             finding_annotation.comment = finding.json()
             SPDX_document.add_annotation(finding_annotation)
+
         dependent_documents = []
         for dependency in report.dependencies:
             dep_document = create_base_spdx_document(dependency.name)
-            dep_document.package = Package(name=dependency.name, spdx_id=dep_document.spdx_id, download_location=dependency.download_location, version=dependency.version)
+            dep_document.package = Package(name=dependency.name, spdx_id=dep_document.spdx_id,
+                                           download_location=dependency.download_location, version=dependency.version)
+            # The reference checksum is computed over the serialized dependency
+            # document itself (not over its namespace string), so it identifies
+            # the document that the reference points at.
+            dep_document_checksum = Algorithm(
+                identifier="SHA1",
+                value=sha1(cls.__TO_METHOD__(dep_document).encode()).hexdigest(),
+            )
             SPDX_document.ext_document_references.append(
                 ExternalDocumentRef(
                     external_document_id=dep_document.spdx_id,
-                    check_sum=Algorithm(identifier="SHA1",value=sha1(cls.__TO_METHOD__(dep_document).encode()).hexdigest()),
+                    check_sum=dep_document_checksum,
                     spdx_document_uri=dep_document.namespace
-                    )
-                )
+                )
+            )
             dependent_documents.append(dep_document)
+
         return (SPDX_document, dependent_documents)
 
 
 class SPDXJsonFormatter(SPDXBaseFormatter):
diff --git a/code/parsers/yum.py b/code/parsers/yum.py
--- a/code/parsers/yum.py
+++ b/code/parsers/yum.py
@@ -27,14 +27,15 @@ class YumParser(ParserBase):
     def on_load(self) -> None:
         self.yumblock_extractor_regex = re.compile(
             "^\W*=+$(\n|\r\n|\W)*Package(\n|\r\n|\W)+Arch(itecture)?(\n|\r\n|\W)+Version(\n|\r\n|\W)+Repository(\n|\r\n|\W)+Size(\n|\r\n|\W)^\W*=+$([\s\S]*?)Transaction Summary(\n|\r\n)^\W*=+$",
-            re.MULTILINE,
+            re.MULTILINE | re.IGNORECASE,
         )
         self.dependency_extractor_regex = re.compile(
-            "( (?P\S+)\s+(?P\S+)\s+(?P\d\S+)\s+(?P\S+)\s+[\d\.]+ [kMGb])"
+            "( (?P\S+)\s+(?P\S+)\s+(?P\d\S+)\s+(?P\S+)\s+[\d\.]+ [kMGb])",
+            re.IGNORECASE,
         )
         self.fastest_mirror_block_detection = re.compile(
             "((Determining fastest mirrors|Loading mirror speeds from cached hostfile)(\n|\r\n)(\s\*\s.*(\n|\r\n))+)",
-            re.MULTILINE,
+            re.MULTILINE | re.IGNORECASE,
         )
         self.mirror_mapping_extraction = re.compile("\s\*\s(\w+):\s(\S+)")
 