Skip to content

fix(uC/lib): handling of product names with special characters #4959

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 67 additions & 2 deletions cve_bin_tool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
from cve_bin_tool.package_list_parser import PackageListParser
from cve_bin_tool.sbom_manager.parse import SBOMParse
from cve_bin_tool.sbom_manager.sbom_detection import sbom_detection
from cve_bin_tool.util import ProductInfo
from cve_bin_tool.util import ProductInfo, normalize_product_name
from cve_bin_tool.version import VERSION
from cve_bin_tool.version_scanner import VersionScanner
from cve_bin_tool.vex_manager.parse import VEXParse
Expand Down Expand Up @@ -1220,7 +1220,72 @@ def main(argv=None):
return ERROR_CODES[InsufficientArgs]

if args["vex_file"] and args["filter_triage"]:
cve_scanner.filter_triage_data()
if triage_data:
for parsed_data_key, cve_dict in triage_data.items():
# Skip paths key
if "paths" in cve_dict:
continue

# Check if the product is in the scanned data
matching_key = None
for product_info in cve_scanner.all_cve_data.keys():
# Use normalize_product_name for comparison
normalized_parsed_product = normalize_product_name(
parsed_data_key.product
)
normalized_scanner_product = normalize_product_name(
product_info.product
)

if (
parsed_data_key.vendor == product_info.vendor
and normalized_parsed_product == normalized_scanner_product
and parsed_data_key.version == product_info.version
):
matching_key = product_info
break

if not matching_key:
LOGGER.info(
f"Product: {parsed_data_key.product} with Version: {parsed_data_key.version} "
f"not found in Parsed Data, is valid vex file being used?"
)
continue

# Apply triage data
for cve_id, cve_triage_data in cve_dict.items():
if cve_id in cve_scanner.all_cve_data[matching_key]["cves"]:
for i, cve in enumerate(
cve_scanner.all_cve_data[matching_key]["cves"]
):
if cve.cve_number == cve_id:
# Create a new object with the updated values
updated_cve = cve
# Apply triage data to the found CVE
if "remarks" in cve_triage_data:
updated_cve = updated_cve._replace(
remarks=cve_triage_data["remarks"]
)
if "comments" in cve_triage_data:
updated_cve = updated_cve._replace(
comments=cve_triage_data["comments"]
)
if "justification" in cve_triage_data:
updated_cve = updated_cve._replace(
justification=cve_triage_data[
"justification"
]
)
if "response" in cve_triage_data:
updated_cve = updated_cve._replace(
response=cve_triage_data["response"]
)

# Store the updated CVE back in the list
cve_scanner.all_cve_data[matching_key]["cves"][
i
] = updated_cve

# Creates an Object for OutputEngine
output = OutputEngine(
all_cve_data=cve_scanner.all_cve_data,
Expand Down
51 changes: 49 additions & 2 deletions cve_bin_tool/output_engine/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import re
from collections import Counter, defaultdict
from datetime import datetime
from logging import Logger
Expand All @@ -28,6 +29,50 @@
}


def normalize_id(text, existing_ids=None):
    """
    Normalize text for use as an HTML ID (and inside a ``#id`` CSS selector).

    Steps, in order:
    1. Empty or missing text becomes the fixed ID ``"id_empty"``.
    2. Text that does not start with a letter is prefixed with ``"id_"``.
    3. Slashes are spelled out as ``"_slash_"`` so that names differing only
       by a slash stay distinguishable.
    4. Every remaining character that is not a word character (letter, digit,
       underscore) or a hyphen is replaced by an underscore.
    5. Runs of underscores are collapsed and trailing underscores removed.
    6. When ``existing_ids`` is given, a numeric suffix is appended until the
       ID is unique, and the final ID is added to ``existing_ids``.

    Args:
        text: The text to normalize
        existing_ids: Optional set of existing IDs to ensure uniqueness;
            it is mutated in place to record the returned ID

    Returns:
        A normalized string suitable for use as an HTML ID
    """
    if not text:
        return "id_empty"

    # Ensure the ID starts with a letter
    if not text[0].isalpha():
        text = "id_" + text

    # Replace slashes with _slash_ for valid HTML IDs
    text = text.replace("/", "_slash_")

    # Whitelist instead of blacklist: characters such as quotes, brackets,
    # '<', '>', '~' or '|' are meaningful in HTML attributes and CSS selectors
    # and would otherwise survive into the generated ID.
    result = re.sub(r"[^\w-]", "_", text)

    # Clean up multiple/trailing underscores
    result = re.sub(r"__+", "_", result).rstrip("_")

    # Ensure uniqueness if tracking IDs
    if existing_ids is not None:
        base_id = result
        counter = 1
        while result in existing_ids:
            result = f"{base_id}_{counter}"
            counter += 1
        existing_ids.add(result)

    return result


def normalize_severity(severity: str) -> str:
"""Normalize severity values to standard format.

Expand Down Expand Up @@ -306,9 +351,11 @@ def output_html(

# hid is unique for each product
if product_info.vendor != "UNKNOWN":
hid = f"{product_info.vendor}{product_info.product}{''.join(product_info.version.split('.'))}"
hid = normalize_id(
f"{product_info.vendor}{product_info.product}{''.join(product_info.version.split('.'))}"
)
else:
hid = (
hid = normalize_id(
f"{product_info.product}{''.join(product_info.version.split('.'))}"
)
new_cves = render_cves(
Expand Down
2 changes: 1 addition & 1 deletion cve_bin_tool/output_engine/html_reports/js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ function handleActive(key, id) {

function filterCVEs(remark, id) {
const classes = ['new', 'confirmed', 'mitigated', 'unexplored', 'false_positive', 'not_affected']
for (let i = 0; i < 6; i++) {
for (let i = 0; i < classes.length; i++) {
let ele = document
.getElementById(`listCVE${id}`)
.getElementsByClassName(classes[i])[0]
Expand Down
3 changes: 2 additions & 1 deletion cve_bin_tool/output_engine/html_reports/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ <h5 class="fw-light p-t-5">Paths of Scanned Files</h5>
<li class="list-group-item">
<h5 class="fw-normal">{{path}}</h5>
{% for product in all_paths[path]%}
<a id="vendorProductPill" data-bs-toggle="modal" data-bs-target="#modal{{ product }}">
<a id="vendorProductPill" data-bs-toggle="modal"
data-bs-target="#modal{{ product | replace('/', '_') | replace('\\', '_') | replace('.', '_') | replace(':', '_') }}">
<span class="badge rounded-pill bg-info">{{product}}</span>
</a>
{% endfor %}
Expand Down
12 changes: 9 additions & 3 deletions cve_bin_tool/output_engine/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ def format_output(
for cve in cve_data["cves"]:
if isinstance(cve, str):
continue

# Ensure proper remarks string value is used
remarks_value = (
cve.remarks.name if hasattr(cve.remarks, "name") else str(cve.remarks)
)

# If EPSS values are not available for a given CVE, assign them a value of "-"
probability = "-"
percentile = "-"
Expand All @@ -206,7 +212,7 @@ def format_output(
"cvss_version": str(cve.cvss_version),
"cvss_vector": cve.cvss_vector,
"paths": paths,
"remarks": cve.remarks.name,
"remarks": remarks_value,
"comments": cve.comments,
}
if metrics:
Expand Down Expand Up @@ -312,12 +318,12 @@ def group_cve_by_remark(
"""Return a dict containing CVE details dict mapped to Remark as Key.
Example:
cve_by_remark = {
cve_by_remarks = {
"NEW":[
{
"cve_number": "CVE-XXX-XXX",
"severity": "High",
"decription: "Lorem Ipsm",
"description": "Lorem Ipsum",
},
{...}
],
Expand Down
78 changes: 78 additions & 0 deletions cve_bin_tool/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,31 @@
from cve_bin_tool.log import LOGGER


def normalize_product_name(product_name: str) -> str:
    r"""
    Return the product name with every slash escaped as \/ exactly once.

    A slash that is already preceded by a backslash is left untouched, so the
    function is idempotent: normalizing an already-normalized name returns it
    unchanged.

    Args:
        product_name: The product name that may contain slashes

    Returns:
        A normalized product name with slashes escaped as \/
    """
    # A negative lookbehind escapes only the slashes that are not already
    # escaped. This avoids a placeholder-marker round trip, which would
    # corrupt any name that happened to contain the placeholder text itself.
    return re.sub(r"(?<!\\)/", r"\\/", product_name)


class OrderedEnum(Enum):
"""
An enumeration that supports order comparisons.
Expand Down Expand Up @@ -398,6 +423,17 @@ def decode_bom_ref(ref: str):
or None if the reference cannot be decoded.

"""
# If the reference starts with urn:cbt:, use parse_urn to properly handle special characters
if ref.startswith("urn:cbt:"):
try:
vendor, product, version = parse_urn(ref)
return ProductInfo(vendor.strip(), product.strip(), version.strip())
except (ValueError, AttributeError) as e:
LOGGER.debug(f"Failed to parse URN: {ref} - Error: {e}")
# Don't return None here, continue to try other parsing methods
pass

# If the reference couldn't be handled by parse_urn, fall back to regex patterns
# urn:cbt:{bom_version}/{vendor}#{product}-{version}
urn_cbt_ref = re.compile(
r"urn:cbt:(?P<bom_version>.*?)\/(?P<vendor>.*?)#(?P<product>.*?)-(?P<version>.*)"
Expand Down Expand Up @@ -608,6 +644,48 @@ def windows_fixup(filename):
return filename.replace(":", "_").replace("\\", "_")


def generate_urn(vendor, product, version):
    """Build the ``urn:cbt:1/<vendor>#<product>:<version>`` string for a component.

    The three values are interpolated exactly as given; no escaping is applied.
    """
    return "urn:cbt:1/{}#{}:{}".format(vendor, product, version)


def parse_urn(urn_string):
    """
    Parse a URN string of the format urn:cbt:1/vendorname#productname:version
    where product name might contain slashes.

    The text before the first ``/`` (the BOM version) is discarded. The vendor
    is everything up to the first ``#``; the product runs from there to the
    first ``:`` that follows, and the version is the remainder. Escaped
    slashes (a backslash followed by ``/``) in the product are unescaped.

    Args:
        urn_string: The URN to parse

    Returns:
        Tuple of (vendor, product, version)

    Raises:
        ValueError: If the string has no ``/``, ``#`` or ``:`` separator in
            the expected position, or is not a string at all
    """
    prefix = "urn:cbt:"
    try:
        # Strip only a leading prefix. str.replace() would also delete the
        # same text if it reappeared inside the vendor, product or version.
        if urn_string.startswith(prefix):
            body = urn_string[len(prefix) :]
        else:
            body = urn_string

        # Split on the first slash only; later slashes belong to the product
        _bom_version, rest = body.split("/", 1)

        # Find the position of the '#' which separates vendor and product
        hash_pos = rest.find("#")
        if hash_pos == -1:
            raise ValueError("Invalid URN format: missing '#' separator")

        vendor = rest[:hash_pos]

        # Find the position of the ':' which separates product and version
        colon_pos = rest.find(":", hash_pos)
        if colon_pos == -1:
            raise ValueError("Invalid URN format: missing ':' separator")

        # Ensure consistent handling of slashes in product names
        product = rest[hash_pos + 1 : colon_pos].replace("\\/", "/")
        version = rest[colon_pos + 1 :]

        return vendor, product, version
    except (ValueError, AttributeError) as e:
        # Surface every failure as ValueError and keep the cause attached
        raise ValueError(f"Unable to parse URN '{urn_string}': {str(e)}") from e


def strip_path(path_element: str, scanned_dir: str) -> str:
    """Return path_element relative to scanned_dir, keeping its drive and root.

    The drive and root of the original path are prepended to the relative
    path computed by os.path.relpath.
    """
    anchor_source = Path(path_element)
    relative_part = os.path.relpath(path_element, scanned_dir)
    return "".join((anchor_source.drive, anchor_source.root, relative_part))
57 changes: 49 additions & 8 deletions cve_bin_tool/vex_manager/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
from lib4vex.parser import VEXParser

from cve_bin_tool.log import LOGGER
from cve_bin_tool.util import ProductInfo, Remarks, decode_bom_ref, decode_purl
from cve_bin_tool.util import (
ProductInfo,
Remarks,
decode_bom_ref,
decode_purl,
normalize_product_name,
parse_urn,
)

TriageData = Dict[str, Union[Dict[str, Any], Set[str]]]

Expand Down Expand Up @@ -124,17 +131,51 @@ def __process_vulnerabilities(self, vulnerabilities) -> None:
product_info = None
serialNumber = ""
if self.vextype == "cyclonedx":
decoded_ref = decode_bom_ref(vuln.get("bom_link"))
if isinstance(decoded_ref, tuple) and not isinstance(
decoded_ref, ProductInfo
):
product_info, serialNumber = decoded_ref
self.serialNumbers.add(serialNumber)
# First try with the parse_urn function to handle slashes in product names
if vuln.get("bom_link") and vuln.get("bom_link").startswith("urn:cbt:"):
try:
vendor, product, version = parse_urn(vuln.get("bom_link"))
# Ensure product name is consistent with how it's stored in scanner data
product = normalize_product_name(product)
product_info = ProductInfo(
vendor=vendor.strip(),
product=product.strip(),
version=version.strip(),
)
self.logger.debug(
f"Successfully parsed URN: {vuln.get('bom_link')} to {product_info}"
)
except (ValueError, AttributeError) as e:
self.logger.debug(
f"Error parsing URN '{vuln.get('bom_link')}': {str(e)}"
)
# If the custom parse fails, fall back to decode_bom_ref
decoded_ref = decode_bom_ref(vuln.get("bom_link"))
if decoded_ref:
product_info = decoded_ref
else:
product_info = decoded_ref
# Fall back to decode_bom_ref for other formats
decoded_ref = decode_bom_ref(vuln.get("bom_link"))
if isinstance(decoded_ref, tuple) and not isinstance(
decoded_ref, ProductInfo
):
product_info, serialNumber = decoded_ref
self.serialNumbers.add(serialNumber)
else:
product_info = decoded_ref
elif self.vextype in ["openvex", "csaf"]:
product_info = decode_purl(vuln.get("purl"))
if product_info and hasattr(product_info, "purl"):
# Create a new ProductInfo without the location field
product_info = ProductInfo(
vendor=product_info.vendor,
product=product_info.product,
version=product_info.version,
purl=product_info.purl,
)

if product_info:
self.logger.debug(f"Processing vuln with product_info: {product_info}")
cve_data = {
"remarks": remarks,
"comments": comments if comments else "",
Expand Down
Loading
Loading