27 commits
f495f90
Move alpine/debian converters into converters dir
jess-lowe Jan 20, 2026
db52aad
Make mirrors dir
jess-lowe Jan 20, 2026
e200103
Move nvd conversion
jess-lowe Jan 20, 2026
2ddd920
move and rename cve5 converters
jess-lowe Jan 20, 2026
f71c2ed
Fix routing
jess-lowe Jan 20, 2026
f7130cd
fix test path
jess-lowe Jan 20, 2026
f99caec
Refactor duplicate use of CPE
jess-lowe Jan 20, 2026
e0b08d9
Move CVE5 and NVD CVE models into models dir, and renamed CVE -> NVDC…
jess-lowe Jan 20, 2026
366145e
move ConversionOutcomes into models for NVD and CVE to share.
jess-lowe Jan 21, 2026
8e9cc6d
Move functions that could be shared between converters
jess-lowe Jan 21, 2026
71c74ef
Merge branch 'master' into refactor/move-converters
jess-lowe Jan 21, 2026
3d193d7
Fix dockerfile routing
jess-lowe Jan 21, 2026
53c1f7e
Merge branch 'refactor/move-converters' into refactor/consolidate-models
jess-lowe Jan 21, 2026
de599e9
Merge branch 'master' into refactor/consolidate-models
jess-lowe Jan 22, 2026
7dfa5f0
fix lint
jess-lowe Jan 22, 2026
ec8ef66
Fix importing
jess-lowe Jan 22, 2026
a7e8493
fix lint (again)
jess-lowe Jan 22, 2026
4be0335
Merge branch 'master' into refactor/consolidate-models
jess-lowe Jan 23, 2026
f07e60b
Merge branch 'master' into refactor/consolidate-models
jess-lowe Jan 23, 2026
ab2f983
Use ConversionMetrics instead of notes throughout conversion
jess-lowe Jan 21, 2026
058c133
fix errors
jess-lowe Jan 21, 2026
4bc036b
Parallelize NVD through making RepoCache mutexable
jess-lowe Jan 22, 2026
8fa92cf
fix the issues
jess-lowe Jan 23, 2026
7e44934
Write the metrics files out properly
jess-lowe Jan 27, 2026
746a3be
fix formatting
jess-lowe Jan 27, 2026
d42fea6
Merge branch 'master' into refactor/nvd-parallel
jess-lowe Jan 27, 2026
dc89999
update some logger calls to use metrics.AddNotes
jess-lowe Jan 27, 2026
2 changes: 1 addition & 1 deletion vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go
@@ -91,7 +91,7 @@ func main() {

close(jobs)
wg.Wait()
logger.Info("Conversion run complete")
logger.Info("CVE5 Conversion run complete")
}

// worker is a function that processes CVE files from the jobs channel.
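For context, the bulk converter's main/worker split referenced by the doc comment above is the standard jobs-channel plus WaitGroup pattern. A minimal, self-contained sketch of that pattern (illustrative only; the file's actual worker body is elided from this diff):

package main

import (
	"fmt"
	"sync"
)

// worker drains CVE file paths from the jobs channel until it is closed.
func worker(wg *sync.WaitGroup, jobs <-chan string) {
	defer wg.Done()
	for path := range jobs {
		// The real worker would parse the CVE5 record and write an OSV file here.
		fmt.Println("converting", path)
	}
}

func main() {
	jobs := make(chan string)
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go worker(&wg, jobs)
	}
	for _, f := range []string{"CVE-2026-0001.json", "CVE-2026-0002.json"} {
		jobs <- f
	}
	close(jobs) // no more jobs; each worker's range loop then terminates
	wg.Wait()   // block until every worker has finished
	fmt.Println("CVE5 Conversion run complete")
}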
250 changes: 61 additions & 189 deletions vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go
@@ -2,16 +2,14 @@
package main

import (
"encoding/csv"
"encoding/json"
"errors"
"flag"
"fmt"
"log/slog"
"os"
"path/filepath"
"slices"
"strings"
"sync"

"github.com/google/osv/vulnfeeds/conversion/nvd"
"github.com/google/osv/vulnfeeds/cves"
@@ -20,60 +18,28 @@ import (
"github.com/google/osv/vulnfeeds/utility/logger"
)

var ErrNoRanges = errors.New("no ranges")

var ErrUnresolvedFix = errors.New("fixes not resolved to commits")

var (
jsonPath = flag.String("nvd-json", "", "Path to NVD CVE JSON to examine.")
parsedCPEDictionary = flag.String("cpe-repos", "", "Path to JSON mapping of CPEs to repos generated by cpe-repo-gen")
outDir = flag.String("out-dir", "", "Path to output results.")
outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}")
workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.")
)
var RepoTagsCache git.RepoTagsCache
var Metrics struct {
TotalCVEs int
CVEsForApplications int
CVEsForKnownRepos int
OSVRecordsGenerated int
Outcomes map[models.CVEID]models.ConversionOutcome // Per-CVE-ID record of conversion result.
}

func loadCPEDictionary(productToRepo *cves.VendorProductToRepoMap, f string) error {
data, err := os.ReadFile(f)
if err != nil {
return err
}
var RepoTagsCache = &git.RepoTagsCache{}
Review comment (Contributor): I think these should be defined in the main function and passed through arguments.
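A minimal, self-contained sketch of that suggestion: build the caches in main and thread them through as arguments. Placeholder types stand in for the repository's git.RepoTagsCache, cves.VPRepoCache, and models.NVDCVE; every name and signature below is illustrative, not code from this PR.

package main

import "sync"

// Stand-ins for the repo's cache and model types; the mutexes hint at the
// concurrent access from workers that motivates the caches in the first place.
type repoTagsCache struct{ mu sync.Mutex }
type vpRepoCache struct{ mu sync.Mutex }
type nvdCVE struct{ ID string }

// processCVE receives the caches it needs as parameters instead of reading globals.
func processCVE(cve nvdCVE, tags *repoTagsCache, repos *vpRepoCache) error {
	_ = tags // the real converter would consult both caches here
	_ = repos
	return nil
}

func worker(wg *sync.WaitGroup, jobs <-chan nvdCVE, tags *repoTagsCache, repos *vpRepoCache) {
	defer wg.Done()
	for cve := range jobs {
		_ = processCVE(cve, tags, repos)
	}
}

func main() {
	// Constructed once here and passed down, rather than declared as package-level vars.
	tags := &repoTagsCache{}
	repos := &vpRepoCache{}

	jobs := make(chan nvdCVE)
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go worker(&wg, jobs, tags, repos)
	}
	jobs <- nvdCVE{ID: "CVE-2026-0001"}
	close(jobs)
	wg.Wait()
}

Keeping ownership of the caches in main makes each worker's dependencies explicit and easier to test.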

var VPRepoCache = &cves.VPRepoCache{}

return json.Unmarshal(data, &productToRepo)
}

// Output a CSV summarizing per-CVE how it was handled.
func outputOutcomes(outcomes map[models.CVEID]models.ConversionOutcome, reposForCVE map[models.CVEID][]string, directory string) error {
outcomesFile, err := os.Create(filepath.Join(directory, "outcomes.csv"))
func loadCPEDictionary(productToRepo *cves.VPRepoCache, f string) error {
data, err := os.ReadFile(f)
if err != nil {
return err
}
defer outcomesFile.Close()
w := csv.NewWriter(outcomesFile)
if err := w.Write([]string{"CVE", "outcome", "repos"}); err != nil {
return err
}
for CVE, outcome := range outcomes {
// It's conceivable to have more than one repo for a CVE, so concatenate them.
r := ""
if repos, ok := reposForCVE[CVE]; ok {
r = strings.Join(repos, " ")
}
if err := w.Write([]string{string(CVE), outcome.String(), r}); err != nil {
return err
}
}
w.Flush()

if err = w.Error(); err != nil {
var tempMap cves.VendorProductToRepoMap
if err := json.Unmarshal(data, &tempMap); err != nil {
return err
}
productToRepo.M = tempMap

return nil
}
@@ -85,8 +51,6 @@ func main() {
os.Exit(1)
}

Metrics.Outcomes = make(map[models.CVEID]models.ConversionOutcome)

logger.InitGlobalLogger()

data, err := os.ReadFile(*jsonPath)
@@ -100,165 +64,73 @@
logger.Fatal("Failed to parse NVD CVE JSON", slog.Any("err", err))
}

VPRepoCache := make(cves.VendorProductToRepoMap)

ReposForCVE := make(map[models.CVEID][]string)

if *parsedCPEDictionary != "" {
err = loadCPEDictionary(&VPRepoCache, *parsedCPEDictionary)
err = loadCPEDictionary(VPRepoCache, *parsedCPEDictionary)
if err != nil {
logger.Fatal("Failed to load parsed CPE dictionary", slog.Any("err", err))
}
logger.Info("VendorProductToRepoMap cache has entries preloaded", slog.Int("count", len(VPRepoCache)))
logger.Info("VendorProductToRepoMap cache has entries preloaded", slog.Int("count", len(VPRepoCache.M)))
}

for _, cve := range parsed.Vulnerabilities {
refs := cve.CVE.References
CPEs := cves.CPEs(cve.CVE)
CVEID := cve.CVE.ID

if len(refs) == 0 && len(CPEs) == 0 {
logger.Info("Skipping due to lack of CPEs and lack of references", slog.String("cve", string(CVEID)))
// 100% of these in 2022 were rejected CVEs

continue
}

// Edge case: No CPEs, but perhaps usable references.
if len(refs) > 0 && len(CPEs) == 0 {
repos := cves.ReposFromReferences(string(CVEID), nil, nil, refs, cves.RefTagDenyList)
if len(repos) == 0 {
logger.Warn("Failed to derive any repos and there were no CPEs", slog.String("cve", string(CVEID)))
continue
}
logger.Info("Derived repos for CVE with no CPEs", slog.String("cve", string(CVEID)), slog.Any("repos", repos))
ReposForCVE[CVEID] = repos
}

// Does it have any application CPEs? Look for pre-computed repos based on VendorProduct.
appCPECount := 0
for _, CPEstr := range cves.CPEs(cve.CVE) {
CPE, err := cves.ParseCPE(CPEstr)
if err != nil {
logger.Warn("Failed to parse CPE", slog.String("cve", string(CVEID)), slog.String("cpe", CPEstr), slog.Any("err", err))
jobs := make(chan models.NVDCVE)
var wg sync.WaitGroup

continue
}
if CPE.Part == "a" {
appCPECount += 1
}
vendorProductKey := cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}
if _, ok := VPRepoCache[vendorProductKey]; ok {
logger.Info("Pre-references, derived repos using cache", slog.String("cve", string(CVEID)), slog.Any("repos", VPRepoCache[vendorProductKey]), slog.String("vendor", CPE.Vendor), slog.String("product", CPE.Product))
if _, ok := ReposForCVE[CVEID]; !ok {
ReposForCVE[CVEID] = VPRepoCache[vendorProductKey]
continue
}
// Don't append duplicates.
for _, repo := range VPRepoCache[vendorProductKey] {
if !slices.Contains(ReposForCVE[CVEID], repo) {
ReposForCVE[CVEID] = append(ReposForCVE[CVEID], repo)
}
}
}
}
for range *workers {
wg.Add(1)
go worker(&wg, jobs, *outDir)
}

if len(CPEs) > 0 && appCPECount == 0 {
// This CVE is not for software (based on there being CPEs but not any application ones), skip.
continue
}
for _, cve := range parsed.Vulnerabilities {
jobs <- cve.CVE
}

if appCPECount > 0 {
Metrics.CVEsForApplications++
}
close(jobs)
wg.Wait()
logger.Info("NVD Conversion run complete")
}

// If there wasn't a repo from the CPE Dictionary, try and derive one from the CVE references.
if _, ok := ReposForCVE[CVEID]; !ok && len(refs) > 0 {
for _, CPEstr := range cves.CPEs(cve.CVE) {
CPE, err := cves.ParseCPE(CPEstr)
if err != nil {
logger.Warn("Failed to parse CPE", slog.String("cve", string(CVEID)), slog.String("cpe", CPEstr), slog.Any("err", err))
continue
}
// Continue to only focus on application CPEs.
if CPE.Part != "a" {
continue
}
if slices.Contains(cves.VendorProductDenyList, cves.VendorProduct{Vendor: CPE.Vendor, Product: ""}) {
continue
}
if slices.Contains(cves.VendorProductDenyList, cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}) {
continue
}
repos := cves.ReposFromReferences(string(CVEID), VPRepoCache, &cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}, refs, cves.RefTagDenyList)
if len(repos) == 0 {
logger.Warn("Failed to derive any repos", slog.String("cve", string(CVEID)), slog.String("vendor", CPE.Vendor), slog.String("product", CPE.Product))
continue
}
logger.Info("Derived repos", slog.String("cve", string(CVEID)), slog.Any("repos", repos), slog.String("vendor", CPE.Vendor), slog.String("product", CPE.Product))
ReposForCVE[CVEID] = repos
}
func processCVE(cve models.NVDCVE) error {
metrics := &models.ConversionMetrics{
CVEID: cve.ID,
CNA: "nvd",
}
repos := nvd.FindRepos(cve, VPRepoCache, metrics)
metrics.Repos = repos

var err error
switch *outFormat {
case "OSV":
err = nvd.CVEToOSV(cve, repos, RepoTagsCache, *outDir, metrics)
Review comment (Contributor): nit: RepoTagsCache and VPRepoCache should be passed in via arguments (the sketch under the earlier review comment shows one possible shape).

case "PackageInfo":
err = nvd.CVEToPackageInfo(cve, repos, RepoTagsCache, *outDir, metrics)
}
// Parse this error to determine which failure mode it was
if err != nil {
logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cve.ID)), slog.Any("err", err))
if errors.Is(err, nvd.ErrNoRanges) {
metrics.Outcome = models.NoRanges
return err
}

logger.Info("Finished processing "+string(CVEID),
slog.String("cve", string(CVEID)),
slog.Int("cpes", len(CPEs)),
slog.Int("app_cpes", appCPECount),
slog.Int("derived_repos", len(ReposForCVE[CVEID])))

// If we've made it to here, we may have a CVE:
// * that has Application-related CPEs (so applies to software)
// * has a reference that is a known repository URL
// OR
// * a derived repository for the software package
//
// We do not yet have:
// * any knowledge of the language used
// * definitive version information

if _, ok := ReposForCVE[CVEID]; !ok {
// We have nothing useful to work with, so we'll assume it's out of scope
logger.Info("Passing due to lack of viable repository", slog.String("cve", string(CVEID)))
Metrics.Outcomes[CVEID] = models.NoRepos

continue
if errors.Is(err, nvd.ErrUnresolvedFix) {
metrics.Outcome = models.FixUnresolvable
return err
}
metrics.Outcome = models.ConversionUnknown

logger.Info("Found Repos for CVE "+string(CVEID), slog.String("cve", string(CVEID)), slog.Any("repos", ReposForCVE[CVEID]))

Metrics.CVEsForKnownRepos++
return err
}
metrics.Outcome = models.Successful

switch *outFormat {
case "OSV":
err = nvd.CVEToOSV(cve.CVE, ReposForCVE[CVEID], RepoTagsCache, *outDir)
case "PackageInfo":
err = nvd.CVEToPackageInfo(cve.CVE, ReposForCVE[CVEID], RepoTagsCache, *outDir)
}
// Parse this error to determine which failure mode it was
if err != nil {
logger.Warn("Failed to generate an OSV record", slog.String("cve", string(CVEID)), slog.Any("err", err))
if errors.Is(err, ErrNoRanges) {
Metrics.Outcomes[CVEID] = models.NoRanges
continue
}
if errors.Is(err, ErrUnresolvedFix) {
Metrics.Outcomes[CVEID] = models.FixUnresolvable
continue
}
Metrics.Outcomes[CVEID] = models.ConversionUnknown
return nil
}

continue
func worker(wg *sync.WaitGroup, jobs <-chan models.NVDCVE, _ string) {
defer wg.Done()
for cve := range jobs {
if err := processCVE(cve); err != nil {
logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cve.ID)), slog.Any("err", err))
} else {
logger.Info("Generated OSV record for "+string(cve.ID), slog.String("cve", string(cve.ID)))
}
Metrics.OSVRecordsGenerated++
Metrics.Outcomes[CVEID] = models.Successful
}
Metrics.TotalCVEs = len(parsed.Vulnerabilities)
err = outputOutcomes(Metrics.Outcomes, ReposForCVE, *outDir)
if err != nil {
// Log entry with size 1.15M exceeds maximum size of 256.0K
fmt.Fprintf(os.Stderr, "Failed to write out metrics: %v", err)
}
// Outcomes is too big to log, so zero it out.
Metrics.Outcomes = nil
logger.Info("Metrics", slog.String("path", filepath.Base(*jsonPath)), slog.Any("metrics", Metrics))
}
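The outcome handling in processCVE above classifies failures by comparing the returned error against sentinel errors with errors.Is. A self-contained sketch of that pattern, using hypothetical sentinels and outcome values in place of nvd.ErrNoRanges, nvd.ErrUnresolvedFix, and the models outcome constants:

package main

import (
	"errors"
	"fmt"
)

// Hypothetical sentinels mirroring the shape used by the converter.
var (
	errNoRanges      = errors.New("no ranges")
	errUnresolvedFix = errors.New("fixes not resolved to commits")
)

type outcome string

const (
	outcomeSuccessful    outcome = "Successful"
	outcomeNoRanges      outcome = "NoRanges"
	outcomeFixUnresolved outcome = "FixUnresolvable"
	outcomeUnknown       outcome = "ConversionUnknown"
)

// classify maps a conversion error onto a coarse outcome for metrics.
func classify(err error) outcome {
	switch {
	case err == nil:
		return outcomeSuccessful
	case errors.Is(err, errNoRanges):
		return outcomeNoRanges
	case errors.Is(err, errUnresolvedFix):
		return outcomeFixUnresolved
	default:
		return outcomeUnknown
	}
}

func main() {
	// A wrapped error still matches its sentinel through errors.Is.
	err := fmt.Errorf("CVE-2026-0001: %w", errNoRanges)
	fmt.Println(classify(err)) // NoRanges
	fmt.Println(classify(nil)) // Successful
}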
12 changes: 7 additions & 5 deletions vulnfeeds/cmd/pypi/main.go
@@ -165,10 +165,12 @@ func main() {
Ecosystem: "PyPI",
PURL: purl,
}

metrics := &models.ConversionMetrics{
CVEID: cve.CVE.ID,
}
v := vulns.FromNVDCVE(id, cve.CVE)
v.AddPkgInfo(pkgInfo)
versions, notes := cves.ExtractVersionInfo(cve.CVE, validVersions, http.DefaultClient)
versions := cves.ExtractVersionInfo(cve.CVE, validVersions, http.DefaultClient, metrics)

vulns.AttachExtractedVersionInfo(v, versions)
if len(v.Affected[0].GetRanges()) == 0 {
@@ -192,7 +194,7 @@
continue
}

if len(notes) > 0 && *withoutNotes {
if len(metrics.Notes) > 0 && *withoutNotes {
logger.Info("Skipping as there are notes associated with it", slog.String("path", vulnPath))
continue
}
@@ -208,9 +210,9 @@
}

// If there are notes that require human intervention, write them to the end of the YAML.
if len(notes) > 0 {
if len(metrics.Notes) > 0 {
notesPath := filepath.Join(pkgDir, v.Id+".notes")
_, err = f.WriteString("\n# <Vulnfeeds Notes>\n# " + strings.Join(notes, "\n# "))
_, err = f.WriteString("\n# <Vulnfeeds Notes>\n# " + strings.Join(metrics.Notes, "\n# "))
if err != nil {
logger.Panic("Failed to write", slog.String("path", notesPath), slog.Any("err", err))
}