Skip to content

Commit

Permalink
Merge pull request #43 from nebius/feature/dcgm-service-monitor
Browse files Browse the repository at this point in the history
Allow enabling DCGM ServiceMonitor
  • Loading branch information
dstaroff authored Oct 22, 2024
2 parents beb828a + 6e556ec commit 6253b42
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
5 changes: 3 additions & 2 deletions modules/gpu-operator/helm.tf → modules/gpu-operator/app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ resource "nebius_applications_v1alpha1_k8s_release" "gpu-operator" {

application_name = "gpu-operator"
namespace = "gpu-operator"
product_slug = "nebius/nvidia-gpu-operator"
product_slug = var.product_slug

set = {
"driver.version" : var.driver_version
"driver.version" : var.driver_version,
"dcgmExporter.serviceMonitor.enabled" : var.enable_dcgm_service_monitor
}
}
8 changes: 7 additions & 1 deletion modules/gpu-operator/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@ variable "product_slug" {
}

# GPU driver version; the gpu-operator release passes this through as the
# "driver.version" chart value.
variable "driver_version" {
  description = "GPU driver version."
  type        = string
  default     = "550.54.15"
}

# Toggle forwarded to the "dcgmExporter.serviceMonitor.enabled" chart value;
# stays off unless the caller sets it to true.
variable "enable_dcgm_service_monitor" {
  type        = bool
  default     = false
  description = "Whether to enable DCGM service monitor."
}
File renamed without changes.

0 comments on commit 6253b42

Please sign in to comment.