From 4873c7134cee7ebdce33029ffea7408bd878c8ab Mon Sep 17 00:00:00 2001 From: amarin-dspace <131253145+amarin-dspace@users.noreply.github.com> Date: Mon, 27 May 2024 13:11:52 +0200 Subject: [PATCH] Added configurable disk size for all node groups (#99) --- main.tf | 5 ++++- modules/simphera_base/k8s.tf | 6 +++--- modules/simphera_base/variables.tf | 18 ++++++++++++++++++ variables.tf | 20 +++++++++++++++++++- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/main.tf b/main.tf index e30ae94..274b410 100644 --- a/main.tf +++ b/main.tf @@ -4,7 +4,7 @@ terraform { required_providers { azurerm = { source = "hashicorp/azurerm" - version = "3.86.0" + version = "3.92.0" } random = { version = "3.5.1" @@ -29,9 +29,11 @@ module "simphera_base" { infrastructurename = var.infrastructurename tags = var.tags linuxNodeSize = var.linuxNodeSize + linuxNodeDiskSize = var.linuxNodeDiskSize linuxNodeCountMin = var.linuxNodeCountMin linuxNodeCountMax = var.linuxNodeCountMax linuxExecutionNodeSize = var.linuxExecutionNodeSize + linuxExecutionNodeDiskSize = var.linuxExecutionNodeDiskSize linuxExecutionNodeCountMin = var.linuxExecutionNodeCountMin linuxExecutionNodeCountMax = var.linuxExecutionNodeCountMax linuxExecutionNodeDeallocate = var.linuxExecutionNodeDeallocate @@ -39,6 +41,7 @@ module "simphera_base" { gpuNodeCountMin = var.gpuNodeCountMin gpuNodeCountMax = var.gpuNodeCountMax gpuNodeSize = var.gpuNodeSize + gpuNodeDiskSize = var.gpuNodeDiskSize gpuNodeDeallocate = var.gpuNodeDeallocate ssh_public_key_path = var.ssh_public_key_path licenseServer = var.licenseServer diff --git a/modules/simphera_base/k8s.tf b/modules/simphera_base/k8s.tf index 625a5e8..6b16877 100644 --- a/modules/simphera_base/k8s.tf +++ b/modules/simphera_base/k8s.tf @@ -79,7 +79,7 @@ resource "azurerm_kubernetes_cluster" "aks" { min_count = var.linuxNodeCountMin max_count = var.linuxNodeCountMax enable_auto_scaling = true - os_disk_size_gb = 128 + os_disk_size_gb = var.linuxNodeDiskSize type = "VirtualMachineScaleSets" max_pods = 110 vnet_subnet_id = azurerm_subnet.default-node-pool-subnet.id @@ -119,7 +119,7 @@ resource "azurerm_kubernetes_cluster_node_pool" "execution-nodes" { name = "execnodes" mode = "User" orchestrator_version = var.kubernetesVersion - os_disk_size_gb = 128 + os_disk_size_gb = var.linuxExecutionNodeDiskSize kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id min_count = var.linuxExecutionNodeCountMin max_count = var.linuxExecutionNodeCountMax @@ -156,7 +156,7 @@ resource "azurerm_kubernetes_cluster_node_pool" "gpu-execution-nodes" { name = "gpuexecnodes" mode = "User" orchestrator_version = var.kubernetesVersion - os_disk_size_gb = 128 + os_disk_size_gb = var.gpuNodeDiskSize kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id min_count = var.gpuNodeCountMin max_count = var.gpuNodeCountMax diff --git a/modules/simphera_base/variables.tf b/modules/simphera_base/variables.tf index 8c870ba..756a1e5 100644 --- a/modules/simphera_base/variables.tf +++ b/modules/simphera_base/variables.tf @@ -20,6 +20,12 @@ variable "linuxNodeSize" { default = "Standard_D4s_v4" } +variable "linuxNodeDiskSize" { + type = number + description = "The disk size in GiB of the nodes for the regular services" + default = 200 +} + variable "linuxNodeCountMin" { type = number description = "The minimum number of Linux nodes for the regular services" @@ -38,6 +44,12 @@ variable "linuxExecutionNodeSize" { default = "Standard_D16s_v4" } +variable "linuxExecutionNodeDiskSize" { + type = number + description = "The disk size in GiB of the nodes for the job execution" + default = 200 +} + variable "linuxExecutionNodeCountMin" { type = number description = "The minimum number of Linux nodes for the job execution" @@ -80,6 +92,12 @@ variable "gpuNodeSize" { default = "Standard_NC16as_T4_v3" } +variable "gpuNodeDiskSize" { + type = number + description = "The disk size in GiB of the nodes for gpu job execution" + default = 200 +} + variable "gpuNodeDeallocate" { type = bool description = "Configures whether the nodes for the gpu job execution are 'Deallocated (Stopped)' by the cluster auto scaler or 'Deleted'." diff --git a/variables.tf b/variables.tf index 94629a3..8d8e5df 100644 --- a/variables.tf +++ b/variables.tf @@ -31,6 +31,12 @@ variable "linuxNodeSize" { default = "Standard_D4s_v4" } +variable "linuxNodeDiskSize" { + type = number + description = "The disk size in GiB of the nodes for the regular services" + default = 200 +} + variable "linuxNodeCountMin" { type = number description = "The minimum number of Linux nodes for the regular services" @@ -49,6 +55,12 @@ variable "linuxExecutionNodeSize" { default = "Standard_D16s_v4" } +variable "linuxExecutionNodeDiskSize" { + type = number + description = "The disk size in GiB of the nodes for the job execution" + default = 200 +} + variable "linuxExecutionNodeCountMin" { type = number description = "The minimum number of Linux nodes for the job execution" @@ -91,6 +103,12 @@ variable "gpuNodeSize" { default = "Standard_NC16as_T4_v3" } +variable "gpuNodeDiskSize" { + type = number + description = "The disk size in GiB of the gpu nodes" + default = 200 +} + variable "gpuNodeDeallocate" { type = bool description = "Configures whether the nodes for the gpu job execution are 'Deallocated (Stopped)' by the cluster auto scaler or 'Deleted'." @@ -142,7 +160,7 @@ variable "logAnalyticsWorkspaceResourceGroupName" { variable "kubernetesVersion" { type = string description = "The version of the AKS cluster." - default = "1.28.3" + default = "1.28.9" } variable "kubernetesTier" {