From d593f2fc24a67a8811cc59abe9de1f934710fd40 Mon Sep 17 00:00:00 2001 From: Arvind Shyamsundar Date: Mon, 13 Nov 2023 10:38:13 -0800 Subject: [PATCH] Add support for Azure Lsv3 VMs (#435) Adds the Lsv3 VM SKUs to the list of VMs which have NVME storage. --- conf/muchos.props.example | 4 ++-- docs/azure-ephemeral-disks.md | 8 ++++---- lib/muchos/config/azure.py | 20 ++++++++++++++++---- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/conf/muchos.props.example b/conf/muchos.props.example index 4d59a65c..24f8f59b 100644 --- a/conf/muchos.props.example +++ b/conf/muchos.props.example @@ -140,14 +140,14 @@ azure_image_reference = CentOS|OpenLogic|7_9|latest| # The minimum allowed size for this is 3 nodes for non-HA & 4 nodes for HA setup numnodes = 8 # The size of each virtual machine. See the following link for other sizes: -# https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general +# https://learn.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general vm_sku = Standard_D8s_v3 # Each VM will be provisioned with the following type of managed disk # The azure_disk_device* parameters below specify the Linux device paths Muchos looks for when selecting disks for storage # The default values below are for using Azure managed disks azure_disk_device_path = /dev/disk/azure/scsi1 azure_disk_device_pattern = lun* -# If using Azure Lsv2 VMs which have NVME disks for ephemeral storage, use the parameters below instead of the defaults +# If using Azure Lsv2 or Lsv3 VMs which have NVME disks for ephemeral storage, use the parameters below instead of the defaults # azure_disk_device_path = /dev # azure_disk_device_pattern = nvme*n1 # Type of the data disk attached to the VMSS. 
'Standard_LRS' for HDD, 'Premium_LRS' for SSD, 'StandardSSD_LRS' for Standard SSD diff --git a/docs/azure-ephemeral-disks.md b/docs/azure-ephemeral-disks.md index b4e2cf58..221293e7 100644 --- a/docs/azure-ephemeral-disks.md +++ b/docs/azure-ephemeral-disks.md @@ -2,15 +2,15 @@ Using ephemeral storage within clusters deployed by Muchos for Azure -------------------------------------------------------------------- By default for Azure based clusters, Muchos will create 3 data disks, each of size 128GiB, attached to each VM. These -[managed disks](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/managed-disks-overview) provide +[managed disks](https://learn.microsoft.com/en-us/azure/virtual-machines/managed-disks-overview) provide persistent storage which ensures that the data in HDFS is safe and consistent even if the VMs are deallocated (stopped). However, if you'd like to use only the ephemeral / temporary disk storage for HDFS, you first need to understand that using temp storage will result in lost data across VM deallocate - start cycles. If that behavior is acceptable for your dev/test scenario, there are two options available to use ephemeral storage within Azure: * Use the temporary SSD disk which is available on most VM types. This tends to be smaller in size. 
Refer to the -[Azure VM sizes](https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series) page for details on temp storage sizes -* Use the [Lsv2 series VMs](https://docs.microsoft.com/en-us/azure/virtual-machines/lsv2-series) which offer larger amounts of NVME based temp storage +[Azure VM sizes](https://learn.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series) page for details on temp storage sizes +* Use the [Lsv2 series VMs](https://learn.microsoft.com/en-us/azure/virtual-machines/lsv2-series), [Lasv3 series VMs](https://learn.microsoft.com/en-us/azure/virtual-machines/lasv3-series) or [Lsv3 series VMs](https://learn.microsoft.com/en-us/azure/virtual-machines/lsv3-series) which offer larger amounts of NVME based temp storage For using "regular" temporary storage (non-NVME), you need to change the following within the `azure` section within muchos.props: * `data_disk_count` needs to be set to 0 @@ -18,7 +18,7 @@ For using "regular" temporary storage (non-NVME), you need to change the followi If you'd like larger NVME temporary disks, another option is to use the storage-optimized Lsv2 VM type in Azure. To use the NVME disks available in these VMs, you must change the following within the `azure` section within muchos.props: -* `vm_sku` needs to be set to one of the sizes from [this page](https://docs.microsoft.com/en-us/azure/virtual-machines/lsv2-series), for example Standard_L8s_v2 +* `vm_sku` needs to be set to one of the sizes from the [Lsv2 series](https://learn.microsoft.com/en-us/azure/virtual-machines/lsv2-series), [Lasv3 series](https://learn.microsoft.com/en-us/azure/virtual-machines/lasv3-series) or [Lsv3 series](https://learn.microsoft.com/en-us/azure/virtual-machines/lsv3-series) pages, for example Standard_L8s_v2.
* `data_disk_count` needs to be set to 0 * `mount_root` within the `azure` section should be set to `/var/data` (which is also the default) * `azure_disk_device_path` should be set to `/dev` diff --git a/lib/muchos/config/azure.py b/lib/muchos/config/azure.py index f90810ba..7aac229a 100644 --- a/lib/muchos/config/azure.py +++ b/lib/muchos/config/azure.py @@ -102,20 +102,32 @@ def data_dirs_internal( data_dirs.append(mount_root_actual) return data_dirs - # Check if using Lsv2 NVME temp storage for HDFS - lsv2_vm_disk_map = { + # Check if using Lsv2 or Lsv3 NVME temp storage for HDFS + nvme_vm_disk_map = { "Standard_L8s_v2": 1, "Standard_L16s_v2": 2, "Standard_L32s_v2": 4, "Standard_L48s_v2": 6, "Standard_L64s_v2": 8, "Standard_L80s_v2": 10, + "Standard_L8s_v3": 1, + "Standard_L16s_v3": 2, + "Standard_L32s_v3": 4, + "Standard_L48s_v3": 6, + "Standard_L64s_v3": 8, + "Standard_L80s_v3": 10, + "Standard_L8as_v3": 1, + "Standard_L16as_v3": 2, + "Standard_L32as_v3": 4, + "Standard_L48as_v3": 6, + "Standard_L64as_v3": 8, + "Standard_L80as_v3": 10, } - if num_disks == 0 and curr_vm_sku in lsv2_vm_disk_map.keys(): + if num_disks == 0 and curr_vm_sku in nvme_vm_disk_map.keys(): # pretend that we have N data disks # in this case those are NVME temp disks - num_disks = lsv2_vm_disk_map[curr_vm_sku] + num_disks = nvme_vm_disk_map[curr_vm_sku] # Persistent data disks attached to VMs range_var = num_disks + 1