From 15cfd92dac08bab56bff0586670fa62dadfe0466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9g=C3=A9?= Date: Fri, 23 Feb 2024 11:08:18 +0100 Subject: [PATCH 1/2] feat: make *_LOCK_DURATION options more clear --- config/opts.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/opts.go b/config/opts.go index 122302d..5844f86 100644 --- a/config/opts.go +++ b/config/opts.go @@ -49,8 +49,8 @@ type ( Crontab string `long:"repair.crontab" env:"REPAIR_CRONTAB" description:"Crontab of check runs" default:"@every 2m"` NotReadyThreshold time.Duration `long:"repair.notready-threshold" env:"REPAIR_NOTREADY_THRESHOLD" description:"Threshold (duration) when the automatic repair should be tried (eg. after 10 mins of NotReady state after last successfull heartbeat)" default:"10m"` Limit int `long:"repair.concurrency" env:"REPAIR_CONCURRENCY" description:"How many VMs should be redeployed concurrently" default:"1"` - LockDuration time.Duration `long:"repair.lock-duration" env:"REPAIR_LOCK_DURATION" description:"Duration how long should be waited for another redeploy" default:"30m"` - LockDurationError time.Duration `long:"repair.lock-duration-error" env:"REPAIR_LOCK_DURATION_ERROR" description:"Duration how long should be waited for another redeploy in case an error occurred" default:"5m"` + LockDuration time.Duration `long:"repair.lock-duration" env:"REPAIR_LOCK_DURATION" description:"Duration how long should be waited for another redeploy on the same node" default:"30m"` + LockDurationError time.Duration `long:"repair.lock-duration-error" env:"REPAIR_LOCK_DURATION_ERROR" description:"Duration how long should be waited for another redeploy on the same node in case an error occurred" default:"5m"` AzureVmssAction string `long:"repair.azure.vmss.action" env:"REPAIR_AZURE_VMSS_ACTION" description:"Defines the action which should be tried to repair the node (VMSS)" default:"redeploy" choice:"restart" choice:"redeploy" choice:"reimage" choice:"delete"` //nolint:staticcheck AzureVmAction string `long:"repair.azure.vm.action" env:"REPAIR_AZURE_VM_ACTION" description:"Defines the action which should be tried to repair the node (VM)" default:"redeploy" choice:"restart" choice:"redeploy"` //nolint:staticcheck ProvisioningState []string `long:"repair.azure.provisioningstate" env:"REPAIR_AZURE_PROVISIONINGSTATE" description:"Azure VM provisioning states where repair should be tried (eg. avoid repair in \"upgrading\" state; \"*\" to accept all states)" default:"succeeded" default:"failed" env-delim:" "` //nolint:staticcheck @@ -62,8 +62,8 @@ type ( Update struct { Crontab string `long:"update.crontab" env:"UPDATE_CRONTAB" description:"Crontab of check runs" default:"@every 15m"` Limit int `long:"update.concurrency" env:"UPDATE_CONCURRENCY" description:"How many VMs should be updated concurrently" default:"1"` - LockDuration time.Duration `long:"update.lock-duration" env:"UPDATE_LOCK_DURATION" description:"Duration how long should be waited for another update" default:"15m"` - LockDurationError time.Duration `long:"update.lock-duration-error" env:"UPDATE_LOCK_DURATION_ERROR" description:"Duration how long should be waited for another update in case an error occurred" default:"5m"` + LockDuration time.Duration `long:"update.lock-duration" env:"UPDATE_LOCK_DURATION" description:"Duration how long should be waited for another update on the same node" default:"15m"` + LockDurationError time.Duration `long:"update.lock-duration-error" env:"UPDATE_LOCK_DURATION_ERROR" description:"Duration how long should be waited for another update on the same node in case an error occurred" default:"5m"` NodeLockAnnotation string `long:"update.lock-annotation" env:"UPDATE_LOCK_ANNOTATION" description:"Node annotation for update lock time" default:"autopilot.webdevops.io/update-lock"` NodeOngoingAnnotation string `long:"update.ongoing-annotation" env:"UPDATE_ONGOING_ANNOTATION" description:"Node annotation for ongoing update lock" default:"autopilot.webdevops.io/update-ongoing"` NodeExcludeAnnotation string `long:"update.exclude-annotation" env:"UPDATE_EXCLUDE_ANNOTATION" description:"Node annotation for excluding node for updates" default:"autopilot.webdevops.io/exclude"` From a033b4ba612aab679fa5ab0a825e2e337368dc5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9g=C3=A9?= Date: Fri, 23 Feb 2024 11:09:35 +0100 Subject: [PATCH 2/2] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 96d1daa..07b0258 100644 --- a/README.md +++ b/README.md @@ -44,9 +44,9 @@ Application Options: (default: 10m) [$REPAIR_NOTREADY_THRESHOLD] --repair.concurrency= How many VMs should be redeployed concurrently (default: 1) [$REPAIR_CONCURRENCY] - --repair.lock-duration= Duration how long should be waited for another redeploy (default: 30m) + --repair.lock-duration= Duration how long should be waited for another redeploy on the same node (default: 30m) [$REPAIR_LOCK_DURATION] - --repair.lock-duration-error= Duration how long should be waited for another redeploy in case an error + --repair.lock-duration-error= Duration how long should be waited for another redeploy on the same node in case an error occurred (default: 5m) [$REPAIR_LOCK_DURATION_ERROR] --repair.azure.vmss.action=[restart|redeploy|reimage|delete] Defines the action which should be tried to repair the node (VMSS) (default: redeploy) [$REPAIR_AZURE_VMSS_ACTION] @@ -60,9 +60,9 @@ Application Options: --update.crontab= Crontab of check runs (default: @every 15m) [$UPDATE_CRONTAB] --update.concurrency= How many VMs should be updated concurrently (default: 1) [$UPDATE_CONCURRENCY] - --update.lock-duration= Duration how long should be waited for another update (default: 15m) + --update.lock-duration= Duration how long should be waited for another update on the same node (default: 15m) [$UPDATE_LOCK_DURATION] - --update.lock-duration-error= Duration how long should be waited for another update in case an error + --update.lock-duration-error= Duration how long should be waited for another update on the same node in case an error occurred (default: 5m) [$UPDATE_LOCK_DURATION_ERROR] --update.lock-annotation= Node annotation for update lock time (default: autopilot.webdevops.io/update-lock) [$UPDATE_LOCK_ANNOTATION]