Skip to content

Commit 8f0207a

Browse files
authored
feat: add config variable POD_TERMINATION_GRACE_PERIOD (#29)
Fixes #28
1 parent fc91d4a commit 8f0207a

File tree

6 files changed

+56
-43
lines changed

6 files changed

+56
-43
lines changed

README.md

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,20 @@ Therefore, this application will not run into any issues if it is restarted, res
4040

4141
## Usage
4242

43-
| Environment variable | Description | Required | Default |
44-
|:-------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------|:------------|
45-
| CLUSTER_NAME | Name of the eks-cluster, used in place of `AUTODISCOVERRY_TAGS` and `AUTO_SCALING_GROUP_NAMES`. Checks for `k8s.io/cluster-autoscaler/<CLUSTER_NAME>: owned` and `k8s.io/cluster-autoscaler/enabled: true` tags on ASG | yes | `""` |
46-
| AUTODISCOVERY_TAGS | Comma separated key value string with tags to autodiscover ASGs, used in place of `CLUSTER_NAME` and `AUTO_SCALING_GROUP_NAMES`. | yes | `""` |
47-
| AUTO_SCALING_GROUP_NAMES | Comma-separated list of ASGs, CLUSTER_NAME takes priority. | yes | `""` |
48-
| IGNORE_DAEMON_SETS | Whether to ignore DaemonSets when draining the nodes | no | `true` |
49-
| DELETE_EMPTY_DIR_DATA | Whether to delete empty dir data when draining the nodes | no | `true` |
50-
| AWS_REGION | Self-explanatory | no | `us-west-2` |
51-
| ENVIRONMENT | If set to `dev`, will try to create the Kubernetes client using your local kubeconfig. Any other values will use the in-cluster configuration | no | `""` |
52-
| EXECUTION_INTERVAL | Duration to sleep between each execution in seconds | no | `20` |
53-
| EXECUTION_TIMEOUT | Maximum execution duration before timing out in seconds | no | `900` |
54-
| METRICS_PORT | Port to bind metrics server to | no | `8080` |
55-
| METRICS | Expose metrics in Promtheus format at `:${METRICS_PORT}/metrics` | no | `""` |
43+
| Environment variable | Description | Required | Default |
44+
|:-----------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------|:------------|
45+
| CLUSTER_NAME | Name of the eks-cluster, used in place of `AUTODISCOVERY_TAGS` and `AUTO_SCALING_GROUP_NAMES`. Checks for `k8s.io/cluster-autoscaler/<CLUSTER_NAME>: owned` and `k8s.io/cluster-autoscaler/enabled: true` tags on ASG | yes | `""` |
46+
| AUTODISCOVERY_TAGS | Comma separated key value string with tags to autodiscover ASGs, used in place of `CLUSTER_NAME` and `AUTO_SCALING_GROUP_NAMES`. | yes | `""` |
47+
| AUTO_SCALING_GROUP_NAMES | Comma-separated list of ASGs, CLUSTER_NAME takes priority. | yes | `""` |
48+
| IGNORE_DAEMON_SETS | Whether to ignore DaemonSets when draining the nodes | no | `true` |
49+
| DELETE_EMPTY_DIR_DATA | Whether to delete empty dir data when draining the nodes | no | `true` |
50+
| AWS_REGION | Self-explanatory | no | `us-west-2` |
51+
| ENVIRONMENT | If set to `dev`, will try to create the Kubernetes client using your local kubeconfig. Any other values will use the in-cluster configuration | no | `""` |
52+
| EXECUTION_INTERVAL | Duration to sleep between each execution in seconds | no | `20` |
53+
| EXECUTION_TIMEOUT | Maximum execution duration before timing out in seconds | no | `900` |
54+
| POD_TERMINATION_GRACE_PERIOD | How long to wait for a pod to terminate in seconds; 0 means "delete immediately"; set to a negative value to use the pod's terminationGracePeriodSeconds. | no | `-1` |
55+
| METRICS_PORT | Port to bind metrics server to | no | `8080` |
56+
| METRICS | Expose metrics in Prometheus format at `:${METRICS_PORT}/metrics` | no | `""` |
5657

5758

5859
## Metrics

config/config.go

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,33 +12,35 @@ import (
1212
var cfg *config
1313

1414
const (
15-
EnvEnvironment = "ENVIRONMENT"
16-
EnvDebug = "DEBUG"
17-
EnvIgnoreDaemonSets = "IGNORE_DAEMON_SETS"
18-
EnvDeleteLocalData = "DELETE_LOCAL_DATA" // Deprecated: in favor of DeleteEmptyDirData (DELETE_EMPTY_DIR_DATA)
19-
EnvDeleteEmptyDirData = "DELETE_EMPTY_DIR_DATA"
20-
EnvClusterName = "CLUSTER_NAME"
21-
EnvAutodiscoveryTags = "AUTODISCOVERY_TAGS"
22-
EnvAutoScalingGroupNames = "AUTO_SCALING_GROUP_NAMES"
23-
EnvAwsRegion = "AWS_REGION"
24-
EnvExecutionInterval = "EXECUTION_INTERVAL"
25-
EnvExecutionTimeout = "EXECUTION_TIMEOUT"
26-
EnvMetrics = "METRICS"
27-
EnvMetricsPort = "METRICS_PORT"
15+
EnvEnvironment = "ENVIRONMENT"
16+
EnvDebug = "DEBUG"
17+
EnvIgnoreDaemonSets = "IGNORE_DAEMON_SETS"
18+
EnvDeleteLocalData = "DELETE_LOCAL_DATA" // Deprecated: in favor of DeleteEmptyDirData (DELETE_EMPTY_DIR_DATA)
19+
EnvDeleteEmptyDirData = "DELETE_EMPTY_DIR_DATA"
20+
EnvClusterName = "CLUSTER_NAME"
21+
EnvAutodiscoveryTags = "AUTODISCOVERY_TAGS"
22+
EnvAutoScalingGroupNames = "AUTO_SCALING_GROUP_NAMES"
23+
EnvAwsRegion = "AWS_REGION"
24+
EnvExecutionInterval = "EXECUTION_INTERVAL"
25+
EnvExecutionTimeout = "EXECUTION_TIMEOUT"
26+
EnvPodTerminationGracePeriod = "POD_TERMINATION_GRACE_PERIOD"
27+
EnvMetrics = "METRICS"
28+
EnvMetricsPort = "METRICS_PORT"
2829
)
2930

3031
type config struct {
31-
Environment string // Optional
32-
Debug bool // Defaults to false
33-
AutoScalingGroupNames []string // Required if AutodiscoveryTags not provided
34-
AutodiscoveryTags string // Required if AutoScalingGroupNames not provided
35-
AwsRegion string // Defaults to us-west-2
36-
IgnoreDaemonSets bool // Defaults to true
37-
DeleteEmptyDirData bool // Defaults to true
38-
ExecutionInterval time.Duration // Defaults to 20s
39-
ExecutionTimeout time.Duration // Defaults to 900s
40-
Metrics bool // Defaults to false
41-
MetricsPort int // Defaults to 8080
32+
Environment string // Optional
33+
Debug bool // Defaults to false
34+
AutoScalingGroupNames []string // Required if AutodiscoveryTags not provided
35+
AutodiscoveryTags string // Required if AutoScalingGroupNames not provided
36+
AwsRegion string // Defaults to us-west-2
37+
IgnoreDaemonSets bool // Defaults to true
38+
DeleteEmptyDirData bool // Defaults to true
39+
ExecutionInterval time.Duration // Defaults to 20s
40+
ExecutionTimeout time.Duration // Defaults to 900s
41+
PodTerminationGracePeriod int // Defaults to -1
42+
Metrics bool // Defaults to false
43+
MetricsPort int // Defaults to 8080
4244
}
4345

4446
// Initialize is used to initialize the application's configuration
@@ -111,6 +113,16 @@ func Initialize() error {
111113
log.Printf("Environment variable '%s' not specified, defaulting to 900 seconds", EnvExecutionTimeout)
112114
cfg.ExecutionTimeout = time.Second * 900
113115
}
116+
if terminationGracePeriod := os.Getenv(EnvPodTerminationGracePeriod); len(terminationGracePeriod) > 0 {
117+
if gracePeriod, err := strconv.Atoi(terminationGracePeriod); err != nil {
118+
return fmt.Errorf("environment variable '%s' must be an integer", EnvPodTerminationGracePeriod)
119+
} else {
120+
cfg.PodTerminationGracePeriod = gracePeriod
121+
}
122+
} else {
123+
log.Printf("Environment variable '%s' not specified, defaulting to -1 (pod's terminationGracePeriodSeconds)", EnvPodTerminationGracePeriod)
124+
cfg.PodTerminationGracePeriod = -1
125+
}
114126
return nil
115127
}
116128

k8s/client.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ type ClientAPI interface {
3333
GetNodeByAutoScalingInstance(instance *autoscaling.Instance) (*v1.Node, error)
3434
FilterNodeByAutoScalingInstance(nodes []v1.Node, instance *autoscaling.Instance) (*v1.Node, error)
3535
UpdateNode(node *v1.Node) error
36-
Drain(nodeName string, ignoreDaemonSets, deleteEmptyDirData bool) error
36+
Drain(nodeName string, ignoreDaemonSets, deleteEmptyDirData bool, podTerminationGracePeriod int) error
3737
}
3838

3939
type Client struct {
@@ -108,7 +108,7 @@ func (k *Client) UpdateNode(node *v1.Node) error {
108108
}
109109

110110
// Drain gracefully deletes all pods from a given node
111-
func (k *Client) Drain(nodeName string, ignoreDaemonSets, deleteEmptyDirData bool) error {
111+
func (k *Client) Drain(nodeName string, ignoreDaemonSets, deleteEmptyDirData bool, podTerminationGracePeriod int) error {
112112
node, err := k.client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
113113
if err != nil {
114114
return err
@@ -118,7 +118,7 @@ func (k *Client) Drain(nodeName string, ignoreDaemonSets, deleteEmptyDirData boo
118118
Force: true,
119119
IgnoreAllDaemonSets: ignoreDaemonSets,
120120
DeleteEmptyDirData: deleteEmptyDirData,
121-
GracePeriodSeconds: -1,
121+
GracePeriodSeconds: podTerminationGracePeriod,
122122
Timeout: 5 * time.Minute,
123123
Ctx: context.TODO(),
124124
Out: drainLogger{NodeName: nodeName},

k8s/client_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
func TestClient_Drain(t *testing.T) {
1212
fakeKubernetesClient := fakekubernetes.NewSimpleClientset(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "default"}})
1313
kc := NewClient(fakeKubernetesClient)
14-
err := kc.Drain("default", true, true)
14+
err := kc.Drain("default", true, true, -1)
1515
if err != nil {
1616
t.Errorf("Unexpected error: %v", err)
1717
}

k8stest/k8stest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func (mock *MockClient) UpdateNode(node *v1.Node) error {
7676
return nil
7777
}
7878

79-
func (mock *MockClient) Drain(nodeName string, ignoreDaemonSets, deleteLocalData bool) error {
79+
func (mock *MockClient) Drain(nodeName string, ignoreDaemonSets, deleteLocalData bool, podTerminationGracePeriod int) error {
8080
mock.Counter["Drain"]++
8181
return nil
8282
}

main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ func DoHandleRollingUpgrade(client k8s.ClientAPI, ec2Service ec2iface.EC2API, au
166166
log.Printf("[%s][%s] Updated nodes have enough resources available", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId))
167167
if minutesSinceDrained == -1 {
168168
log.Printf("[%s][%s] Draining node", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId))
169-
err := client.Drain(node.Name, config.Get().IgnoreDaemonSets, config.Get().DeleteEmptyDirData)
169+
err := client.Drain(node.Name, config.Get().IgnoreDaemonSets, config.Get().DeleteEmptyDirData, config.Get().PodTerminationGracePeriod)
170170
if err != nil {
171171
metrics.Server.Errors.Inc()
172172
log.Printf("[%s][%s] Skipping because ran into error while draining node: %v", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId), err.Error())

0 commit comments

Comments
 (0)