Skip to content

Commit 49258b0

Browse files
authored
Merge pull request #7 from streamnsight/update/testing
Update/testing
2 parents b3e886e + d119d89 commit 49258b0

19 files changed

+562
-20
lines changed

.github/workflows/tests.yaml

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
# Weekly end-to-end test run. The job installs the OCI CLI, writes the CI
# user's credentials from repository secrets, installs terraform and kubectl,
# and then hands over to ./tests/tests.sh.
name: tests

on:
  schedule:
    - cron: '0 7 * * 1'  # Mondays at 07:00 (GitHub evaluates cron in UTC)
  # Also allow the suite to be started by hand from the Actions tab.
  workflow_dispatch:

# The job only reads the repository; keep the GITHUB_TOKEN minimal.
permissions:
  contents: read

jobs:
  tests:
    runs-on: ubuntu-latest
    name: tests

    env:
      OCI_KEY: ${{ secrets.OCI_KEY }}
      OCI_CONFIG: ${{ secrets.OCI_CONFIG }}
      TENANCY_OCID: ${{ secrets.TENANCY_OCID }}
      COMPARTMENT_OCID: ${{ secrets.COMPARTMENT_OCID }}
    steps:
      - uses: actions/checkout@v3
      - name: Run tests
        run: |
          # Install OCI CLI
          wget https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh
          chmod +x ./install.sh
          bash -c "./install.sh --accept-all-defaults"
          mkdir -p ~/.oci
          echo "$OCI_KEY" > ~/.oci/oci_ci_user.pem
          echo "$OCI_CONFIG" > ~/.oci/config
          # The private key and config must be readable by the owner only.
          chmod 600 ~/.oci/oci_ci_user.pem ~/.oci/config
          # install terraform and kubectl
          # NOTE(review): neither package ships in the stock Ubuntu archive, so
          # this relies on the runner image already providing an apt source for
          # them - confirm, or re-enable one of the repository setups below.
          # curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add -
          # sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main"
          sudo apt-get update
          sudo apt-get install -y terraform=1.2.9 kubectl=1.24.12
          # sudo apt-get update && sudo apt-get install -y gnupg software-properties-common
          # wget -O- https://apt.releases.hashicorp.com/gpg | \
          # gpg --dearmor | \
          # sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg

          ./tests/tests.sh

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
.terraform.lock.hcl
33
terraform.tfstate
44
terraform.tfstate.backup
5+
test.tfvars

add_on_k8s_autoscaler.tf

+32-7
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ locals {
77
cluster_autoscaler_image_version = lookup(local.cluster_autoscaler_supported_k8s_versions, local.k8s_major_minor_version, reverse(values(local.cluster_autoscaler_supported_k8s_versions))[0])
88
cluster_autoscaler_image = "iad.ocir.io/oracle/oci-cluster-autoscaler:${local.cluster_autoscaler_image_version}"
99
cluster_autoscaler_log_level_verbosity = 4
10-
cluster_autoscaler_max_node_provision_time = "25m"
11-
cluster_autoscaler_scale_down_delay_after_add = "10m"
12-
cluster_autoscaler_scale_down_unneeded_time = "10m"
13-
cluster_autoscaler_unremovable_node_recheck_timeout = "5m"
10+
cluster_autoscaler_max_node_provision_time = "${var.cluster_autoscaler_max_node_provision_time}m"
11+
cluster_autoscaler_scale_down_delay_after_add = "${var.cluster_autoscaler_scale_down_delay_after_add}m"
12+
cluster_autoscaler_scale_down_unneeded_time = "${var.cluster_autoscaler_scale_down_unneeded_time}m"
13+
cluster_autoscaler_unremovable_node_recheck_timeout = "${var.cluster_autoscaler_unremovable_node_recheck_timeout}m"
1414
cluster_autoscaler_cloud_provider = tonumber(local.k8s_minor_version) <= 23 ? "oci" : "oci-oke"
1515
cluster_autoscaler_enabled = contains(keys(local.cluster_autoscaler_supported_k8s_versions), local.k8s_major_minor_version) ? (var.np1_enable_autoscaler || var.np2_enable_autoscaler || var.np3_enable_autoscaler) : false
1616
k8s_major_minor_version = regex("\\d+(?:\\.(?:\\d+|x)(?:))", local.kubernetes_version)
@@ -273,9 +273,9 @@ resource "kubernetes_deployment" "cluster_autoscaler_deployment" {
273273
"--stderrthreshold=info",
274274
"--cloud-provider=${local.cluster_autoscaler_cloud_provider}",
275275
"--max-node-provision-time=${local.cluster_autoscaler_max_node_provision_time}",
276-
"--nodes=${var.np1_autoscaler_min_nodes}:${var.np1_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[0].id}",
277-
var.node_pool_count >= 2 ? "--nodes=${var.np2_autoscaler_min_nodes}:${var.np2_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[1].id}" : "",
278-
var.node_pool_count >= 3 ? "--nodes=${var.np3_autoscaler_min_nodes}:${var.np3_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[2].id}" : "",
276+
var.np1_enable_autoscaler ? "--nodes=${var.np1_autoscaler_min_nodes}:${var.np1_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[0].id}" : "",
277+
var.node_pool_count >= 2 && var.np2_enable_autoscaler ? "--nodes=${var.np2_autoscaler_min_nodes}:${var.np2_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[1].id}" : "",
278+
var.node_pool_count >= 3 && var.np3_enable_autoscaler ? "--nodes=${var.np3_autoscaler_min_nodes}:${var.np3_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[2].id}" : "",
279279
"--scale-down-delay-after-add=${local.cluster_autoscaler_scale_down_delay_after_add}",
280280
"--scale-down-unneeded-time=${local.cluster_autoscaler_scale_down_unneeded_time}",
281281
"--unremovable-node-recheck-timeout=${local.cluster_autoscaler_unremovable_node_recheck_timeout}",
@@ -338,3 +338,28 @@ resource "kubernetes_pod_disruption_budget_v1" "core_dns_pod_disruption_budget"
338338
oci_containerengine_node_pool.oci_oke_node_pool
339339
]
340340
}
341+
# Pod disruption budget for the cluster autoscaler pods in kube-system.
# It is created only when the autoscaler is enabled (count is 0 otherwise).
resource "kubernetes_pod_disruption_budget_v1" "cluster_autoscaler_pod_disruption_budget" {
  count = local.cluster_autoscaler_enabled ? 1 : 0

  # Wait for the kubeconfig data source and the node pools before creating
  # the budget.
  depends_on = [
    data.oci_containerengine_cluster_kube_config.oke,
    oci_containerengine_node_pool.oci_oke_node_pool,
  ]

  metadata {
    namespace = "kube-system"
    name      = "cluster-autoscaler-pdb"

    # Label carried by the budget object itself.
    labels = {
      "k8s-app" = "cluster-autoscaler"
    }
  }

  spec {
    # Pods covered by the budget are selected by app=cluster-autoscaler.
    # NOTE(review): the selector key (app) differs from the label key used in
    # metadata above (k8s-app) - confirm it matches the deployment's pod labels.
    selector {
      match_labels = {
        "app" = "cluster-autoscaler"
      }
    }

    # At most one selected pod may be unavailable at a time.
    max_unavailable = "1"
  }
}

examples/flink-basic-example.yaml

+7-4
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@ apiVersion: flink.apache.org/v1beta1
2020
kind: FlinkDeployment
2121
metadata:
2222
name: basic-example
23+
namespace: flink
24+
labels:
25+
app: flink
2326
spec:
24-
image: flink:1.15
27+
image: sjc.ocir.io/bigdatadatasciencelarge/testing/flink:1.15
2528
flinkVersion: v1_15
2629
flinkConfiguration:
2730
taskmanager.numberOfTaskSlots: "2"
@@ -30,12 +33,12 @@ spec:
3033
serviceAccount: flink
3134
jobManager:
3235
resource:
33-
memory: "2048m"
34-
cpu: 1
36+
memory: "1024m"
37+
cpu: 0.2
3538
taskManager:
3639
resource:
3740
memory: "2048m"
38-
cpu: 1
41+
cpu: 0.2
3942
job:
4043
jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar
4144
parallelism: 2

oke_cluster.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
## All rights reserved. The Universal Permissive License (UPL), Version 1.0 as shown at http://oss.oracle.com/licenses/upl
33

44
locals {
5-
kubernetes_version = var.kubernetes_version != "" ? var.kubernetes_version : reverse(data.oci_containerengine_cluster_option.cluster_options.kubernetes_versions)[0]
5+
kubernetes_version = (var.kubernetes_version != "" && var.kubernetes_version != null) ? var.kubernetes_version : reverse(data.oci_containerengine_cluster_option.cluster_options.kubernetes_versions)[0]
66
}
77

88
resource "oci_containerengine_cluster" "oci_oke_cluster" {

oke_node_pools.tf

+2-2
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ resource "oci_containerengine_node_pool" "oci_oke_node_pool" {
5757

5858
cluster_id = oci_containerengine_cluster.oci_oke_cluster.id
5959
compartment_id = var.cluster_compartment_id
60-
kubernetes_version = var.kubernetes_version != "" ? var.kubernetes_version : reverse(data.oci_containerengine_cluster_option.cluster_options.kubernetes_versions)[0]
61-
name = "${local.node_pools[count.index]["node_shape"]}_Node_Pool"
60+
kubernetes_version = (var.kubernetes_version != "" && var.kubernetes_version != null) ? var.kubernetes_version : reverse(data.oci_containerengine_cluster_option.cluster_options.kubernetes_versions)[0]
61+
name = "${replace(local.node_pools[count.index]["node_shape"], "Standard", "Std")}${length(regexall("Flex", local.node_pools[count.index]["node_shape"])) > 0 ? "-${local.node_pools[count.index]["ocpus"]}-${local.node_pools[count.index]["memory_gb"]}GB" : ""}"
6262
node_shape = local.node_pools[count.index]["node_shape"]
6363

6464
# initial_node_labels {

schema.yaml

+29-1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,18 @@ variableGroups:
156156
variables:
157157
- enable_pod_admission_controller
158158

# Autoscaler tuning knobs. The group is shown only when at least one of the
# node pools has its autoscaler flag switched on.
- title: "Cluster Autoscaler Settings"
  variables:
    - cluster_autoscaler_max_node_provision_time
    - cluster_autoscaler_scale_down_delay_after_add
    - cluster_autoscaler_scale_down_unneeded_time
    - cluster_autoscaler_unremovable_node_recheck_timeout
  visible:
    or:
      - np1_enable_autoscaler
      - np2_enable_autoscaler
      # Was `np3`, which is not a variable of this stack.
      - np3_enable_autoscaler
170+
159171
- title: "Deployments"
160172
variables:
161173
- enable_flink
@@ -1079,6 +1091,22 @@ variables:
10791091
description: |
10801092
Deploys Prometheus, Grafana and related datasources, plugins and dashboards.
10811093
# Cluster autoscaler timing settings. The Terraform code appends an "m"
# suffix to each of these numbers before passing them to the autoscaler
# command line, so every value is a number of minutes.
cluster_autoscaler_max_node_provision_time:
  type: number
  title: Max node provisioning time before it is considered failed.
  description: Value in minutes.

cluster_autoscaler_scale_down_delay_after_add:
  type: number
  title: Wait time before scaling a node down after it was added.
  description: Value in minutes.

cluster_autoscaler_scale_down_unneeded_time:
  type: number
  title: Wait time before scaling a node down after it is unneeded.
  description: Value in minutes.

cluster_autoscaler_unremovable_node_recheck_timeout:
  type: number
  title: Wait time between checks on unremovable nodes.
  description: Value in minutes.
10821110
outputGroups:
10831111
- title: Access
10841112
outputs:
@@ -1117,4 +1145,4 @@ outputs:
11171145
type: copyableString
11181146
title: Grafana access
11191147
displayText: "Grafana password for 'admin' user"
1120-
visible:
1148+
visible: enable_monitoring_stack

terraform.tfvars.template

+5-4
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
## All rights reserved. The Universal Permissive License (UPL), Version 1.0 as shown at http://oss.oracle.com/licenses/upl
33

44
region = "us-ashburn-1"
5-
tenancy_ocid = "ocid1.tenancy.oc1..."
5+
tenancy_ocid = "ocid1.tenancy.oc1.."
66

77
## Compartments
8-
vcn_compartment_id = "ocid1.compartment.oc1..."
9-
cluster_compartment_id = "ocid1.compartment.oc1..."
8+
vcn_compartment_id = "ocid1.compartment.oc1.."
9+
cluster_compartment_id = "ocid1.compartment.oc1.."
1010

1111
## Network
1212
use_existing_vcn = false
@@ -17,7 +17,7 @@ is_endpoint_public = true
1717

1818
## Cluster
1919
cluster_name = "Flink Cluster"
20-
# kubernetes_version=
20+
kubernetes_version=""
2121
ssh_public_key = "ssh-rsa AAAA..."
2222
node_pool_count = 1
2323
# add_cluster_tag=
@@ -76,3 +76,4 @@ image_validation_key_id = null # "ocid1.key.oc1..."
7676
enable_cert_manager = true
7777
enable_flink = true
7878
enable_metrics_server = true
79+
enable_monitoring_stack = true

tests/A1_full_1np.tfvars

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
## Test scenario A1: every add-on enabled, a single node pool
## (node_pool_count = 1) with the autoscaler turned on for np1.
## NOTE(review): the OCIDs below are truncated placeholders - presumably the
## test harness supplies the real values; confirm in tests/tests.sh.

region       = "us-sanjose-1"
tenancy_ocid = "ocid1.tenancy.oc1.."

## Compartments
vcn_compartment_id     = "ocid1.compartment.oc1.."
cluster_compartment_id = "ocid1.compartment.oc1.."

## Network
use_existing_vcn = false
vcn_cidr         = "10.0.0.0/16"

## Endpoint
is_endpoint_public = true

## Cluster
cluster_name       = "Flink Cluster"
kubernetes_version = null
ssh_public_key     = ""
node_pool_count    = 1
# add_cluster_tag=
# cluster_tag=
# pods_cidr="10.1.0.0/16"
# services_cidr="10.2.0.0/16"

## Node pool 1
# np1_subnet=
np1_node_count = 1
np1_node_shape = "VM.Standard.E3.Flex"
np1_image_id   = "ocid1.image.oc1.us-sanjose-1.aaaaaaaaysxt7adhnhzammcd7qmk423vtrl562lzufquxedyjqp63u4meg7a"
# np1_add_tag=
# np1_tag=
np1_ocpus                = 2
np1_memory_gb            = 8
np1_enable_autoscaler    = true
np1_autoscaler_min_nodes = 1
np1_autoscaler_max_nodes = 3

## Node pool 2
# np2_subnet=
np2_create_new_subnet = true
np2_node_count        = 0
np2_node_shape        = ""
np2_image_id          = "ocid1.image.oc1.us-sanjose-1.aaaaaaaanoevcbgqidanfngql2judmt35azqzlwjkq7oqnvjp6qujyrvqqia"
np2_ha                = true
# np2_availability_domain="UWQV:US-ASHBURN-AD-2"
# np2_ocpus = 4
# np2_memory_gb = 32
np2_enable_autoscaler    = true
np2_autoscaler_min_nodes = 0
np2_autoscaler_max_nodes = 3
# np2_add_tag=
# np2_tag=

## Node pool 3
# np3_subnet=
np3_create_new_subnet = true
# np3_node_count=
# np3_node_shape=
# np3_ocpus = 4
# np3_memory_gb = 32
# np3_enable_autoscaler=true
# np3_autoscaler_min_nodes=0
# np3_autoscaler_max_nodes=3
# np3_image_id=
# np3_add_tag=
# np3_tag=

## Load balancers
# allow_deploy_private_lb = false
# private_lb_subnet=
allow_deploy_public_lb = true
# public_lb_subnet=

## Security
enable_secret_encryption = false
secrets_key_id           = null

enable_image_validation = false
image_validation_key_id = null
# enable_pod_admission_controller=

## Add-ons
enable_cert_manager     = true
enable_flink            = true
enable_metrics_server   = true
enable_monitoring_stack = true

## Cluster autoscaler timings
cluster_autoscaler_max_node_provision_time          = 25
cluster_autoscaler_scale_down_delay_after_add       = 1
cluster_autoscaler_scale_down_unneeded_time         = 1
cluster_autoscaler_unremovable_node_recheck_timeout = 1

tests/A2_full_3np.tfvars

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
## Test scenario A2: every add-on enabled, three node pools
## (node_pool_count = 3); the autoscaler is on for np1 and np2, off for np3.
## NOTE(review): the OCIDs below are truncated placeholders - presumably the
## test harness supplies the real values; confirm in tests/tests.sh.

region       = "us-sanjose-1"
tenancy_ocid = "ocid1.tenancy.oc1.."

## Compartments
vcn_compartment_id     = "ocid1.compartment.oc1.."
cluster_compartment_id = "ocid1.compartment.oc1.."

## Network
use_existing_vcn = false
vcn_cidr         = "10.0.0.0/16"

## Endpoint
is_endpoint_public = true

## Cluster
cluster_name       = "Flink Cluster"
kubernetes_version = null
ssh_public_key     = ""
node_pool_count    = 3
# add_cluster_tag=
# cluster_tag=
# pods_cidr="10.1.0.0/16"
# services_cidr="10.2.0.0/16"

## Node pool 1
# np1_subnet=
np1_node_count = 1
np1_node_shape = "VM.Standard.E3.Flex"
np1_image_id   = "ocid1.image.oc1.us-sanjose-1.aaaaaaaaysxt7adhnhzammcd7qmk423vtrl562lzufquxedyjqp63u4meg7a"
# np1_add_tag=
# np1_tag=
np1_ocpus                = 2
np1_memory_gb            = 8
np1_enable_autoscaler    = true
np1_autoscaler_min_nodes = 1
np1_autoscaler_max_nodes = 3

## Node pool 2
# np2_subnet=
np2_create_new_subnet = true
np2_node_count        = 1
np2_node_shape        = "VM.Standard.E3.Flex"
np2_image_id          = "ocid1.image.oc1.us-sanjose-1.aaaaaaaaysxt7adhnhzammcd7qmk423vtrl562lzufquxedyjqp63u4meg7a"
np2_ha                = true
# np2_availability_domain="UWQV:US-ASHBURN-AD-2"
np2_ocpus                = 1
np2_memory_gb            = 16
np2_enable_autoscaler    = true
np2_autoscaler_min_nodes = 0
np2_autoscaler_max_nodes = 3
# np2_add_tag=
# np2_tag=

## Node pool 3
# np3_subnet=
np3_create_new_subnet = true
np3_node_count        = 1
np3_node_shape        = "VM.Standard.E3.Flex"
np3_ocpus             = 1
np3_memory_gb         = 4
np3_enable_autoscaler = false
# np3_autoscaler_min_nodes=0
# np3_autoscaler_max_nodes=3
np3_image_id = "ocid1.image.oc1.us-sanjose-1.aaaaaaaaysxt7adhnhzammcd7qmk423vtrl562lzufquxedyjqp63u4meg7a"
# np3_add_tag=
# np3_tag=

## Load balancers
# allow_deploy_private_lb = false
# private_lb_subnet=
allow_deploy_public_lb = true
# public_lb_subnet=

## Security
enable_secret_encryption = false
secrets_key_id           = null

enable_image_validation = false
image_validation_key_id = null
# enable_pod_admission_controller=

## Add-ons
enable_cert_manager     = true
enable_flink            = true
enable_metrics_server   = true
enable_monitoring_stack = true

## Cluster autoscaler timings
cluster_autoscaler_max_node_provision_time          = 25
cluster_autoscaler_scale_down_delay_after_add       = 1
cluster_autoscaler_scale_down_unneeded_time         = 1
cluster_autoscaler_unremovable_node_recheck_timeout = 1

0 commit comments

Comments
 (0)