Skip to content

Commit dbcaaf9

Browse files
authored
Merge pull request #5 from oracle-quickstart/feature/monitoring
add monitoring stack
2 parents cf0e1f9 + ad8675a commit dbcaaf9

20 files changed

+1529
-22
lines changed

add_on_dependency_matrix.tf

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
## Copyright © 2023, Oracle and/or its affiliates.
2+
## All rights reserved. The Universal Permissive License (UPL), Version 1.0 as shown at http://oss.oracle.com/licenses/upl
3+
4+
# Defines the triggers that enable specific components based on the selected options
5+
locals {
6+
enable_flink = var.enable_flink
7+
enable_cert_manager = local.enable_flink || var.enable_cert_manager
8+
enable_cluster_autoscaler = var.np1_enable_autoscaler || var.np2_enable_autoscaler || var.np3_enable_autoscaler
9+
enable_monitoring_stack = var.enable_monitoring_stack
10+
enable_metrics_server = local.enable_cluster_autoscaler || var.enable_metrics_server || local.enable_monitoring_stack
11+
enable_grafana_flink_dashboards = local.enable_monitoring_stack && local.enable_flink
12+
}

helm_cert_manager.tf renamed to add_on_helm_cert_manager.tf

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
## https://github.com/jetstack/cert-manager/blob/master/README.md
55
## https://artifacthub.io/packages/helm/cert-manager/cert-manager
66

7-
locals {
8-
enable_cert_manager = var.enable_flink ? true : var.enable_cert_manager
9-
}
7+
# locals {
8+
# enable_cert_manager = var.enable_flink ? true : var.enable_cert_manager
9+
# }
1010

1111
resource "helm_release" "cert_manager" {
1212
count = local.enable_cert_manager ? 1 : 0
File renamed without changes.
File renamed without changes.

add_on_monitoring_stack.tf

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at http://oss.oracle.com/licenses/upl.
3+
4+
locals {
5+
deployment_name = "kps"
6+
vars = { "region" = var.region, "tenancy_ocid" = var.tenancy_ocid }
7+
scrape_configs = flatten([for i in fileset("${path.module}/templates", "*.scrapeConfigs.yaml") : file("${path.module}/templates/${i}")])
8+
grafana_datasources = flatten([for i in fileset("${path.module}/templates", "grafana.*.datasource.yaml") : yamldecode(templatefile("${path.module}/templates/${i}", local.vars))])
9+
grafana_dashboards = flatten([for i in fileset("${path.module}/templates", "grafana.*.dashboard.json") : { "name" = i, "label" = split(".", i)[1] }])
10+
grafana_plugins = file("${path.module}/templates/grafana.plugins.yaml")
11+
}
12+
13+
output dash {
14+
value = local.grafana_dashboards
15+
}
16+
17+
resource "random_password" "grafana_password" {
18+
count = local.enable_monitoring_stack ? 1 : 0
19+
length = 20
20+
special = true
21+
override_special = "#$%&@!_+=./;:][{}]"
22+
}
23+
24+
output "grafana_password" {
25+
value = local.enable_monitoring_stack ? random_password.grafana_password[0].result : ""
26+
sensitive = true
27+
}
28+
29+
resource "helm_release" "kube_prometheus_stack" {
30+
count = local.enable_monitoring_stack ? 1 : 0
31+
name = local.deployment_name
32+
repository = "https://prometheus-community.github.io/helm-charts"
33+
chart = "kube-prometheus-stack"
34+
namespace = "monitoring"
35+
version = "45.8.0"
36+
wait = false
37+
create_namespace = true
38+
39+
set {
40+
name = "prometheus.prometheusSpec.additionalScrapeConfigs"
41+
value = join("\n", local.scrape_configs)
42+
}
43+
44+
# set {
45+
# name = "grafana.sidecar.datasources.defaultDatasourceEnabled"
46+
# value = false
47+
# }
48+
49+
set {
50+
name = "grafana.adminPassword"
51+
value = random_password.grafana_password[0].result
52+
}
53+
54+
values = [
55+
yamlencode({ "grafana" = {
56+
"additionalDataSources" = local.grafana_datasources,
57+
"plugins" = yamldecode(local.grafana_plugins)
58+
} })
59+
]
60+
61+
depends_on = [
62+
oci_containerengine_node_pool.oci_oke_node_pool
63+
]
64+
}
65+
66+
resource "kubernetes_config_map_v1" "grafana_dashboards" {
67+
count = local.enable_monitoring_stack ? length(local.grafana_dashboards) : 0
68+
69+
metadata {
70+
name = "${local.deployment_name}-grafana-${local.grafana_dashboards[count.index].label}"
71+
namespace = "monitoring"
72+
labels = {
73+
"grafana_dashboard" = "1"
74+
}
75+
}
76+
77+
data = {
78+
"${local.grafana_dashboards[count.index].name}" = "${file("${path.module}/templates/${local.grafana_dashboards[count.index].name}")}"
79+
}
80+
depends_on = [
81+
helm_release.kube_prometheus_stack
82+
]
83+
}
84+
85+
resource "kubernetes_config_map_v1" "grafana_plugins" {
86+
count = local.enable_monitoring_stack ? 1 : 0
87+
88+
metadata {
89+
name = "${local.deployment_name}-grafana-plugins"
90+
namespace = "monitoring"
91+
labels = {
92+
"grafana_plugin" = "1"
93+
}
94+
}
95+
96+
data = {
97+
"plugins" = local.grafana_plugins
98+
}
99+
100+
depends_on = [
101+
helm_release.kube_prometheus_stack
102+
]
103+
}

helm_metrics.tf renamed to add_onn_helm_metrics.tf

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
## Copyright © 2022-2023, Oracle and/or its affiliates.
22
## All rights reserved. The Universal Permissive License (UPL), Version 1.0 as shown at http://oss.oracle.com/licenses/upl
33

4-
locals {
5-
enable_metrics_server = var.np1_enable_autoscaler || var.np2_enable_autoscaler || var.np3_enable_autoscaler ? true : var.enable_metrics_server
6-
}
4+
# locals {
5+
# enable_metrics_server = var.np1_enable_autoscaler || var.np2_enable_autoscaler || var.np3_enable_autoscaler ? true : var.enable_metrics_server
6+
# }
77

88
resource "helm_release" "metrics_server" {
99
count = local.enable_metrics_server ? 1 : 0

datasources.tf

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## Copyright © 2022, Oracle and/or its affiliates.
1+
## Copyright © 2022-2023, Oracle and/or its affiliates.
22
## All rights reserved. The Universal Permissive License (UPL), Version 1.0 as shown at http://oss.oracle.com/licenses/upl
33

44
data "oci_containerengine_cluster_option" "cluster_options" {
@@ -12,8 +12,7 @@ data "oci_containerengine_node_pool_option" "oci_oke_node_pool_option" {
1212
# Gets home and current regions
1313
data "oci_identity_tenancy" "tenant_details" {
1414
tenancy_id = var.tenancy_ocid
15-
16-
provider = oci.current_region
15+
provider = oci.current_region
1716
}
1817

1918
data "oci_identity_regions" "home_region" {
@@ -49,6 +48,7 @@ data "oci_limits_limit_definitions" "limit_def" {
4948
service_name = "compute"
5049
}
5150

51+
# build maps of valid shapes for each AD
5252
locals {
5353
availability_map = [for def in data.oci_limits_limit_definitions.limit_def.limit_definitions : def if contains(compact([var.np1_node_shape, var.np2_node_shape, var.np3_node_shape]), def.description)]
5454
limits_definitions = [
@@ -75,6 +75,7 @@ data "oci_core_shapes" "valid_shapes" {
7575
availability_domain = data.oci_identity_availability_domains.ADs.availability_domains[count.index].name
7676
}
7777

78+
# Deploy ID to uniquely identify this cluster and associated resources.
7879
resource "random_string" "deploy_id" {
7980
length = 4
8081
special = false

images.tf

+1-9
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ data "oci_core_image" "np3_image" {
1515
image_id = var.np3_image_id
1616
}
1717

18+
# Identify whether an OKE-specific image is available for the selected Compute image
1819
locals {
1920
k8s_version = replace(local.kubernetes_version, "v", "")
2021
np1_oke_image = var.node_pool_count >= 1 ? [for option
@@ -30,12 +31,3 @@ locals {
3031
option if length(regexall("${data.oci_core_image.np3_image[0].display_name}-OKE-${local.k8s_version}", option.source_name)) > 0] : []
3132
np3_oke_image_id = length(local.np3_oke_image) > 0 ? local.np3_oke_image[0].image_id : var.np3_image_id
3233
}
33-
34-
# output "images" {
35-
# value = {
36-
# k8s_version = local.k8s_version
37-
# np1_oke_image = local.np1_oke_image
38-
# np2_oke_image = local.np2_oke_image
39-
# np3_oke_image = local.np3_oke_image
40-
# }
41-
# }

outputs.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ output "access_command" {
6161
}
6262

6363
output "flink_demo_job" {
64-
value = "kubectl create -f https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.2/examples/basic.yaml"
64+
value = "kubectl create -f https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.3/examples/basic.yaml"
6565
}
6666

6767
output "flink_ui_port_forward" {

policies.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ locals {
1818
}
1919

2020
resource "oci_identity_network_source" "node_pool_network_source" {
21-
provider = oci.home_region
21+
provider = oci.home_region
2222
#Required
2323
compartment_id = var.tenancy_ocid
2424
description = "NSG for ${local.nsg_name} autoscaler"

schema.yaml

+17-2
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ variableGroups:
161161
- enable_flink
162162
- enable_cert_manager
163163
- enable_metrics_server
164+
- enable_monitoring_stack
164165

165166
variables:
166167

@@ -1069,13 +1070,21 @@ variables:
10691070
description: |
10701071
Apache Flink will be installed using the Flink Operator.
10711072
1073+
enable_monitoring_stack:
1074+
type: boolean
1075+
default: true
1076+
title: Deploy Monitoring Stack
1077+
description: |
1078+
Deploys Prometheus, Grafana and related datasources, plugins and dashboards.
1079+
10721080
outputGroups:
1073-
- title: Access Command
1081+
- title: Access
10741082
outputs:
10751083
- access_command
10761084
- flink_demo_job
10771085
- flink_ui_port_forward
10781086
- flink_ui_access
1087+
- grafana_password
10791088

10801089
outputs:
10811090
access_command:
@@ -1100,4 +1109,10 @@ outputs:
11001109
type: link
11011110
title: Flink UI
11021111
displayText: "Access the Flink UI"
1103-
visible: true
1112+
visible: true
1113+
1114+
grafana_password:
1115+
type: copyableString
1116+
title: Grafana access
1117+
displayText: "Grafana password for 'admin' user"
1118+
visible:
+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
k8s-cluster:
2+
gnetId: 7249
3+
revision: 1
4+
datasource: Prometheus
5+
k8s-cluster-metrics:
6+
gnetId: 11663
7+
revision: 1
8+
datasource: Prometheus
9+
k8s-cluster-metrics-simple:
10+
gnetId: 6417
11+
revision: 1
12+
datasource: Prometheus
13+
k8s-pods-monitoring:
14+
gnetId: 13498
15+
revision: 1
16+
datasource: Prometheus
17+
k8s-memory:
18+
gnetId: 13421
19+
revision: 1
20+
datasource: Prometheus
21+
k8s-networking:
22+
gnetId: 12658
23+
revision: 1
24+
datasource: Prometheus
25+
k8s-cluster-autoscaler:
26+
gnetId: 3831
27+
revision: 1
28+
datasource: Prometheus
29+
k8s-hpa:
30+
gnetId: 10257
31+
revision: 1
32+
datasource: Prometheus
33+
k8s-pods:
34+
gnetId: 6336
35+
revision: 1
36+
datasource: Prometheus
37+
oci-compute:
38+
gnetId: 13596
39+
revision: 1
40+
datasource: Oracle Cloud Infrastructure Metrics
41+
oci-oke:
42+
gnetId: 13594
43+
revision: 1
44+
datasource: Oracle Cloud Infrastructure Metrics

0 commit comments

Comments
 (0)