diff --git a/.gitignore b/.gitignore
index 68bc17f..0ebbea4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ __pycache__/
 
 # C extensions
 *.so
+runtime/
 
 # Distribution / packaging
 .Python
diff --git a/README.md b/README.md
index 0c340e5..31db4e8 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,51 @@
-# airflow_etl
\ No newline at end of file
+# estela-airflow-etl
+
+This project manages an Apache Airflow deployment that integrates with estela, using Helm.
+
+## Commands
+Run the following commands from the `k8s-installation` directory.
+
+### Build the Docker Image
+
+```bash
+make build
+```
+Builds the Airflow Docker image and tags it.
+Default values:
+- `PLATFORM=linux/amd64`
+- `IMAGE_NAME=airflow`
+- `TAG=latest`
+- `REGISTRY=localhost:5001`
+
+### Push the Docker Image
+
+```bash
+make push
+```
+Pushes the Docker image to the registry.
+
+### Install Airflow
+
+```bash
+make install
+```
+Installs Airflow using Helm. To configure connections, set the Airflow Docker image, adjust resource limits, and more, edit the `override.yaml` file. It contains commented examples showing how to apply these settings, including the Git sync credentials.
+
+Default values:
+- `RELEASE_NAME=airflow`
+- `NAMESPACE=airflow`
+- `SSH_KEY=~/.ssh/dags_ssh`
+
+### Uninstall Airflow
+
+```bash
+make uninstall
+```
+Removes the Airflow deployment from the cluster.
+
+### Upgrade Airflow
+
+```bash
+make upgrade
+```
+If you make changes to `values.yaml` or `override.yaml`, apply them to the running deployment with this command.
\ No newline at end of file
diff --git a/consumer_manager/consumer_manager.py b/consumer_manager/consumer_manager.py
new file mode 100644
index 0000000..70d4762
--- /dev/null
+++ b/consumer_manager/consumer_manager.py
@@ -0,0 +1,95 @@
+import json
+import requests
+import base64
+
+from confluent_kafka import Consumer, Producer
+
+AIRFLOW_API = "http://localhost:8080"
+
+class ConsumerProxy:
+    internal_queues = {}
+    internal_cnt = {}
+    batch_size = 200
+
+    @staticmethod
+    def delivery_report(err, msg):
+        """Called once for each message produced to indicate delivery result.
+        Triggered by poll() or flush().
""" + if err is not None: + print('Message delivery failed: {}'.format(err)) + else: + print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + + def __init__(self, conf): + conf = { + "bootstrap.servers": "localhost:29092", + "group.id": "consumer_manager", + "auto.offset.reset": "latest", + } + self.conf = conf + self.consumer = Consumer(self.conf) + self.consumer.subscribe(["^job_.*"]) + self.producer = Producer(self.conf) + + + def process_message(self, msg): + dsd_msg = json.loads(msg.value().decode("utf-8")) + topic = msg.topic() + jid = dsd_msg["jid"] + if self.internal_queues.get(f"{jid}-{topic}", None) is None: + self.internal_queues[f"{jid}-{topic}"] = [] + self.internal_cnt[f"{jid}-{topic}"] = 0 + else: + self.internal_queues[f"{jid}-{topic}"].append(dsd_msg) + print(f"Queue size for {jid}-{topic}: {len(self.internal_queues[f'{jid}-{topic}'])}") + if len(self.internal_queues[f"{jid}-{topic}"]) >= self.batch_size: + for item in self.internal_queues[f"{jid}-{topic}"]: + self.producer.poll(0) + self.producer.produce(f"{jid}-{topic}-{self.internal_cnt[f'{jid}-{topic}']}", json.dumps(item).encode("UTF-8"), callback=self.delivery_report) + self.internal_queues[f"{jid}-{topic}"] = [] + self.producer.flush() + job_id, spider_id, project_id = jid.split(".") + payload = { + "conf": { + "topic": f"{jid}-{topic}-{self.internal_cnt[f'{jid}-{topic}']}", + "batch_size": self.batch_size, + "mongo_database": project_id, + "mongo_collection": f"{job_id}-{spider_id}-{topic}", + } + } + self.internal_cnt[f"{jid}-{topic}"] = self.internal_cnt[f"{jid}-{topic}"] + 1 + # Username and password for Airflow + username = "airflow" + password = "airflow" + + # Create a basic authentication header + credentials = f"{username}:{password}" + encoded_credentials = base64.b64encode(credentials.encode()).decode() + headers = { + "Content-Type": "application/json", + # If authentication is required: + "Authorization": f"Basic {encoded_credentials}" + } + path = "/api/v1/dags/etl/dagRuns" + print(f"Triggering DAG: {AIRFLOW_API}{path}") + response = requests.post(f"{AIRFLOW_API}{path}", headers=headers, data=json.dumps(payload)) + # Check the response + if response.status_code == 200: + print("DAG triggered successfully:", response.json()) + else: + print(f"Failed to trigger DAG: {response.status_code} - {response.text}") + + def consume(self): + while True: + msg = self.consumer.poll(20.0) + if msg is None: + continue + if msg.error(): + print("Consumer error: {}".format(msg.error())) + continue + self.process_message(msg) + #print("Received message: {}".format(msg.value().decode("utf-8"))) + +if __name__ == "__main__": + consumer_manager = ConsumerProxy({}) + consumer_manager.consume() diff --git a/dags/etl.py b/dags/etl.py new file mode 100644 index 0000000..41ec708 --- /dev/null +++ b/dags/etl.py @@ -0,0 +1,105 @@ +import json +import os +import pendulum + +#from kafka import KafkaConsumer +from airflow.providers.mongo.hooks.mongo import MongoHook +from airflow.providers.apache.kafka.hooks.consume import KafkaConsumerHook + +from airflow.decorators import task, dag +from airflow import DAG +from airflow.utils.dates import days_ago +from airflow.operators.python import PythonOperator + +now = pendulum.now() + +KAFKA_BROKER = os.getenv("KAFKA_BROKER", "host.docker.internal:29092") + +def split_list(lst, batch_size): + for i in range(0, len(lst), batch_size): + yield lst[i:i + batch_size] + +default_args = { + 'start_date': days_ago(1), +} + +@dag(start_date=now, schedule=None, catchup=False) 
+@dag(start_date=now, schedule=None, catchup=False)
+def etl():
+    @task
+    def consume(**kwargs):
+        cnt = 0
+        batch_size = int(kwargs["params"]["batch_size"])
+        item_kafka_topic = kwargs["params"]["topic"]
+        kafka_hook = KafkaConsumerHook([item_kafka_topic], "estela-kafka")
+        consumer = kafka_hook.get_consumer()
+
+        print(f"Getting the following conf: {kwargs['params']}")
+        message_list = []
+        # Read from the batch topic until batch_size messages have been collected.
+        while True:
+            if cnt >= batch_size:
+                break
+            message = consumer.poll(2.0)
+            if message is None:
+                continue
+            if message.error():
+                print(f"Consumer error: {message.error()}")
+                continue
+            cnt += 1
+            print(message)
+            message_list.append(message.value().decode("utf-8"))
+            print(f"Received message: {message.value()}")
+
+        return message_list
+
+    @task  # each item is expected to be a JSON-encoded string
+    def transform(items):
+        item_list = []
+        for item in items:
+            item_json = json.loads(item)
+            item_list.append(item_json)
+
+        return item_list
+
+    @task
+    def uploading_mongodb(items, **kwargs):
+        #topic = kwargs["params"]["topic"]
+        mongo_conn_id = kwargs["params"].get("mongo_conn_id", "estela-primary")
+        # Database and collection are taken from the DAG run params.
+        mongo = MongoHook(mongo_conn_id=mongo_conn_id)
+        client = mongo.get_conn()
+        # job_id, spider_id, project_id, data_kind = topic.split(".")
+        database_str = kwargs["params"]["mongo_database"]
+        database = client.get_database(database_str)
+        collection = database.get_collection(kwargs["params"]["mongo_collection"])
+        inserted = collection.insert_many([item["payload"] for item in items])
+        if len(inserted.inserted_ids) == len(items):
+            outcome = "All documents were successfully inserted."
+        else:
+            outcome = "Not all documents were successfully inserted."
+        return outcome
+
+    data = consume()
+    items = transform(data)
+    uploading_mongodb(items)
+
+etl()
+
+# with DAG(
+#     dag_id="mongodb_read_dag",
+#     default_args=default_args,
+#     schedule_interval=None,
+#     catchup=False,
+# ) as dag:
+#     def reading_mongodb():
+#         mongo = MongoHook(mongo_conn_id="estela-primary")
+#         client = mongo.get_conn()
+#         database = client.get_database("185c81f4-bc89-41c6-90f6-99b8eef7d876")
+#         collection = database.get_collection("129-210-job_items")
+#         print("Reading from MongoDB: ")
+#         print([item for item in collection.find()])
+#     reading_mongodb = PythonOperator(
+#         task_id="reading_mongodb",
+#         python_callable=reading_mongodb,
+#         provide_context=True,
+#     )
+#     reading_mongodb
diff --git a/docker-installation/Dockerfile b/docker-installation/Dockerfile
new file mode 100644
index 0000000..3bcd73a
--- /dev/null
+++ b/docker-installation/Dockerfile
@@ -0,0 +1,10 @@
+FROM apache/airflow:latest
+
+USER root
+
+COPY --chown=airflow:root ./dags/ /usr/src/app/dags/
+COPY --chown=airflow:root ./dags/ /opt/airflow/dags/
+USER airflow
+WORKDIR /usr/src/app
+COPY requirements.txt .
+RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" -r requirements.txt
diff --git a/docker-installation/docker-compose.yml b/docker-installation/docker-compose.yml
new file mode 100644
index 0000000..4a93a39
--- /dev/null
+++ b/docker-installation/docker-compose.yml
@@ -0,0 +1,290 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL. +# +# WARNING: This configuration is for local development. Do not use it in a production deployment. +# +# This configuration supports basic configuration using environment variables or an .env file +# The following variables are supported: +# +# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. +# Default: apache/airflow:2.9.3 +# AIRFLOW_UID - User ID in Airflow containers +# Default: 50000 +# AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed. +# Default: . +# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode +# +# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested). +# Default: airflow +# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested). +# Default: airflow +# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers. +# Use this option ONLY for quick checks. Installing requirements at container +# startup is done EVERY TIME the service is started. +# A better way is to build a custom image or extend the official image +# as described in https://airflow.apache.org/docs/docker-stack/build.html. +# Default: '' +# +# Feel free to modify this file to suit your needs. +--- +x-airflow-common: + &airflow-common + # In order to add custom dependencies or upgrade provider packages you can use your extended image. + # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml + # and uncomment the "build" line below, Then run `docker-compose build` to build the images. + #image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.9.3} + build: . + environment: + &airflow-common-env + AIRFLOW__CORE__EXECUTOR: CeleryExecutor + AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow + AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow + AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 + AIRFLOW__CORE__FERNET_KEY: '' + AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' + AIRFLOW__CORE__LOAD_EXAMPLES: 'false' + AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' + # yamllint disable rule:line-length + # Use simple http server on scheduler for health checks + # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server + # yamllint enable rule:line-length + AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' + # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks + # for other purpose (development, test and especially production usage) build/extend Airflow image. 
+ _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} + # The following line can be used to set a custom config file, stored in the local config folder + # If you want to use it, outcomment it and replace airflow.cfg with the name of your config file + AIRFLOW_CONFIG: '/opt/airflow/config/airflow.cfg' + volumes: + - ${AIRFLOW_PROJ_DIR:-..}/dags:/opt/airflow/dags + - ${AIRFLOW_PROJ_DIR:-./runtime/}/logs:/opt/airflow/logs + - ${AIRFLOW_PROJ_DIR:-./runtime/}/config:/opt/airflow/config + - ${AIRFLOW_PROJ_DIR:-./runtime/}/plugins:/opt/airflow/plugins + user: "${AIRFLOW_UID:-50000}:0" + depends_on: + &airflow-common-depends-on + redis: + condition: service_healthy + postgres: + condition: service_healthy + +services: + postgres: + image: postgres:13 + environment: + POSTGRES_USER: airflow + POSTGRES_PASSWORD: airflow + POSTGRES_DB: airflow + volumes: + - postgres-db-volume:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "pg_isready", "-U", "airflow"] + interval: 10s + retries: 5 + start_period: 5s + restart: always + + redis: + # Redis is limited to 7.2-bookworm due to licencing change + # https://redis.io/blog/redis-adopts-dual-source-available-licensing/ + image: redis:7.2-bookworm + expose: + - 6379 + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 30s + retries: 50 + start_period: 30s + restart: always + + airflow-webserver: + <<: *airflow-common + command: webserver + ports: + - "8080:8080" + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-scheduler: + <<: *airflow-common + command: scheduler + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-worker: + <<: *airflow-common + command: celery worker + healthcheck: + # yamllint disable rule:line-length + test: + - "CMD-SHELL" + - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"' + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + environment: + <<: *airflow-common-env + # Required to handle warm shutdown of the celery workers properly + # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation + DUMB_INIT_SETSID: "0" + restart: always + extra_hosts: + - "localhost:192.168.0.6" # Map localhost to host.docker.internal + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-triggerer: + <<: *airflow-common + command: triggerer + healthcheck: + test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-init: + <<: *airflow-common + entrypoint: /bin/bash + # yamllint disable rule:line-length + command: + - -c + - | + if [[ -z "${AIRFLOW_UID}" ]]; then + echo + echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" + echo "If you are on Linux, you SHOULD follow the 
instructions below to set " + echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." + echo "For other operating systems you can get rid of the warning with manually created .env file:" + echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" + echo + fi + one_meg=1048576 + mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) + cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) + disk_available=$$(df / | tail -1 | awk '{print $$4}') + warning_resources="false" + if (( mem_available < 4000 )) ; then + echo + echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" + echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" + echo + warning_resources="true" + fi + if (( cpus_available < 2 )); then + echo + echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" + echo "At least 2 CPUs recommended. You have $${cpus_available}" + echo + warning_resources="true" + fi + if (( disk_available < one_meg * 10 )); then + echo + echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" + echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))" + echo + warning_resources="true" + fi + if [[ $${warning_resources} == "true" ]]; then + echo + echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" + echo "Please follow the instructions to increase amount of resources available:" + echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin" + echo + fi + mkdir -p /sources/logs /sources/dags /sources/plugins + chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} + exec /entrypoint airflow version + # yamllint enable rule:line-length + environment: + <<: *airflow-common-env + _AIRFLOW_DB_MIGRATE: 'true' + _AIRFLOW_WWW_USER_CREATE: 'true' + _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} + _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} + _PIP_ADDITIONAL_REQUIREMENTS: '' + user: "0:0" + volumes: + - ${AIRFLOW_PROJ_DIR:-..}:/sources + + airflow-cli: + <<: *airflow-common + profiles: + - debug + environment: + <<: *airflow-common-env + CONNECTION_CHECK_MAX_COUNT: "0" + # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252 + command: + - bash + - -c + - airflow + + # You can enable flower by adding "--profile flower" option e.g. docker-compose --profile flower up + # or by explicitly targeted on the command line e.g. docker-compose up flower. 
+ # See: https://docs.docker.com/compose/profiles/ + flower: + <<: *airflow-common + command: celery flower + profiles: + - flower + ports: + - "5555:5555" + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:5555/"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + +volumes: + postgres-db-volume: \ No newline at end of file diff --git a/docker-installation/requirements.txt b/docker-installation/requirements.txt new file mode 100644 index 0000000..790b625 --- /dev/null +++ b/docker-installation/requirements.txt @@ -0,0 +1,3 @@ +kafka-python-ng +apache-airflow-providers-mongo +apache-airflow-providers-apache-kafka diff --git a/k8s-installation/Makefile b/k8s-installation/Makefile new file mode 100644 index 0000000..51a8908 --- /dev/null +++ b/k8s-installation/Makefile @@ -0,0 +1,37 @@ +PLATFORM ?= linux/amd64 +IMAGE_NAME ?= airflow +TAG ?= latest +REGISTRY ?= "localhost:5001" +RELEASE_NAME ?= "airflow" +NAMESPACE ?= "airflow" +SSH_KEY ?= "~/.ssh/dags_ssh" + +.PHONY: build +build: + cd .. && docker build -f docker-installation/Dockerfile --platform $(PLATFORM) -t $(REGISTRY)/$(IMAGE_NAME):$(TAG) . + +.PHONY: push +push: + docker push $(REGISTRY)/$(IMAGE_NAME):$(TAG) + +.PHONY: install +install: + -$(MAKE) create-connections + -$(MAKE) git-sync-credentials + -helm install $(RELEASE_NAME) apache-airflow/airflow --namespace $(NAMESPACE) --debug --timeout 10m0s -f values.yaml -f override.yaml + +.PHONY: upgrade +upgrade: + -helm upgrade $(RELEASE_NAME) apache-airflow/airflow --namespace $(NAMESPACE) --debug --timeout 10m0s -f values.yaml -f override.yaml + +.PHONY: uninstall +uninstall: + helm uninstall $(RELEASE_NAME) -n $(NAMESPACE) + +.PHONY: create-connections +create-connections: + kubectl apply -f airflow-connections.yaml -n $(NAMESPACE) + +.PHONY: git-sync-credentials +git-sync-credentials: + kubectl apply -f -n $(NAMESPACE) diff --git a/k8s-installation/kind-cluster.yaml b/k8s-installation/kind-cluster.yaml new file mode 100644 index 0000000..47a54ee --- /dev/null +++ b/k8s-installation/kind-cluster.yaml @@ -0,0 +1,34 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "node=worker_1" + extraMounts: + - hostPath: ./data + containerPath: /tmp/data +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "node=worker_2" + extraMounts: + - hostPath: ./data + containerPath: /tmp/data +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "node=worker_3" + extraMounts: + - hostPath: ./data + containerPath: /tmp/data diff --git a/k8s-installation/override.yaml b/k8s-installation/override.yaml new file mode 100644 index 0000000..8e1ba32 --- /dev/null +++ b/k8s-installation/override.yaml @@ -0,0 +1,35 @@ +#extraSecrets: +# my-airflow-connections: +# data: | +# AIRFLOW_CONN_MYCONNID: + +# Default airflow repository -- overridden by all the specific images below +#defaultAirflowRepository: # Update it with the registry image. 
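+#
+# Illustrative example (an assumption, not a chart default): if the image was pushed
+# with the Makefile defaults in this repo (REGISTRY=localhost:5001, IMAGE_NAME=airflow,
+# TAG=latest), the repository override would be:
+#defaultAirflowRepository: localhost:5001/airflow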
+ +# Default airflow tag to deploy +#defaultAirflowTag: "latest" + +# Specify the executor to use, it's recommended to use KubernetesExecutor +# executor: "KubernetesExecutor" + +#env: +# - name: "AIRFLOW__KUBERNETES_EXECUTOR__DELETE_WORKER_PODS" +# value: "False" + +# To limit the resources for workers pods, uncomment the following +# If you want to limit resources e.g. triggerer then you can do something similar +# read: https://airflow.apache.org/docs/helm-chart/stable/index.html +# workers: +# resources: +# limits: +# cpu: "1000m" +# memory: "2Gi" +# requests: +# cpu: "500m" +# memory: "1Gi" + +# dags: +# gitSync: +# enabled: true +# repo: +# sshKeySecret: diff --git a/k8s-installation/values.yaml b/k8s-installation/values.yaml new file mode 100644 index 0000000..deb3139 --- /dev/null +++ b/k8s-installation/values.yaml @@ -0,0 +1,2726 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +# Default values for airflow. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Provide a name to substitute for the full names of resources +fullnameOverride: "" + +# Provide a name to substitute for the name of the chart +nameOverride: "" + +# Use standard naming for all resources using airflow.fullname template +# Consider removing this later and default it to true +# to make this chart follow standard naming conventions using the fullname template. +# For now this is an opt-in switch for backwards compatibility to leverage the standard naming convention +# and being able to use fully fullnameOverride and nameOverride in all resources +# For new installations - it is recommended to set it to True to follow standard naming conventions +# For existing installations, this will rename and redeploy your resources with the new names. Be aware that +# this will recreate your deployment/statefulsets along with their persistent volume claims and data storage +# migration may be needed to keep your old data +# +# Note:fernet-key,redis-password and broker-url secrets don't use this logic yet, +# as this may break existing installations due to how they get installed via pre-install hook. +useStandardNaming: false + +# Max number of old replicasets to retain. 
Can be overridden by each deployment's revisionHistoryLimit +revisionHistoryLimit: ~ + +# User and group of airflow user +uid: 50000 +gid: 0 + +# Default security context for airflow (deprecated, use `securityContexts` instead) +securityContext: {} +# runAsUser: 50000 +# fsGroup: 0 +# runAsGroup: 0 + +# Detailed default security context for airflow deployments +securityContexts: + pod: {} + containers: {} + +# Global container lifecycle hooks for airflow containers +containerLifecycleHooks: {} + +# Airflow home directory +# Used for mount paths +airflowHome: /opt/airflow + +# Default airflow repository -- overridden by all the specific images below +defaultAirflowRepository: apache/airflow # Tiene que ir el registry + +# Default airflow tag to deploy +defaultAirflowTag: "latest" + +# Default airflow digest. If specified, it takes precedence over tag +defaultAirflowDigest: ~ + +# Airflow version (Used to make some decisions based on Airflow Version being deployed) +airflowVersion: "2.9.3" + +# Images +images: + airflow: + repository: ~ + tag: ~ + # Specifying digest takes precedence over tag. + digest: ~ + pullPolicy: IfNotPresent + # To avoid images with user code, you can turn this to 'true' and + # all the 'run-airflow-migrations' and 'wait-for-airflow-migrations' containers/jobs + # will use the images from 'defaultAirflowRepository:defaultAirflowTag' values + # to run and wait for DB migrations . + useDefaultImageForMigration: false + # timeout (in seconds) for airflow-migrations to complete + migrationsWaitTimeout: 60 + pod_template: + # Note that `images.pod_template.repository` and `images.pod_template.tag` parameters + # can be overridden in `config.kubernetes` section. So for these parameters to have effect + # `config.kubernetes.worker_container_repository` and `config.kubernetes.worker_container_tag` + # must be not set . + repository: ~ + tag: ~ + pullPolicy: IfNotPresent + flower: + repository: ~ + tag: ~ + pullPolicy: IfNotPresent + statsd: + repository: quay.io/prometheus/statsd-exporter + tag: v0.26.1 + pullPolicy: IfNotPresent + redis: + repository: redis + # Redis is limited to 7.2-bookworm due to licencing change + # https://redis.io/blog/redis-adopts-dual-source-available-licensing/ + tag: 7.2-bookworm + pullPolicy: IfNotPresent + pgbouncer: + repository: apache/airflow + tag: airflow-pgbouncer-2024.01.19-1.21.0 + pullPolicy: IfNotPresent + pgbouncerExporter: + repository: apache/airflow + tag: airflow-pgbouncer-exporter-2024.06.18-0.17.0 + pullPolicy: IfNotPresent + gitSync: + repository: registry.k8s.io/git-sync/git-sync + tag: v4.1.0 + pullPolicy: IfNotPresent + +# Select certain nodes for airflow pods. +nodeSelector: {} +affinity: {} +tolerations: [] +topologySpreadConstraints: [] +schedulerName: ~ + +# Add common labels to all objects and pods defined in this chart. 
+labels: {} + +# Ingress configuration +ingress: + # Enable all ingress resources (deprecated - use ingress.web.enabled and ingress.flower.enabled) + enabled: ~ + + # Configs for the Ingress of the web Service + web: + # Enable web ingress resource + enabled: false + + # Annotations for the web Ingress + annotations: {} + + # The path for the web Ingress + path: "/" + + # The pathType for the above path (used only with Kubernetes v1.19 and above) + pathType: "ImplementationSpecific" + + # The hostname for the web Ingress (Deprecated - renamed to `ingress.web.hosts`) + host: "" + + # The hostnames or hosts configuration for the web Ingress + hosts: [] + # # The hostname for the web Ingress (can be templated) + # - name: "" + # # configs for web Ingress TLS + # tls: + # # Enable TLS termination for the web Ingress + # enabled: false + # # the name of a pre-created Secret containing a TLS private key and certificate + # secretName: "" + + # The Ingress Class for the web Ingress (used only with Kubernetes v1.19 and above) + ingressClassName: "" + + # configs for web Ingress TLS (Deprecated - renamed to `ingress.web.hosts[*].tls`) + tls: + # Enable TLS termination for the web Ingress + enabled: false + # the name of a pre-created Secret containing a TLS private key and certificate + secretName: "" + + # HTTP paths to add to the web Ingress before the default path + precedingPaths: [] + + # Http paths to add to the web Ingress after the default path + succeedingPaths: [] + + # Configs for the Ingress of the flower Service + flower: + # Enable web ingress resource + enabled: false + + # Annotations for the flower Ingress + annotations: {} + + # The path for the flower Ingress + path: "/" + + # The pathType for the above path (used only with Kubernetes v1.19 and above) + pathType: "ImplementationSpecific" + + # The hostname for the flower Ingress (Deprecated - renamed to `ingress.flower.hosts`) + host: "" + + # The hostnames or hosts configuration for the flower Ingress + hosts: [] + # # The hostname for the flower Ingress (can be templated) + # - name: "" + # tls: + # # Enable TLS termination for the flower Ingress + # enabled: false + # # the name of a pre-created Secret containing a TLS private key and certificate + # secretName: "" + + # The Ingress Class for the flower Ingress (used only with Kubernetes v1.19 and above) + ingressClassName: "" + + # configs for flower Ingress TLS (Deprecated - renamed to `ingress.flower.hosts[*].tls`) + tls: + # Enable TLS termination for the flower Ingress + enabled: false + # the name of a pre-created Secret containing a TLS private key and certificate + secretName: "" + +# Network policy configuration +networkPolicies: + # Enabled network policies + enabled: false + +# Extra annotations to apply to all +# Airflow pods +airflowPodAnnotations: {} + +# Extra annotations to apply to +# main Airflow configmap +airflowConfigAnnotations: {} + +# `airflow_local_settings` file as a string (can be templated). +airflowLocalSettings: |- + {{- if semverCompare ">=2.2.0" .Values.airflowVersion }} + {{- if not (or .Values.webserverSecretKey .Values.webserverSecretKeySecretName) }} + from airflow.www.utils import UIAlert + + DASHBOARD_UIALERTS = [ + UIAlert( + 'Usage of a dynamic webserver secret key detected. We recommend a static webserver secret key instead.' 
+ ' See the ' + 'Helm Chart Production Guide for more details.', + category="warning", + roles=["Admin"], + html=True, + ) + ] + {{- end }} + {{- end }} + +# Enable RBAC (default on most clusters these days) +rbac: + # Specifies whether RBAC resources should be created + create: true + createSCCRoleBinding: false + +# Airflow executor +# One of: LocalExecutor, LocalKubernetesExecutor, CeleryExecutor, KubernetesExecutor, CeleryKubernetesExecutor +executor: "CeleryExecutor" + +# If this is true and using LocalExecutor/KubernetesExecutor/CeleryKubernetesExecutor, the scheduler's +# service account will have access to communicate with the api-server and launch pods. +# If this is true and using CeleryExecutor/KubernetesExecutor/CeleryKubernetesExecutor, the workers +# will be able to launch pods. +allowPodLaunching: true + +# Environment variables for all airflow containers +env: [] +# - name: "" +# value: "" + +# Volumes for all airflow containers +volumes: [] + +# VolumeMounts for all airflow containers +volumeMounts: [] + +# Secrets for all airflow containers +secret: [] +# - envName: "" +# secretName: "" +# secretKey: "" + +# Enables selected built-in secrets that are set via environment variables by default. +# Those secrets are provided by the Helm Chart secrets by default but in some cases you +# might want to provide some of those variables with _CMD or _SECRET variable, and you should +# in this case disable setting of those variables by setting the relevant configuration to false. +enableBuiltInSecretEnvVars: + AIRFLOW__CORE__FERNET_KEY: true + # For Airflow <2.3, backward compatibility; moved to [database] in 2.3 + AIRFLOW__CORE__SQL_ALCHEMY_CONN: true + AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: true + AIRFLOW_CONN_AIRFLOW_DB: true + AIRFLOW__WEBSERVER__SECRET_KEY: true + AIRFLOW__CELERY__CELERY_RESULT_BACKEND: true + AIRFLOW__CELERY__RESULT_BACKEND: true + AIRFLOW__CELERY__BROKER_URL: true + AIRFLOW__ELASTICSEARCH__HOST: true + AIRFLOW__ELASTICSEARCH__ELASTICSEARCH_HOST: true + +# Priority Classes that will be installed by charts. +# Ideally, there should be an entry for dagProcessor, flower, +# pgbouncer, scheduler, statsd, triggerer, webserver, worker. +# The format for priorityClasses is an array with each element having: +# * name is the name of the priorityClass. Ensure the same name is given to the respective section as well +# * preemptionPolicy for the priorityClass +# * value is the preemption value for the priorityClass +priorityClasses: [] +# - name: class1 (if this is for dagProcessor, ensure overriding .Values.dagProcessor.priorityClass too) +# preemptionPolicy: PreemptLowerPriority +# value: 10000 +# - name: class2 +# preemptionPolicy: Never +# value: 100000 + +# Extra secrets that will be managed by the chart +# (You can use them with extraEnv or extraEnvFrom or some of the extraVolumes values). +# The format for secret data is "key/value" where +# * key (can be templated) is the name of the secret that will be created +# * value: an object with the standard 'data' or 'stringData' key (or both). +# The value associated with those keys must be a string (can be templated) +extraSecrets: {} +# eg: +# extraSecrets: +# '{{ .Release.Name }}-airflow-connections': +# type: 'Opaque' +# labels: +# my.custom.label/v1: my_custom_label_value_1 +# data: | +# AIRFLOW_CONN_GCP: 'base64_encoded_gcp_conn_string' +# AIRFLOW_CONN_AWS: 'base64_encoded_aws_conn_string' +# stringData: | +# AIRFLOW_CONN_OTHER: 'other_conn' +# '{{ .Release.Name }}-other-secret-name-suffix': +# data: | +# ... 
+# 'proxy-config': +# stringData: | +# HTTP_PROXY: http://proxy_user:proxy_password@192.168.0.10:2080 +# HTTPS_PROXY: http://proxy_user:proxy_password@192.168.0.10:2080 +# NO_PROXY: "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc" + +# Extra ConfigMaps that will be managed by the chart +# (You can use them with extraEnv or extraEnvFrom or some of the extraVolumes values). +# The format for configmap data is "key/value" where +# * key (can be templated) is the name of the configmap that will be created +# * value: an object with the standard 'data' key. +# The value associated with this keys must be a string (can be templated) +extraConfigMaps: {} +# eg: +# extraConfigMaps: +# '{{ .Release.Name }}-airflow-variables': +# labels: +# my.custom.label/v2: my_custom_label_value_2 +# data: | +# AIRFLOW_VAR_HELLO_MESSAGE: "Hi!" +# AIRFLOW_VAR_KUBERNETES_NAMESPACE: "{{ .Release.Namespace }}" + +# Extra env 'items' that will be added to the definition of airflow containers +# a string is expected (can be templated). +# TODO: difference from `env`? This is a templated string. Probably should template `env` and remove this. +extraEnv: ~ +# eg: +# extraEnv: | +# - name: AIRFLOW__CORE__LOAD_EXAMPLES +# value: 'True' + +# Extra envFrom 'items' that will be added to the definition of airflow containers +# A string is expected (can be templated). +extraEnvFrom: #| + #- secretRef: + # name: 'airflow-connections' +# eg: +# extraEnvFrom: | +# - secretRef: +# name: '{{ .Release.Name }}-airflow-connections' +# - configMapRef: +# name: '{{ .Release.Name }}-airflow-variables' + +# Airflow database & redis config +data: + # If secret names are provided, use those secrets + # These secrets must be created manually, eg: + # + # kind: Secret + # apiVersion: v1 + # metadata: + # name: custom-airflow-metadata-secret + # type: Opaque + # data: + # connection: base64_encoded_connection_string + + metadataSecretName: ~ + # When providing secret names and using the same database for metadata and + # result backend, for Airflow < 2.4.0 it is necessary to create a separate + # secret for result backend but with a db+ scheme prefix. + # For Airflow >= 2.4.0 it is possible to not specify the secret again, + # as Airflow will use sql_alchemy_conn with a db+ scheme prefix by default. + resultBackendSecretName: ~ + brokerUrlSecretName: ~ + + # Otherwise pass connection values in + metadataConnection: + user: postgres + pass: postgres + protocol: postgresql + host: ~ + port: 5432 + db: postgres + sslmode: disable + # resultBackendConnection defaults to the same database as metadataConnection + resultBackendConnection: ~ + # or, you can use a different database + # resultBackendConnection: + # user: postgres + # pass: postgres + # protocol: postgresql + # host: ~ + # port: 5432 + # db: postgres + # sslmode: disable + # Note: brokerUrl can only be set during install, not upgrade + brokerUrl: ~ + +# Fernet key settings +# Note: fernetKey can only be set during install, not upgrade +fernetKey: ~ +fernetKeySecretName: ~ + +# Flask secret key for Airflow Webserver: `[webserver] secret_key` in airflow.cfg +webserverSecretKey: 9a11914029cb88be617554bdb02c6468 +webserverSecretKeySecretName: ~ + +# In order to use kerberos you need to create secret containing the keytab file +# The secret name should follow naming convention of the application where resources are +# name {{ .Release-name }}-. 
In case of the keytab file, the postfix is "kerberos-keytab" +# So if your release is named "my-release" the name of the secret should be "my-release-kerberos-keytab" +# +# The Keytab content should be available in the "kerberos.keytab" key of the secret. +# +# apiVersion: v1 +# kind: Secret +# data: +# kerberos.keytab: +# type: Opaque +# +# +# If you have such keytab file you can do it with similar +# +# kubectl create secret generic {{ .Release.name }}-kerberos-keytab --from-file=kerberos.keytab +# +# +# Alternatively, instead of manually creating the secret, it is possible to specify +# kerberos.keytabBase64Content parameter. This parameter should contain base64 encoded keytab. +# + +kerberos: + enabled: false + ccacheMountPath: /var/kerberos-ccache + ccacheFileName: cache + configPath: /etc/krb5.conf + keytabBase64Content: ~ + keytabPath: /etc/airflow.keytab + principal: airflow@FOO.COM + reinitFrequency: 3600 + config: | + # This is an example config showing how you can use templating and how "example" config + # might look like. It works with the test kerberos server that we are using during integration + # testing at Apache Airflow (see `scripts/ci/docker-compose/integration-kerberos.yml` but in + # order to make it production-ready you must replace it with your own configuration that + # Matches your kerberos deployment. Administrators of your Kerberos instance should + # provide the right configuration. + + [logging] + default = "FILE:{{ template "airflow_logs_no_quote" . }}/kerberos_libs.log" + kdc = "FILE:{{ template "airflow_logs_no_quote" . }}/kerberos_kdc.log" + admin_server = "FILE:{{ template "airflow_logs_no_quote" . }}/kadmind.log" + + [libdefaults] + default_realm = FOO.COM + ticket_lifetime = 10h + renew_lifetime = 7d + forwardable = true + + [realms] + FOO.COM = { + kdc = kdc-server.foo.com + admin_server = admin_server.foo.com + } + +# Airflow Worker Config +workers: + # Number of airflow celery workers in StatefulSet + replicas: 1 + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + + # Command to use when running Airflow workers (templated). + command: ~ + # Args to use when running Airflow workers (templated). + args: + - "bash" + - "-c" + # The format below is necessary to get `helm lint` happy + - |- + exec \ + airflow {{ semverCompare ">=2.0.0" .Values.airflowVersion | ternary "celery worker" "worker" }} + + # If the worker stops responding for 5 minutes (5*60s) kill the + # worker and let Kubernetes restart it + livenessProbe: + enabled: true + initialDelaySeconds: 10 + timeoutSeconds: 20 + failureThreshold: 5 + periodSeconds: 60 + command: ~ + + # Update Strategy when worker is deployed as a StatefulSet + updateStrategy: ~ + # Update Strategy when worker is deployed as a Deployment + strategy: + rollingUpdate: + maxSurge: "100%" + maxUnavailable: "50%" + + # When not set, the values defined in the global securityContext will be used + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for worker deployments for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. 
+ # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to worker kubernetes service account. + annotations: {} + + # Allow KEDA autoscaling. + keda: + enabled: false + namespaceLabels: {} + + # How often KEDA polls the airflow DB to report new scale requests to the HPA + pollingInterval: 5 + + # How many seconds KEDA will wait before scaling to zero. + # Note that HPA has a separate cooldown period for scale-downs + cooldownPeriod: 30 + + # Minimum number of workers created by keda + minReplicaCount: 0 + + # Maximum number of workers created by keda + maxReplicaCount: 10 + + # Specify HPA related options + advanced: {} + # horizontalPodAutoscalerConfig: + # behavior: + # scaleDown: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Percent + # value: 100 + # periodSeconds: 15 + + # Query to use for KEDA autoscaling. Must return a single integer. + query: >- + SELECT ceil(COUNT(*)::decimal / {{ .Values.config.celery.worker_concurrency }}) + FROM task_instance + WHERE (state='running' OR state='queued') + {{- if eq .Values.executor "CeleryKubernetesExecutor" }} + AND queue != '{{ .Values.config.celery_kubernetes_executor.kubernetes_queue }}' + {{- end }} + + # Weather to use PGBouncer to connect to the database or not when it is enabled + # This configuration will be ignored if PGBouncer is not enabled + usePgbouncer: true + + # Allow HPA (KEDA must be disabled). + hpa: + enabled: false + + # Minimum number of workers created by HPA + minReplicaCount: 0 + + # Maximum number of workers created by HPA + maxReplicaCount: 5 + + # Specifications for which to use to calculate the desired replica count + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 80 + + # Scaling behavior of the target in both Up and Down directions + behavior: {} + + persistence: + # Enable persistent volumes + enabled: true + # This policy determines whether PVCs should be deleted when StatefulSet is scaled down or removed. + persistentVolumeClaimRetentionPolicy: ~ + # persistentVolumeClaimRetentionPolicy: + # whenDeleted: Delete + # whenScaled: Delete + # Volume size for worker StatefulSet + size: 100Gi + # If using a custom storageClass, pass name ref to all statefulSets here + storageClassName: + # Execute init container to chown log directory. + # This is currently only needed in kind, due to usage + # of local-path provisioner. 
+ fixPermissions: false + # Annotations to add to worker volumes + annotations: {} + # Detailed default security context for persistence for container level + securityContexts: + container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + + kerberosSidecar: + # Enable kerberos sidecar + enabled: false + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + # Detailed default security context for kerberosSidecar for container level + securityContexts: + container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + + kerberosInitContainer: + # Enable kerberos init container + enabled: false + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # Grace period for tasks to finish after SIGTERM is sent from kubernetes + terminationGracePeriodSeconds: 600 + + # This setting tells kubernetes that its ok to evict + # when it wants to scale a node down. + safeToEvict: false + + # Launch additional containers into worker (templated). + # Note: If used with KubernetesExecutor, you are responsible for signaling sidecars to exit when the main + # container finishes so Airflow can continue the worker shutdown process! + extraContainers: [] + # Add additional init containers into workers (templated). + extraInitContainers: [] + + # Mount additional volumes into worker. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . }}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + # Select certain nodes for airflow worker pods. + nodeSelector: {} + runtimeClassName: ~ + priorityClassName: ~ + affinity: {} + # default worker affinity is: + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - podAffinityTerm: + # labelSelector: + # matchLabels: + # component: worker + # topologyKey: kubernetes.io/hostname + # weight: 100 + tolerations: [] + topologySpreadConstraints: [] + # hostAliases to use in worker pods. + # See: + # https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/ + hostAliases: [] + # - ip: "127.0.0.2" + # hostnames: + # - "test.hostname.one" + # - ip: "127.0.0.3" + # hostnames: + # - "test.hostname.two" + + # annotations for the worker resource + annotations: {} + + podAnnotations: {} + + # Labels specific to workers objects and pods + labels: {} + + logGroomerSidecar: + # Whether to deploy the Airflow worker log groomer sidecar. + enabled: true + # Command to use when running the Airflow worker log groomer sidecar (templated). + command: ~ + # Args to use when running the Airflow worker log groomer sidecar (templated). 
+ args: ["bash", "/clean-logs"] + # Number of days to retain logs + retentionDays: 15 + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + # Detailed default security context for logGroomerSidecar for container level + securityContexts: + container: {} + + waitForMigrations: + # Whether to create init container to wait for db migrations + enabled: true + env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} + + env: [] + + volumeClaimTemplates: [] + # Additional volumeClaimTemplates needed. + # Comment out the above and uncomment the section below to enable it. + # Add more as needed + # Make sure to mount it under extraVolumeMounts. + # volumeClaimTemplates: + # - metadata: + # name: data-volume-1 + # spec: + # storageClassName: "storage-class-1" + # accessModes: + # - "ReadWriteOnce" + # resources: + # requests: + # storage: "10Gi" + # - metadata: + # name: data-volume-2 + # spec: + # storageClassName: "storage-class-2" + # accessModes: + # - "ReadWriteOnce" + # resources: + # requests: + # storage: "20Gi" + +# Airflow scheduler settings +scheduler: + enabled: true + # hostAliases for the scheduler pod + hostAliases: [] + # - ip: "127.0.0.1" + # hostnames: + # - "foo.local" + # - ip: "10.1.2.3" + # hostnames: + # - "foo.remote" + + # If the scheduler stops heartbeating for 5 minutes (5*60s) kill the + # scheduler and let Kubernetes restart it + livenessProbe: + initialDelaySeconds: 10 + timeoutSeconds: 20 + failureThreshold: 5 + periodSeconds: 60 + command: ~ + + # Wait for at most 1 minute (6*10s) for the scheduler container to startup. + # livenessProbe kicks in after the first successful startupProbe + startupProbe: + failureThreshold: 6 + periodSeconds: 10 + timeoutSeconds: 20 + command: ~ + + # Airflow 2.0 allows users to run multiple schedulers, + # However this feature is only recommended for MySQL 8+ and Postgres + replicas: 1 + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + + # Command to use when running the Airflow scheduler (templated). + command: ~ + # Args to use when running the Airflow scheduler (templated). + args: ["bash", "-c", "exec airflow scheduler"] + + # Update Strategy when scheduler is deployed as a StatefulSet + # (when using LocalExecutor and workers.persistence) + updateStrategy: ~ + # Update Strategy when scheduler is deployed as a Deployment + # (when not using LocalExecutor and workers.persistence) + strategy: ~ + + # When not set, the values defined in the global securityContext will be used + # (deprecated, use `securityContexts` instead) + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for scheduler deployments for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to scheduler kubernetes service account. 
+ annotations: {} + + # Scheduler pod disruption budget + podDisruptionBudget: + enabled: false + + # PDB configuration + config: + # minAvailable and maxUnavailable are mutually exclusive + maxUnavailable: 1 + # minAvailable: 1 + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # This setting tells kubernetes that its ok to evict + # when it wants to scale a node down. + safeToEvict: true + + # Launch additional containers into scheduler (templated). + extraContainers: [] + # Add additional init containers into scheduler (templated). + extraInitContainers: [] + + # Mount additional volumes into scheduler. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . }}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + # Select certain nodes for airflow scheduler pods. + nodeSelector: {} + affinity: {} + # default scheduler affinity is: + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - podAffinityTerm: + # labelSelector: + # matchLabels: + # component: scheduler + # topologyKey: kubernetes.io/hostname + # weight: 100 + tolerations: [] + topologySpreadConstraints: [] + + priorityClassName: ~ + + # annotations for scheduler deployment + annotations: {} + + podAnnotations: {} + + # Labels specific to scheduler objects and pods + labels: {} + + logGroomerSidecar: + # Whether to deploy the Airflow scheduler log groomer sidecar. + enabled: true + # Command to use when running the Airflow scheduler log groomer sidecar (templated). + command: ~ + # Args to use when running the Airflow scheduler log groomer sidecar (templated). + args: ["bash", "/clean-logs"] + # Number of days to retain logs + retentionDays: 15 + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + # Detailed default security context for logGroomerSidecar for container level + securityContexts: + container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + + waitForMigrations: + # Whether to create init container to wait for db migrations + enabled: true + env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} + + env: [] + +# Airflow create user job settings +createUserJob: + # Limit the lifetime of the job object after it finished execution. + ttlSecondsAfterFinished: 300 + # Command to use when running the create user job (templated). + command: ~ + # Args to use when running the create user job (templated). 
+ args: + - "bash" + - "-c" + # The format below is necessary to get `helm lint` happy + - |- + exec \ + airflow {{ semverCompare ">=2.0.0" .Values.airflowVersion | ternary "users create" "create_user" }} "$@" + - -- + - "-r" + - "{{ .Values.webserver.defaultUser.role }}" + - "-u" + - "{{ .Values.webserver.defaultUser.username }}" + - "-e" + - "{{ .Values.webserver.defaultUser.email }}" + - "-f" + - "{{ .Values.webserver.defaultUser.firstName }}" + - "-l" + - "{{ .Values.webserver.defaultUser.lastName }}" + - "-p" + - "{{ .Values.webserver.defaultUser.password }}" + + # Annotations on the create user job pod + annotations: {} + # jobAnnotations are annotations on the create user job + jobAnnotations: {} + + # Labels specific to createUserJob objects and pods + labels: {} + + # When not set, the values defined in the global securityContext will be used + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for createUserJob for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to create user kubernetes service account. + annotations: {} + + # Launch additional containers into user creation job + extraContainers: [] + + # Add additional init containers into user creation job (templated). + extraInitContainers: [] + + # Mount additional volumes into user creation job. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . }}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + nodeSelector: {} + affinity: {} + tolerations: [] + topologySpreadConstraints: [] + priorityClassName: ~ + # In case you need to disable the helm hooks that create the jobs after install. + # Disable this if you are using ArgoCD for example + useHelmHooks: true + applyCustomEnv: true + + env: [] + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# Airflow database migration job settings +migrateDatabaseJob: + enabled: true + # Limit the lifetime of the job object after it finished execution. + ttlSecondsAfterFinished: 300 + # Command to use when running the migrate database job (templated). + command: ~ + # Args to use when running the migrate database job (templated). 
+ args: + - "bash" + - "-c" + - >- + exec \ + + airflow {{ semverCompare ">=2.7.0" .Values.airflowVersion + | ternary "db migrate" (semverCompare ">=2.0.0" .Values.airflowVersion + | ternary "db upgrade" "upgradedb") }} + + # Annotations on the database migration pod + annotations: {} + # jobAnnotations are annotations on the database migration job + jobAnnotations: {} + + # Labels specific to migrate database job objects and pods + labels: {} + + # When not set, the values defined in the global securityContext will be used + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for migrateDatabaseJob for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to migrate database job kubernetes service account. + annotations: {} + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # Launch additional containers into database migration job + extraContainers: [] + + # Add additional init containers into migrate database job (templated). + extraInitContainers: [] + + # Mount additional volumes into database migration job. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . }}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + nodeSelector: {} + affinity: {} + tolerations: [] + topologySpreadConstraints: [] + priorityClassName: ~ + # In case you need to disable the helm hooks that create the jobs after install. + # Disable this if you are using ArgoCD for example + useHelmHooks: true + applyCustomEnv: true + +# rpcServer support is experimental / dev purpose only and will later be renamed +_rpcServer: + enabled: false + + # Labels specific to workers objects and pods + labels: {} + + # Command to use when running the Airflow rpc server (templated). + command: + - "bash" + # Args to use when running the Airflow rpc server (templated). + args: ["-c", "exec airflow internal-api"] + env: [] + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to webserver kubernetes service account. 
+ annotations: {} + service: + type: ClusterIP + ## service annotations + annotations: {} + ports: + - name: rpc-server + port: "{{ .Values.ports._rpcServer }}" + + loadBalancerIP: ~ + ## Limit load balancer source ips to list of CIDRs + # loadBalancerSourceRanges: + # - "10.123.0.0/16" + loadBalancerSourceRanges: [] + + podDisruptionBudget: + enabled: false + + # PDB configuration + config: + # minAvailable and maxUnavailable are mutually exclusive + maxUnavailable: 1 + # minAvailable: 1 + + # Detailed default security contexts for webserver deployments for container and pod level + securityContexts: + pod: {} + container: {} + + waitForMigrations: + # Whether to create init container to wait for db migrations + enabled: true + env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} + + # Launch additional containers into the flower pods. + extraContainers: [] + + # Additional network policies as needed (Deprecated - renamed to `webserver.networkPolicy.ingress.from`) + extraNetworkPolicies: [] + networkPolicy: + ingress: + # Peers for webserver NetworkPolicy ingress + from: [] + # Ports for webserver NetworkPolicy ingress (if `from` is set) + ports: + - port: "{{ .Values.ports._rpcServer }}" + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + livenessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 + scheme: HTTP + + readinessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 + scheme: HTTP + + # Wait for at most 1 minute (6*10s) for the RPC server container to startup. + # livenessProbe kicks in after the first successful startupProbe + startupProbe: + timeoutSeconds: 20 + failureThreshold: 6 + periodSeconds: 10 + scheme: HTTP + +# Airflow webserver settings +webserver: + enabled: true + # Add custom annotations to the webserver configmap + configMapAnnotations: {} + # hostAliases for the webserver pod + hostAliases: [] + # - ip: "127.0.0.1" + # hostnames: + # - "foo.local" + # - ip: "10.1.2.3" + # hostnames: + # - "foo.remote" + allowPodLogReading: true + livenessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 + scheme: HTTP + + readinessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 + scheme: HTTP + + # Wait for at most 1 minute (6*10s) for the webserver container to startup. + # livenessProbe kicks in after the first successful startupProbe + startupProbe: + timeoutSeconds: 20 + failureThreshold: 6 + periodSeconds: 10 + scheme: HTTP + + # Number of webservers + replicas: 1 + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + + # Command to use when running the Airflow webserver (templated). + command: ~ + # Args to use when running the Airflow webserver (templated). + args: ["bash", "-c", "exec airflow webserver"] + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to webserver kubernetes service account. 
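+    # For example (hypothetical values — an AWS IRSA binding; adapt to whatever your
+    # cluster actually uses, or leave empty):
+    # annotations:
+    #   eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/airflow-webserver"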
+ annotations: {} + + # Webserver pod disruption budget + podDisruptionBudget: + enabled: false + + # PDB configuration + config: + # minAvailable and maxUnavailable are mutually exclusive + maxUnavailable: 1 + # minAvailable: 1 + + # Allow overriding Update Strategy for Webserver + strategy: ~ + + # When not set, the values defined in the global securityContext will be used + # (deprecated, use `securityContexts` instead) + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security contexts for webserver deployments for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Additional network policies as needed (Deprecated - renamed to `webserver.networkPolicy.ingress.from`) + extraNetworkPolicies: [] + networkPolicy: + ingress: + # Peers for webserver NetworkPolicy ingress + from: [] + # Ports for webserver NetworkPolicy ingress (if `from` is set) + ports: + - port: "{{ .Values.ports.airflowUI }}" + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # Create initial user. + defaultUser: + enabled: true + role: Admin + username: admin + email: admin@example.com + firstName: admin + lastName: user + password: admin + + # Launch additional containers into webserver (templated). + extraContainers: [] + # Add additional init containers into webserver (templated). + extraInitContainers: [] + + # Mount additional volumes into webserver. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . }}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + # This string (can be templated) will be mounted into the Airflow Webserver + # as a custom webserver_config.py. You can bake a webserver_config.py in to + # your image instead or specify a configmap containing the + # webserver_config.py. + webserverConfig: ~ + # webserverConfig: | + # from airflow import configuration as conf + + # # The SQLAlchemy connection string. + # SQLALCHEMY_DATABASE_URI = conf.get('database', 'SQL_ALCHEMY_CONN') + + # # Flask-WTF flag for CSRF + # CSRF_ENABLED = True + webserverConfigConfigMapName: ~ + + service: + type: ClusterIP + ## service annotations + annotations: {} + ports: + - name: airflow-ui + port: "{{ .Values.ports.airflowUI }}" + # To change the port used to access the webserver: + # ports: + # - name: airflow-ui + # port: 80 + # targetPort: airflow-ui + # To only expose a sidecar, not the webserver directly: + # ports: + # - name: only_sidecar + # port: 80 + # targetPort: 8888 + # If you have a public IP, set NodePort to set an external port. + # Service type must be 'NodePort': + # ports: + # - name: airflow-ui + # port: 8080 + # targetPort: 8080 + # nodePort: 31151 + loadBalancerIP: ~ + ## Limit load balancer source ips to list of CIDRs + # loadBalancerSourceRanges: + # - "10.123.0.0/16" + loadBalancerSourceRanges: [] + + # Select certain nodes for airflow webserver pods. 
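+  # For example, to keep the webserver on Linux nodes using the standard
+  # kubernetes.io/os label (any custom node label works the same way):
+  # nodeSelector:
+  #   kubernetes.io/os: linux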
+ nodeSelector: {} + priorityClassName: ~ + affinity: {} + # default webserver affinity is: + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - podAffinityTerm: + # labelSelector: + # matchLabels: + # component: webserver + # topologyKey: kubernetes.io/hostname + # weight: 100 + tolerations: [] + topologySpreadConstraints: [] + + # annotations for webserver deployment + annotations: {} + + podAnnotations: {} + + # Labels specific webserver app + labels: {} + + waitForMigrations: + # Whether to create init container to wait for db migrations + enabled: true + env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} + + env: [] + +# Airflow Triggerer Config +triggerer: + enabled: true + # Number of airflow triggerers in the deployment + replicas: 1 + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + + # Command to use when running Airflow triggerers (templated). + command: ~ + # Args to use when running Airflow triggerer (templated). + args: ["bash", "-c", "exec airflow triggerer"] + + # Update Strategy when triggerer is deployed as a StatefulSet + updateStrategy: ~ + # Update Strategy when triggerer is deployed as a Deployment + strategy: + rollingUpdate: + maxSurge: "100%" + maxUnavailable: "50%" + + # If the triggerer stops heartbeating for 5 minutes (5*60s) kill the + # triggerer and let Kubernetes restart it + livenessProbe: + initialDelaySeconds: 10 + timeoutSeconds: 20 + failureThreshold: 5 + periodSeconds: 60 + command: ~ + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to triggerer kubernetes service account. + annotations: {} + + # When not set, the values defined in the global securityContext will be used + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for triggerer for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + persistence: + # Enable persistent volumes + enabled: true + # This policy determines whether PVCs should be deleted when StatefulSet is scaled down or removed. + persistentVolumeClaimRetentionPolicy: ~ + # Volume size for triggerer StatefulSet + size: 100Gi + # If using a custom storageClass, pass name ref to all statefulSets here + storageClassName: + # Execute init container to chown log directory. + # This is currently only needed in kind, due to usage + # of local-path provisioner. + fixPermissions: false + # Annotations to add to triggerer volumes + annotations: {} + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # Grace period for triggerer to finish after SIGTERM is sent from kubernetes + terminationGracePeriodSeconds: 60 + + # This setting tells kubernetes that its ok to evict + # when it wants to scale a node down. + safeToEvict: true + + # Launch additional containers into triggerer (templated). + extraContainers: [] + # Add additional init containers into triggerers (templated). 
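+  # A hypothetical example — an init container that blocks until a dependency is
+  # reachable before the triggerer starts (image, host and port are illustrative only):
+  # extraInitContainers:
+  #   - name: wait-for-broker
+  #     image: busybox:1.36
+  #     command: ["sh", "-c", "until nc -z my-kafka-broker 9092; do sleep 2; done"]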
+ extraInitContainers: [] + + # Mount additional volumes into triggerer. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . }}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + # Select certain nodes for airflow triggerer pods. + nodeSelector: {} + affinity: {} + # default triggerer affinity is: + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - podAffinityTerm: + # labelSelector: + # matchLabels: + # component: triggerer + # topologyKey: kubernetes.io/hostname + # weight: 100 + tolerations: [] + topologySpreadConstraints: [] + + priorityClassName: ~ + + # annotations for the triggerer deployment + annotations: {} + + podAnnotations: {} + + # Labels specific to triggerer objects and pods + labels: {} + + logGroomerSidecar: + # Whether to deploy the Airflow triggerer log groomer sidecar. + enabled: true + # Command to use when running the Airflow triggerer log groomer sidecar (templated). + command: ~ + # Args to use when running the Airflow triggerer log groomer sidecar (templated). + args: ["bash", "/clean-logs"] + # Number of days to retain logs + retentionDays: 15 + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + # Detailed default security context for logGroomerSidecar for container level + securityContexts: + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + waitForMigrations: + # Whether to create init container to wait for db migrations + enabled: true + env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} + + env: [] + + # Allow KEDA autoscaling. + keda: + enabled: false + namespaceLabels: {} + + # How often KEDA polls the airflow DB to report new scale requests to the HPA + pollingInterval: 5 + + # How many seconds KEDA will wait before scaling to zero. + # Note that HPA has a separate cooldown period for scale-downs + cooldownPeriod: 30 + + # Minimum number of triggerers created by keda + minReplicaCount: 0 + + # Maximum number of triggerers created by keda + maxReplicaCount: 10 + + # Specify HPA related options + advanced: {} + # horizontalPodAutoscalerConfig: + # behavior: + # scaleDown: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Percent + # value: 100 + # periodSeconds: 15 + + # Query to use for KEDA autoscaling. Must return a single integer. + query: >- + SELECT ceil(COUNT(*)::decimal / {{ .Values.config.triggerer.default_capacity }}) + FROM trigger + + # Whether to use PGBouncer to connect to the database or not when it is enabled + # This configuration will be ignored if PGBouncer is not enabled + usePgbouncer: false + +# Airflow Dag Processor Config +dagProcessor: + enabled: false + # Number of airflow dag processors in the deployment + replicas: 1 + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + + # Command to use when running Airflow dag processors (templated). + command: ~ + # Args to use when running Airflow dag processor (templated). 
+ args: ["bash", "-c", "exec airflow dag-processor"] + + # Update Strategy for dag processors + strategy: + rollingUpdate: + maxSurge: "100%" + maxUnavailable: "50%" + + # If the dag processor stops heartbeating for 5 minutes (5*60s) kill the + # dag processor and let Kubernetes restart it + livenessProbe: + initialDelaySeconds: 10 + timeoutSeconds: 20 + failureThreshold: 5 + periodSeconds: 60 + command: ~ + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to dag processor kubernetes service account. + annotations: {} + + # When not set, the values defined in the global securityContext will be used + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for dagProcessor for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # Grace period for dag processor to finish after SIGTERM is sent from kubernetes + terminationGracePeriodSeconds: 60 + + # This setting tells kubernetes that its ok to evict + # when it wants to scale a node down. + safeToEvict: true + + # Launch additional containers into dag processor (templated). + extraContainers: [] + # Add additional init containers into dag processors (templated). + extraInitContainers: [] + + # Mount additional volumes into dag processor. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . }}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + # Select certain nodes for airflow dag processor pods. + nodeSelector: {} + affinity: {} + # default dag processor affinity is: + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - podAffinityTerm: + # labelSelector: + # matchLabels: + # component: dag-processor + # topologyKey: kubernetes.io/hostname + # weight: 100 + tolerations: [] + topologySpreadConstraints: [] + + priorityClassName: ~ + + # annotations for the dag processor deployment + annotations: {} + + podAnnotations: {} + + logGroomerSidecar: + # Whether to deploy the Airflow dag processor log groomer sidecar. + enabled: true + # Command to use when running the Airflow dag processor log groomer sidecar (templated). + command: ~ + # Args to use when running the Airflow dag processor log groomer sidecar (templated). 
+ args: ["bash", "/clean-logs"] + # Number of days to retain logs + retentionDays: 15 + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + securityContexts: + container: {} + + waitForMigrations: + # Whether to create init container to wait for db migrations + enabled: true + env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} + + env: [] + +# Flower settings +flower: + # Enable flower. + # If True, and using CeleryExecutor/CeleryKubernetesExecutor, will deploy flower app. + enabled: false + + livenessProbe: + initialDelaySeconds: 10 + timeoutSeconds: 5 + failureThreshold: 10 + periodSeconds: 5 + + readinessProbe: + initialDelaySeconds: 10 + timeoutSeconds: 5 + failureThreshold: 10 + periodSeconds: 5 + + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + + # Command to use when running flower (templated). + command: ~ + # Args to use when running flower (templated). + args: + - "bash" + - "-c" + # The format below is necessary to get `helm lint` happy + - |- + exec \ + airflow {{ semverCompare ">=2.0.0" .Values.airflowVersion | ternary "celery flower" "flower" }} + + # Additional network policies as needed (Deprecated - renamed to `flower.networkPolicy.ingress.from`) + extraNetworkPolicies: [] + networkPolicy: + ingress: + # Peers for flower NetworkPolicy ingress + from: [] + # Ports for flower NetworkPolicy ingress (if ingressPeers is set) + ports: + - port: "{{ .Values.ports.flowerUI }}" + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # When not set, the values defined in the global securityContext will be used + securityContext: {} + # runAsUser: 50000 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for flower for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to worker kubernetes service account. + annotations: {} + + # A secret containing the connection + secretName: ~ + + # Else, if username and password are set, create secret from username and password + username: ~ + password: ~ + + service: + type: ClusterIP + ## service annotations + annotations: {} + ports: + - name: flower-ui + port: "{{ .Values.ports.flowerUI }}" + # To change the port used to access flower: + # ports: + # - name: flower-ui + # port: 8080 + # targetPort: flower-ui + loadBalancerIP: ~ + ## Limit load balancer source ips to list of CIDRs + # loadBalancerSourceRanges: + # - "10.123.0.0/16" + loadBalancerSourceRanges: [] + + # Launch additional containers into the flower pods. + extraContainers: [] + # Mount additional volumes into the flower pods. It can be templated like in the following example: + # extraVolumes: + # - name: my-templated-extra-volume + # secret: + # secretName: '{{ include "my_secret_template" . 
}}' + # defaultMode: 0640 + # optional: true + # + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumes: [] + extraVolumeMounts: [] + + # Select certain nodes for airflow flower pods. + nodeSelector: {} + affinity: {} + tolerations: [] + topologySpreadConstraints: [] + + priorityClassName: ~ + + # annotations for the flower deployment + annotations: {} + + podAnnotations: {} + + # Labels specific to flower objects and pods + labels: {} + env: [] + +# StatsD settings +statsd: + # Add custom annotations to the statsd configmap + configMapAnnotations: {} + + enabled: true + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + + # Arguments for StatsD exporter command. + args: ["--statsd.mapping-config=/etc/statsd-exporter/mappings.yml"] + + # Annotations to add to the StatsD Deployment. + annotations: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to worker kubernetes service account. + annotations: {} + + uid: 65534 + # When not set, `statsd.uid` will be used + + # (deprecated, use `securityContexts` instead) + securityContext: {} + # runAsUser: 65534 + # fsGroup: 0 + # runAsGroup: 0 + + # Detailed default security context for statsd deployments for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Additional network policies as needed + extraNetworkPolicies: [] + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + service: + extraAnnotations: {} + + # Select certain nodes for StatsD pods. + nodeSelector: {} + affinity: {} + tolerations: [] + topologySpreadConstraints: [] + + priorityClassName: ~ + + # Additional mappings for StatsD exporter. + # If set, will merge default mapping and extra mappings, default mapping has higher priority. + # So, if you want to change some default mapping, please use `overrideMappings` + extraMappings: [] + + # Override mappings for StatsD exporter. + # If set, will ignore setting item in default and `extraMappings`. + # So, If you use it, ensure all mapping item contains in it. + overrideMappings: [] + + podAnnotations: {} + env: [] + +# PgBouncer settings +pgbouncer: + # Enable PgBouncer + enabled: false + # Number of PgBouncer replicas to run in Deployment + replicas: 1 + # Max number of old replicasets to retain + revisionHistoryLimit: ~ + # Command to use for PgBouncer(templated). + command: ["pgbouncer", "-u", "nobody", "/etc/pgbouncer/pgbouncer.ini"] + # Args to use for PgBouncer(templated). + args: ~ + auth_type: scram-sha-256 + auth_file: /etc/pgbouncer/users.txt + + # annotations to be added to the PgBouncer deployment + annotations: {} + + podAnnotations: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. 
+    # If not set and create is true, a name is generated using the release name
+    name: ~
+
+    # Annotations to add to the PgBouncer kubernetes service account.
+    annotations: {}
+
+  # Additional network policies as needed
+  extraNetworkPolicies: []
+
+  # Pool sizes
+  metadataPoolSize: 10
+  resultBackendPoolSize: 5
+
+  # Maximum clients that can connect to PgBouncer (higher = more file descriptors)
+  maxClientConn: 100
+
+  # supply the name of existing secret with pgbouncer.ini and users.txt defined
+  # you can load them to a k8s secret like the one below
+  #  apiVersion: v1
+  #  kind: Secret
+  #  metadata:
+  #    name: pgbouncer-config-secret
+  #  data:
+  #    pgbouncer.ini: <base64-encoded pgbouncer.ini content>
+  #    users.txt: <base64-encoded users.txt content>
+  #  type: Opaque
+  #
+  #  configSecretName: pgbouncer-config-secret
+  #
+  configSecretName: ~
+
+  # PgBouncer pod disruption budget
+  podDisruptionBudget:
+    enabled: false
+
+    # PDB configuration
+    config:
+      # minAvailable and maxUnavailable are mutually exclusive
+      maxUnavailable: 1
+      # minAvailable: 1
+
+  # Limit the resources to PgBouncer.
+  # When you specify the resource request, the k8s scheduler uses this information to decide which node to
+  # place the Pod on. When you specify a resource limit for a Container, the kubelet enforces those limits so
+  # that the running container is not allowed to use more of that resource than the limit you set.
+  # See: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+  # Example:
+  #
+  # resources:
+  #   limits:
+  #     cpu: 100m
+  #     memory: 128Mi
+  #   requests:
+  #     cpu: 100m
+  #     memory: 128Mi
+  resources: {}
+
+  service:
+    extraAnnotations: {}
+
+  # https://www.pgbouncer.org/config.html
+  verbose: 0
+  logDisconnections: 0
+  logConnections: 0
+
+  sslmode: "prefer"
+  ciphers: "normal"
+
+  ssl:
+    ca: ~
+    cert: ~
+    key: ~
+
+  # Add extra PgBouncer ini configuration in the databases section:
+  # https://www.pgbouncer.org/config.html#section-databases
+  extraIniMetadata: ~
+  extraIniResultBackend: ~
+  # Add extra general PgBouncer ini configuration: https://www.pgbouncer.org/config.html
+  extraIni: ~
+
+  # Mount additional volumes into pgbouncer. It can be templated like in the following example:
+  #   extraVolumes:
+  #     - name: my-templated-extra-volume
+  #       secret:
+  #         secretName: '{{ include "my_secret_template" . }}'
+  #         defaultMode: 0640
+  #         optional: true
+  #
+  #   extraVolumeMounts:
+  #     - name: my-templated-extra-volume
+  #       mountPath: "{{ .Values.my_custom_path }}"
+  #       readOnly: true
+  extraVolumes: []
+  extraVolumeMounts: []
+
+  # Launch additional containers into pgbouncer.
+  extraContainers: []
+
+  # Select certain nodes for PgBouncer pods.
+  nodeSelector: {}
+  affinity: {}
+  tolerations: []
+  topologySpreadConstraints: []
+
+  priorityClassName: ~
+
+  uid: 65534
+
+  # Detailed default security context for pgbouncer for container and pod level
+  securityContexts:
+    pod: {}
+    container: {}
+
+  # container level lifecycle hooks
+  containerLifecycleHooks:
+    preStop:
+      exec:
+        # Allow existing clients' queries to complete within 120 seconds
+        command: ["/bin/sh", "-c", "killall -INT pgbouncer && sleep 120"]
+
+  metricsExporterSidecar:
+    resources: {}
+    #  limits:
+    #    cpu: 100m
+    #    memory: 128Mi
+    #  requests:
+    #    cpu: 100m
+    #    memory: 128Mi
+    sslmode: "disable"
+
+    # supply the name of existing secret with PGBouncer connection URI containing
+    # stats user and password.
+    # you can load them to a k8s secret like the one below
+    #  apiVersion: v1
+    #  kind: Secret
+    #  metadata:
+    #    name: pgbouncer-stats-secret
+    #  data:
+    #    connection: postgresql://<user>:<password>@127.0.0.1:6543/pgbouncer?
+ # type: Opaque + # + # statsSecretName: pgbouncer-stats-secret + # + statsSecretName: ~ + + # Key containing the PGBouncer connection URI, defaults to `connection` if not defined + statsSecretKey: ~ + + # Detailed default security context for metricsExporterSidecar for container level + securityContexts: + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + livenessProbe: + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + + readinessProbe: + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + + # Environment variables to add to pgbouncer container + env: [] + +# Configuration for the redis provisioned by the chart +redis: + enabled: true + terminationGracePeriodSeconds: 600 + + # Annotations for Redis Statefulset + annotations: {} + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to worker kubernetes service account. + annotations: {} + + persistence: + # Enable persistent volumes + enabled: true + # Volume size for worker StatefulSet + size: 1Gi + # If using a custom storageClass, pass name ref to all statefulSets here + storageClassName: + # Annotations to add to redis volumes + annotations: {} + + # Configuration for empty dir volume (if redis.persistence.enabled == false) + # emptyDirConfig: + # sizeLimit: 1Gi + # medium: Memory + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # If set use as redis secret. Make sure to also set data.brokerUrlSecretName value. + passwordSecretName: ~ + + # Else, if password is set, create secret with it, + # Otherwise a new password will be generated on install + # Note: password can only be set during install, not upgrade. + password: ~ + + # This setting tells kubernetes that its ok to evict + # when it wants to scale a node down. + safeToEvict: true + + # Select certain nodes for redis pods. 
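+  # A hypothetical example — pin Redis to a dedicated node pool; the label and taint
+  # below are placeholders for whatever your cluster actually defines:
+  # nodeSelector:
+  #   workload-type: broker
+  # tolerations:
+  #   - key: "workload-type"
+  #     operator: "Equal"
+  #     value: "broker"
+  #     effect: "NoSchedule"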
+ nodeSelector: {} + affinity: {} + tolerations: [] + topologySpreadConstraints: [] + priorityClassName: ~ + + # Set to 0 for backwards-compatiblity + uid: 0 + # If not set, `redis.uid` will be used + securityContext: {} + # runAsUser: 999 + # runAsGroup: 0 + + # Detailed default security context for redis for container and pod level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + podAnnotations: {} +# Auth secret for a private registry +# This is used if pulling airflow images from a private registry +registry: + secretName: aws-ecr + + # Example: + # connection: + # user: ~ + # pass: ~ + # host: ~ + # email: ~ + connection: {} + +# Elasticsearch logging configuration +elasticsearch: + # Enable elasticsearch task logging + enabled: false + # A secret containing the connection + secretName: ~ + # Or an object representing the connection + # Example: + # connection: + # scheme: ~ + # user: ~ + # pass: ~ + # host: ~ + # port: ~ + connection: {} + +# All ports used by chart +ports: + flowerUI: 5555 + airflowUI: 8080 + workerLogs: 8793 + triggererLogs: 8794 + redisDB: 6379 + statsdIngest: 9125 + statsdScrape: 9102 + pgbouncer: 6543 + pgbouncerScrape: 9127 + # rpcServer support is experimental / dev purpose only and will later be renamed + _rpcServer: 9080 + +# Define any ResourceQuotas for namespace +quotas: {} + +# Define default/max/min values for pods and containers in namespace +limits: [] + +# This runs as a CronJob to cleanup old pods. +cleanup: + enabled: false + # Run every 15 minutes (templated). + schedule: "*/15 * * * *" + # To select a random-ish, deterministic starting minute between 3 and 12 inclusive for each release: + # '{{- add 3 (regexFind ".$" (adler32sum .Release.Name)) -}}-59/15 * * * *' + # To select the last digit of unix epoch time as the starting minute on each deploy: + # '{{- now | unixEpoch | trunc -1 -}}-59/* * * * *' + + # Command to use when running the cleanup cronjob (templated). + command: ~ + # Args to use when running the cleanup cronjob (templated). + args: ["bash", "-c", "exec airflow kubernetes cleanup-pods --namespace={{ .Release.Namespace }}"] + + # jobAnnotations are annotations on the cleanup CronJob + jobAnnotations: {} + + # Select certain nodes for airflow cleanup pods. + nodeSelector: {} + affinity: {} + tolerations: [] + topologySpreadConstraints: [] + priorityClassName: ~ + + podAnnotations: {} + + # Labels specific to cleanup objects and pods + labels: {} + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + # Create ServiceAccount + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to cleanup cronjob kubernetes service account. 
+ annotations: {} + + # When not set, the values defined in the global securityContext will be used + securityContext: {} + # runAsUser: 50000 + # runAsGroup: 0 + env: [] + + # Detailed default security context for cleanup for container level + securityContexts: + pod: {} + container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + + # Specify history limit + # When set, overwrite the default k8s number of successful and failed CronJob executions that are saved. + failedJobsHistoryLimit: ~ + successfulJobsHistoryLimit: ~ + +# Configuration for postgresql subchart +# Not recommended for production +postgresql: + enabled: true + auth: + enablePostgresUser: true + postgresPassword: postgres + username: "" + password: "" + +# Config settings to go into the mounted +# +# Please note that these values are passed through the `tpl` function, so are +# all subject to being rendered as go templates. If you need to include a +# literal `{{` in a value, it must be expressed like this: +# +# a: '{{ "{{ not a template }}" }}' +# +# Do not set config containing secrets via plain text values, use Env Var or k8s secret object +# yamllint disable rule:line-length +config: + core: + dags_folder: '{{ include "airflow_dags" . }}' + # This is ignored when used with the official Docker image + load_examples: 'False' + executor: '{{ .Values.executor }}' + # For Airflow 1.10, backward compatibility; moved to [logging] in 2.0 + colored_console_log: 'False' + remote_logging: '{{- ternary "True" "False" .Values.elasticsearch.enabled }}' + logging: + remote_logging: '{{- ternary "True" "False" .Values.elasticsearch.enabled }}' + colored_console_log: 'False' + metrics: + statsd_on: '{{ ternary "True" "False" .Values.statsd.enabled }}' + statsd_port: 9125 + statsd_prefix: airflow + statsd_host: '{{ printf "%s-statsd" (include "airflow.fullname" .) }}' + webserver: + enable_proxy_fix: 'True' + # For Airflow 1.10 + rbac: 'True' + celery: + flower_url_prefix: '{{ ternary "" .Values.ingress.flower.path (eq .Values.ingress.flower.path "/") }}' + worker_concurrency: 16 + scheduler: + standalone_dag_processor: '{{ ternary "True" "False" .Values.dagProcessor.enabled }}' + # statsd params included for Airflow 1.10 backward compatibility; moved to [metrics] in 2.0 + statsd_on: '{{ ternary "True" "False" .Values.statsd.enabled }}' + statsd_port: 9125 + statsd_prefix: airflow + statsd_host: '{{ printf "%s-statsd" (include "airflow.fullname" .) }}' + # `run_duration` included for Airflow 1.10 backward compatibility; removed in 2.0. + run_duration: 41460 + elasticsearch: + json_format: 'True' + log_id_template: "{dag_id}_{task_id}_{execution_date}_{try_number}" + elasticsearch_configs: + max_retries: 3 + timeout: 30 + retry_timeout: 'True' + kerberos: + keytab: '{{ .Values.kerberos.keytabPath }}' + reinit_frequency: '{{ .Values.kerberos.reinitFrequency }}' + principal: '{{ .Values.kerberos.principal }}' + ccache: '{{ .Values.kerberos.ccacheMountPath }}/{{ .Values.kerberos.ccacheFileName }}' + celery_kubernetes_executor: + kubernetes_queue: 'kubernetes' + # The `kubernetes` section is deprecated in Airflow >= 2.5.0 due to an airflow.cfg schema change. + # The `kubernetes` section can be removed once the helm chart no longer supports Airflow < 2.5.0. + kubernetes: + namespace: '{{ .Release.Namespace }}' + # The following `airflow_` entries are for Airflow 1, and can be removed when it is no longer supported. + airflow_configmap: '{{ include "airflow_config" . 
}}' + airflow_local_settings_configmap: '{{ include "airflow_config" . }}' + pod_template_file: '{{ include "airflow_pod_template_file" . }}/pod_template_file.yaml' + worker_container_repository: '{{ .Values.images.airflow.repository | default .Values.defaultAirflowRepository }}' + worker_container_tag: '{{ .Values.images.airflow.tag | default .Values.defaultAirflowTag }}' + multi_namespace_mode: '{{ ternary "True" "False" .Values.multiNamespaceMode }}' + # The `kubernetes_executor` section duplicates the `kubernetes` section in Airflow >= 2.5.0 due to an airflow.cfg schema change. + kubernetes_executor: + namespace: '{{ .Release.Namespace }}' + pod_template_file: '{{ include "airflow_pod_template_file" . }}/pod_template_file.yaml' + worker_container_repository: '{{ .Values.images.airflow.repository | default .Values.defaultAirflowRepository }}' + worker_container_tag: '{{ .Values.images.airflow.tag | default .Values.defaultAirflowTag }}' + multi_namespace_mode: '{{ ternary "True" "False" .Values.multiNamespaceMode }}' + triggerer: + default_capacity: 1000 +# yamllint enable rule:line-length + +# Whether Airflow can launch workers and/or pods in multiple namespaces +# If true, it creates ClusterRole/ClusterRolebinding (with access to entire cluster) +multiNamespaceMode: false + +# `podTemplate` is a templated string containing the contents of `pod_template_file.yaml` used for +# KubernetesExecutor workers. The default `podTemplate` will use normal `workers` configuration parameters +# (e.g. `workers.resources`). As such, you normally won't need to override this directly, however, +# you can still provide a completely custom `pod_template_file.yaml` if desired. +# If not set, a default one is created using `files/pod-template-file.kubernetes-helm-yaml`. +podTemplate: ~ +# The following example is NOT functional, but meant to be illustrative of how you can provide a custom +# `pod_template_file`. You're better off starting with the default in +# `files/pod-template-file.kubernetes-helm-yaml` and modifying from there. +# We will set `priorityClassName` in this example: +# podTemplate: | +# apiVersion: v1 +# kind: Pod +# metadata: +# name: placeholder-name +# labels: +# tier: airflow +# component: worker +# release: {{ .Release.Name }} +# spec: +# priorityClassName: high-priority +# containers: +# - name: base +# ... + +# Git sync +dags: + # Where dags volume will be mounted. Works for both persistence and gitSync. 
+  # If not specified, dags mount path will be set to $AIRFLOW_HOME/dags
+  mountPath: ~
+  persistence:
+    # Annotations for dags PVC
+    annotations: {}
+    # Enable persistent volume for storing dags
+    enabled: false
+    # Volume size for dags
+    size: 1Gi
+    # If using a custom storageClass, pass name here
+    storageClassName:
+    # access mode of the persistent volume
+    accessMode: ReadWriteOnce
+    ## the name of an existing PVC to use
+    existingClaim:
+    ## optional subpath for dag volume mount
+    subPath: ~
+  gitSync:
+    enabled: false
+
+
+    # git repo clone url
+    # ssh example: git@github.com:apache/airflow.git
+    # https example: https://github.com/apache/airflow.git
+    repo: ""
+    branch: main
+    rev: HEAD
+    # The git revision (branch, tag, or hash) to check out, v4 only
+    ref: v2-2-stable
+    depth: 1
+    # the number of consecutive failures allowed before aborting
+    maxFailures: 0
+    # subpath within the repo where dags are located
+    # should be "" if dags are at repo root
+    subPath: ""
+    # if your repo needs a username and password
+    # you can load them to a k8s secret like the one below
+    #   ---
+    #   apiVersion: v1
+    #   kind: Secret
+    #   metadata:
+    #     name: git-credentials
+    #   data:
+    #     # For git-sync v3
+    #     GIT_SYNC_USERNAME: <base64-encoded username>
+    #     GIT_SYNC_PASSWORD: <base64-encoded password>
+    #     # For git-sync v4
+    #     GITSYNC_USERNAME: <base64-encoded username>
+    #     GITSYNC_PASSWORD: <base64-encoded password>
+    # and specify the name of the secret below
+    #
+    # credentialsSecret: git-credentials
+    #
+    #
+    # If you are using an ssh clone url, you can load
+    # the ssh private key to a k8s secret like the one below
+    #   ---
+    #   apiVersion: v1
+    #   kind: Secret
+    #   metadata:
+    #     name: airflow-ssh-secret
+    #   data:
+    #     # key needs to be gitSshKey
+    #     gitSshKey: <base64-encoded private key>
+    # and specify the name of the secret below
+    sshKeySecret: ""
+    #
+    # Or set sshKeySecret with your key
+    # sshKey: |-
+    #   -----BEGIN {OPENSSH PRIVATE KEY}-----
+    #   ...
+    #   -----END {OPENSSH PRIVATE KEY}-----
+    #
+    # If you are using an ssh private key, you can additionally
+    # specify the content of your known_hosts file, example:
+    #
+    # knownHosts: |
+    #   <host1>,<ip1> <key1>
+    #   <host2>,<ip2> <key2>
+
+    # interval between git sync attempts in seconds
+    # high values are more likely to cause DAGs to become out of sync between different components
+    # low values cause more traffic to the remote git repository
+    # Go-style duration string (e.g. "100ms" or "0.1s" = 100ms).
+    # For backwards compatibility, wait will be used if it is specified.
+    period: 5s
+    wait: ~
+    # add variables from secret into gitSync containers, such as proxy-config
+    envFrom: ~
+    # envFrom: |
+    #   - secretRef:
+    #       name: 'proxy-config'
+
+    containerName: git-sync
+    uid: 65533
+
+    # When not set, the values defined in the global securityContext will be used
+    securityContext: {}
+    #  runAsUser: 65533
+    #  runAsGroup: 0
+
+    securityContexts:
+      container: {}
+
+    # container level lifecycle hooks
+    containerLifecycleHooks: {}
+
+    # Mount additional volumes into git-sync.
It can be templated like in the following example: + # extraVolumeMounts: + # - name: my-templated-extra-volume + # mountPath: "{{ .Values.my_custom_path }}" + # readOnly: true + extraVolumeMounts: [] + env: [] + # Supported env vars for gitsync can be found at https://github.com/kubernetes/git-sync + # - name: "" + # value: "" + + # Configuration for empty dir volume + # emptyDirConfig: + # sizeLimit: 1Gi + # medium: Memory + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +logs: + # Configuration for empty dir volume (if logs.persistence.enabled == false) + # emptyDirConfig: + # sizeLimit: 1Gi + # medium: Memory + + persistence: + # Enable persistent volume for storing logs + enabled: false + # Volume size for logs + size: 10Gi + # Annotations for the logs PVC + annotations: {} + # If using a custom storageClass, pass name here + storageClassName: + ## the name of an existing PVC to use + existingClaim: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..790b625 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +kafka-python-ng +apache-airflow-providers-mongo +apache-airflow-providers-apache-kafka