Commits (49 total; this view shows changes from 17 commits)
a6822ed
add directories and files
hernandezc1 Jun 17, 2025
8470a38
update classification field in bg table schema
hernandezc1 Jun 17, 2025
d9e5e1d
address codacy issue(s)
hernandezc1 Jun 17, 2025
415a33e
update `classification` and `properties` fields
hernandezc1 Jun 17, 2025
d768787
Merge branch 'develop' into u/ch/swift
hernandezc1 Jun 18, 2025
3562bce
Merge branch 'develop' into u/ch/swift
hernandezc1 Jun 19, 2025
8fa6c14
add new GCP resources
hernandezc1 Jun 26, 2025
d89a760
add ps_to_storage module for Swift
hernandezc1 Jun 26, 2025
241e4bc
update metadata key/value pairs
hernandezc1 Jun 26, 2025
d965999
address codacy issues
hernandezc1 Jun 26, 2025
dad2587
add IAM policy for BQ dataset
hernandezc1 Jun 27, 2025
08e9486
configure IAM policy for BQ dataset
hernandezc1 Jun 27, 2025
2633278
update script to accomodate swift alert schema
hernandezc1 Jun 27, 2025
1f8ec8f
ensures avro bucket does not exist before creating it
hernandezc1 Jun 27, 2025
d6fd04a
creates vm if it does not already exist
hernandezc1 Jun 27, 2025
03652f7
set up artifact registry
hernandezc1 Jun 27, 2025
86cdebc
update default ps topic
hernandezc1 Jun 27, 2025
e151221
update file metadata
hernandezc1 Jun 27, 2025
18072e8
use latest version of the kafka -> pubsub connector
hernandezc1 Jun 27, 2025
26547a1
update documentation
hernandezc1 Jun 27, 2025
48c7ae4
address codacy issue
hernandezc1 Jun 27, 2025
97d18b3
update documentation
hernandezc1 Jun 27, 2025
3a2655e
improve readability by updating parameter names
hernandezc1 Jun 29, 2025
9b746d2
update documentation
hernandezc1 Jun 30, 2025
f1d221e
update parameter names
hernandezc1 Jun 30, 2025
79805a5
squash bug
hernandezc1 Jun 30, 2025
face1ab
update documentation
hernandezc1 Jun 30, 2025
bf64911
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 1, 2025
76504c4
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 1, 2025
ae974ec
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 8, 2025
43ed80c
unpin requirements
hernandezc1 Jul 8, 2025
1f1f54b
rename GCP resources
hernandezc1 Jul 8, 2025
e710bd7
rename GCP resources
hernandezc1 Jul 8, 2025
efe7f18
squash bug and update resource names
hernandezc1 Jul 9, 2025
d4fbb19
use $PROJECT_ID directly where applicable
hernandezc1 Jul 9, 2025
2c25fd8
update documentation
hernandezc1 Jul 9, 2025
98f3ccf
add versiontag as an env_var
hernandezc1 Jul 9, 2025
7587e56
update resource name
hernandezc1 Jul 9, 2025
3c5a658
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
4bf33e5
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
1fca11f
assign dead letter topic
hernandezc1 Jul 17, 2025
cf3a4c7
update Pub/Sub dead letter topic name
hernandezc1 Jul 17, 2025
968493c
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
1122672
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
837f736
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 22, 2025
81bb094
update resource names
hernandezc1 Aug 6, 2025
45651e8
update resource names
hernandezc1 Aug 6, 2025
882187c
Merge branch 'develop' into u/ch/swift
hernandezc1 Sep 24, 2025
037ba42
Merge branch 'develop' into u/ch/swift
hernandezc1 Sep 26, 2025
21 changes: 21 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/Dockerfile
@@ -0,0 +1,21 @@
# Use the official lightweight Python image.
# https://hub.docker.com/_/python
FROM python:3.12-slim

# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED True

# Copy local code to the container image.
ENV APP_HOME /app
WORKDIR $APP_HOME
COPY . ./

# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Run the web service on container startup. Here we use the gunicorn
# webserver, with one worker process and 8 threads.
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
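
For a quick local smoke test of this image, something like the following should work (image tag, port, and env values are illustrative, not part of the PR; the service still needs GCP_PROJECT, SURVEY, TESTID, and Google credentials to start cleanly):

```bash
# build the image from this directory (tag name is illustrative)
docker build -t swift-ps-to-storage .

# run it locally; gunicorn binds to $PORT as in the CMD above
docker run --rm -p 8080:8080 \
    -e PORT=8080 -e GCP_PROJECT=my-project -e SURVEY=swift -e TESTID=mytest \
    swift-ps-to-storage
```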
29 changes: 29 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/cloudbuild.yaml
@@ -0,0 +1,29 @@
# https://cloud.google.com/build/docs/deploying-builds/deploy-cloud-run
# containerize the module and deploy it to Cloud Run
steps:
# Build the image
- name: 'gcr.io/cloud-builders/docker'
args: ['build', '-t', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}', '.']
# Push the image to Artifact Registry
- name: 'gcr.io/cloud-builders/docker'
args: ['push', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}']
# Deploy image to Cloud Run
- name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
entrypoint: gcloud
args: ['run', 'deploy', '${_MODULE_NAME}', '--image', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}', '--region', '${_REGION}', '--set-env-vars', '${_ENV_VARS}']
images:
- '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}'
substitutions:
_SURVEY: 'swift'
_TESTID: 'testid'
_MODULE_NAME: '${_SURVEY}-alerts-to-storage-${_TESTID}'
_MODULE_IMAGE_NAME: 'gcr.io/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_NAME}'
_REPOSITORY: 'cloud-run-services'
# cloud functions automatically sets the projectid env var using the name "GCP_PROJECT"
# use the same name here for consistency
# [TODO] PROJECT_ID is set in setup.sh. this is confusing and we should revisit the decision.
# i (Raen) think i didn't make it a substitution because i didn't want to set a default for it.
_ENV_VARS: 'GCP_PROJECT=${PROJECT_ID},SURVEY=${_SURVEY},TESTID=${_TESTID}'
_REGION: 'us-central1'
options:
dynamic_substitutions: true
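
For reference, deploy.sh (below) submits this config roughly as follows; the substitution values here are illustrative:

```bash
# build, push, and deploy via Cloud Build using the substitutions defined above
gcloud builds submit --config=cloudbuild.yaml \
    --substitutions="_SURVEY=swift,_TESTID=mytest,_MODULE_NAME=swift-alerts-to-storage-mytest,_REPOSITORY=swift-cloud-run-services-mytest" \
    .
```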
93 changes: 93 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/deploy.sh
Reviewer comment (Collaborator):
There are a bunch of variables in here with names that include "avro" -- I'm sure this is a holdover from the ZTF and LSST modules. For reusability, we should change those names because not all surveys publish avro alerts. For Swift in particular, assuming they publish in json and not avro, having "avro" here is confusing.
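
A possible direction, sketched only to illustrate the suggestion (the variable and topic names are hypothetical, not part of this PR):

```bash
# hypothetical renames in deploy.sh; "alerts" instead of "avro" since Swift publishes JSON
alerts_bucket=$(define_GCP_resources "${PROJECT_ID}-${survey}_alerts")
alerts_in_storage_topic=$(define_GCP_resources "projects/${PROJECT_ID}/topics/${survey}-alerts_in_storage")
alerts_in_storage_subscription=$(define_GCP_resources "${survey}-alerts_in_storage-counter")
```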

@@ -0,0 +1,93 @@
#! /bin/bash
# Deploys or deletes broker Cloud Run service
# This script will not delete Cloud Run services that are in production

# "False" uses production resources
# any other string will be appended to the names of all resources
testid="${1:-test}"
# "True" tearsdown/deletes resources, else setup
teardown="${2:-False}"
# name of the survey this broker instance will ingest
survey="${3:-swift}"
region="${4:-us-central1}"
PROJECT_ID=$GOOGLE_CLOUD_PROJECT # get the environment variable

MODULE_NAME="alerts-to-storage" # lower case required by cloud run
ROUTE_RUN="/" # url route that will trigger main.run()

# function used to define GCP resources; appends testid if needed
define_GCP_resources() {
local base_name="$1"
local testid_suffix=""

if [ "$testid" != "False" ]; then
testid_suffix="-${testid}"
fi

echo "${base_name}${testid_suffix}"
}

#--- GCP resources used in this script
artifact_registry_repo=$(define_GCP_resources "${survey}-cloud-run-services")
avro_bucket=$(define_GCP_resources "${PROJECT_ID}-${survey}_alerts")
avro_topic=$(define_GCP_resources "projects/${PROJECT_ID}/topics/${survey}-alert_avros")
avro_subscription=$(define_GCP_resources "${survey}-alert_avros-counter")
cr_module_name=$(define_GCP_resources "${survey}-${MODULE_NAME}") # lower case required by cloud run
ps_input_subscrip=$(define_GCP_resources "${survey}-alerts_raw") # pub/sub subscription used to trigger cloud run module
runinvoker_svcact="cloud-run-invoker@${PROJECT_ID}.iam.gserviceaccount.com"
trigger_topic=$(define_GCP_resources "${survey}-alerts_raw")

if [ "${teardown}" = "True" ]; then
# ensure that we do not teardown production resources
if [ "${testid}" != "False" ]; then
gsutil rm -r "gs://${avro_bucket}"
gcloud pubsub topics delete "${avro_topic}"
gcloud pubsub subscriptions delete "${avro_subscription}"
gcloud pubsub subscriptions delete "${ps_input_subscrip}"
gcloud run services delete "${cr_module_name}" --region "${region}"
fi
else
echo
echo "Creating avro_bucket..."
Reviewer comment (Collaborator):
Let's revisit which scripts create and delete which resources. I know we had a whole discussion about it when we first created these deploy.sh scripts for individual modules and decided to put this bucket creation here, but this is the only public resource that we handle this way and I've been confused by it more than once. Every other resource gets created by setup_broker.sh. At a minimum, our handling of this bucket and the bigquery dataset/table should be consistent.

if ! gsutil ls -b "gs://${avro_bucket}" >/dev/null 2>&1; then
#--- Create the bucket that will store the alerts
gsutil mb -l "${region}" "gs://${avro_bucket}"
gsutil uniformbucketlevelaccess set on "gs://${avro_bucket}"
gsutil requesterpays set on "gs://${avro_bucket}"
gcloud storage buckets add-iam-policy-binding "gs://${avro_bucket}" \
--member="allUsers" \
--role="roles/storage.objectViewer"
else
echo "${avro_bucket} already exists."
fi

#--- Setup the Pub/Sub notifications on the Avro storage bucket
echo
echo "Configuring Pub/Sub notifications on GCS bucket..."
trigger_event=OBJECT_FINALIZE
format=json # json or none; if json, file metadata sent in message body
gsutil notification create \
-t "$avro_topic" \
-e "$trigger_event" \
-f "$format" \
"gs://${avro_bucket}"
gcloud pubsub subscriptions create "${avro_subscription}" --topic="${avro_topic}"

#--- Deploy the Cloud Run service
echo "Creating container image and deploying to Cloud Run..."
moduledir="." # assumes deploying what's in our current directory
config="${moduledir}/cloudbuild.yaml"
url=$(gcloud builds submit --config="${config}" \
--substitutions="_SURVEY=${survey},_TESTID=${testid},_MODULE_NAME=${cr_module_name},_REPOSITORY=${artifact_registry_repo}" \
"${moduledir}" | sed -n 's/^Step #2: Service URL: \(.*\)$/\1/p')

echo "Creating trigger subscription for Cloud Run..."
# WARNING: This is set to retry failed deliveries. If there is a bug in main.py this will
# retry indefinitely, until the message is deleted manually (see the dead-letter sketch after this script).
gcloud pubsub subscriptions create "${ps_input_subscrip}" \
--topic "${trigger_topic}" \
--topic-project "${PROJECT_ID}" \
--ack-deadline=600 \
--push-endpoint="${url}${ROUTE_RUN}" \
--push-auth-service-account="${runinvoker_svcact}"
fi
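
Related to the warning above about indefinite retries (and to the dead-letter commits in this branch), the trigger subscription can be capped with a dead-letter policy. A rough sketch, with a hypothetical dead-letter topic name:

```bash
# hypothetical: stop infinite redelivery by routing repeated failures to a dead-letter topic
gcloud pubsub subscriptions update "${ps_input_subscrip}" \
    --dead-letter-topic="projects/${PROJECT_ID}/topics/${survey}-deadletter" \
    --max-delivery-attempts=5
```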
107 changes: 107 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/main.py
@@ -0,0 +1,107 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""This module stores Swift/BAT-GUANO alert data as an Avro file in Cloud Storage."""

import os
import flask
import pittgoogle
from google.cloud import logging, storage
from google.cloud.exceptions import PreconditionFailed

# [FIXME] Make this helpful or else delete it.
# Connect the python logger to the google cloud logger.
# By default, this captures INFO level and above.
# pittgoogle uses the python logger.
# We don't currently use the python logger directly in this script, but we could.
logging.Client().setup_logging()

PROJECT_ID = os.getenv("GCP_PROJECT")
TESTID = os.getenv("TESTID")
SURVEY = os.getenv("SURVEY")

# Variables for incoming data
# A URL route is used in deploy.sh when the trigger subscription is created.
# It is possible to define multiple routes in a single module and trigger them using different subscriptions.
ROUTE_RUN = "/" # HTTP route that will trigger run(). Must match deploy.sh

# Variables for outgoing data
HTTP_204 = 204 # HTTP code: Success
HTTP_400 = 400 # HTTP code: Bad Request

# GCP resources used in this module
TOPIC_ALERTS_JSON = pittgoogle.Topic.from_cloud(
"alerts-json", survey=SURVEY, testid=TESTID, projectid=PROJECT_ID
)
Reviewer comment (Collaborator) on lines 34 to 36:
Do these alerts come from Swift as json or avro? If json, I think the name of this topic can just be swift-alerts since we're not changing the serialization. Also, if this stays as swift-alerts-json does that mean we don't publish any topic that's just called swift-alerts? I think from a usability/consistency standpoint we should always publish a stream called <survey>-alerts that is just a pass through of the survey's full (but deduplicated) alert stream from Kafka (or whatever) into Pub/Sub.

If that seems confusing in comparison with our LSST streams where lsst-alerts is avro and lsst-alerts-json is json, maybe we consider changing those names so that lsst-alerts is the json version and we make the avro one called lsst-alerts-avro? Benefit of the current naming is that lsst-alerts is byte-for-byte the same as what Rubin publishes. That was my original intention for all of our <survey>-alerts streams, and in that sense using swift-alerts is consistent (assuming Swift really does publish these as json -- otherwise, sorry for this irrelevant tangent). But since all of our topics downstream of <survey>-alerts use json exclusively, I can see an argument for making all <survey>-alerts streams json as well.

Reply (Collaborator Author):
@troyraen the alerts from Swift are JSON serialized. I named the Pub/Sub resource that way because at the time it seemed more appropriate and descriptive, but I agree that having a <survey>-alerts topic that is just a pass through of the survey's full (deduplicated) alert stream is the convention this module should adopt

bucket_name = f"{PROJECT_ID}-{SURVEY}_alerts"
if TESTID != "False":
bucket_name = f"{bucket_name}-{TESTID}"

client = storage.Client()
bucket = client.get_bucket(client.bucket(bucket_name, user_project=PROJECT_ID))

app = flask.Flask(__name__)


@app.route(ROUTE_RUN, methods=["POST"])
def run():
Reviewer comment (Collaborator):
Suggested change:
```diff
-def run():
+def run() -> tuple[str, int]:
```

"""Uploads alert data to a GCS bucket. Publishes a de-duplicated "alerts" stream (${survey}-alerts) containing the
original alert bytes and publishes an additional JSON message stream (${survey}-bigquery-import) in which a
BigQuery subscription is used to write alert data to the appropriate BigQuery table.

This module is intended to be deployed as a Cloud Run service. It will operate as an HTTP endpoint
triggered by Pub/Sub messages. This function will be called once for every message sent to this route.
It should accept the incoming HTTP request and return a response.

Returns
-------
response : tuple(str, int)
Tuple containing the response body (string) and HTTP status code (int). Flask will convert the
tuple into a proper HTTP response. Note that the response is a status message for the web server.
"""
# extract the envelope from the request that triggered the endpoint
# this contains a single Pub/Sub message with the alert to be processed
envelope = flask.request.get_json()
try:
alert = pittgoogle.Alert.from_cloud_run(envelope, schema_name="default")
except pittgoogle.exceptions.BadRequest as exc:
return str(exc), HTTP_400

blob = bucket.blob(_name_in_bucket(alert))
blob.metadata = _create_file_metadata(alert, event_id=envelope["message"]["messageId"])

# raise a PreconditionFailed exception if filename already exists in the bucket using "if_generation_match=0"
# let it raise. the message will be dropped.
Reviewer comment (Collaborator):
Suggested change:
```diff
-# let it raise. the message will be dropped.
```

This comment is a holdover from my original code but no longer makes sense because we've now implemented the try/except right here.

try:
blob.upload_from_string(alert.msg.data, if_generation_match=0)
except PreconditionFailed:
# this alert is a duplicate. drop it.
return "", HTTP_204

# publish the same alert as JSON
TOPIC_ALERTS_JSON.publish(alert)

return "", HTTP_204


def _create_file_metadata(alert: pittgoogle.Alert, event_id: str) -> dict:
"""Return key/value pairs to be attached to the file as metadata."""

metadata = {"file_origin_message_id": event_id}
metadata["_".join("alert_type")] = alert.dict["alert_type"]
metadata["_".join("id")] = alert.dict["id"]
metadata["_".join("ra")] = alert.dict["ra"]
metadata["_".join("dec")] = alert.dict["dec"]

return metadata


def _name_in_bucket(alert: pittgoogle.Alert) -> str:
"""Return the name of the file in the bucket."""
# not easily able to extract schema version, see:
# https://github.com/nasa-gcn/gcn-schema/blob/main/gcn/notices/swift/bat/Guano.example.json
_date = alert.dict["alert_datetime"][0:10]
_alert_type = alert.dict["alert_type"]
_id = alert.dict["id"]

return f"{_date}/{_alert_type}/{_id}.json"
15 changes: 15 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/requirements.txt
@@ -0,0 +1,15 @@
# As explained here
# https://cloud.google.com/functions/docs/writing/specifying-dependencies-python
# dependencies for a Cloud Function must be specified in a `requirements.txt`
# file (or packaged with the function) in the same directory as `main.py`

google-cloud-logging
google-cloud-storage
pittgoogle-client>=0.3.15

# for Cloud Run
# https://cloud.google.com/run/docs/quickstarts/build-and-deploy/deploy-python-service
# pinned following quickstart example. [TODO] consider un-pinning
Flask==3.0.3
gunicorn==23.0.0
Werkzeug==3.0.6
Reviewer comment (Collaborator):
Suggested change:
```diff
 # for Cloud Run
 # https://cloud.google.com/run/docs/quickstarts/build-and-deploy/deploy-python-service
-# pinned following quickstart example. [TODO] consider un-pinning
-Flask==3.0.3
-gunicorn==23.0.0
-Werkzeug==3.0.6
+Flask
+gunicorn
+Werkzeug
```

35 changes: 35 additions & 0 deletions broker/consumer/swift/README.md
@@ -0,0 +1,35 @@
# Start the Swift/BAT-GUANO consumer VM

See `broker/setup_broker/swift/README.md` for setup instructions.

To start the consumer VM:

```bash
survey="swift"
testid="mytest"
consumerVM="${survey}-consumer-${testid}"
zone="us-central1-a"

# Set the VM metadata
KAFKA_TOPIC="enter Kafka topic"
PS_TOPIC="${survey}-alerts-${testid}"
gcloud compute instances add-metadata ${consumerVM} --zone=${zone} \
--metadata KAFKA_TOPIC=${KAFKA_TOPIC},PS_TOPIC=${PS_TOPIC}

# Start the VM
gcloud compute instances start ${consumerVM} --zone ${zone}
# this launches the startup script which configures and starts the
# Kafka -> Pub/Sub connector
```

To stop the consumer VM:

```bash
survey="swift"
testid="mytest"
consumerVM="${survey}-consumer-${testid}"
zone="us-central1-a"

# Stop the VM
gcloud compute instances stop ${consumerVM} --zone ${zone}
```
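
To check the metadata currently set on the VM (an illustrative verification step, not part of the README):

```bash
# list the instance metadata, including KAFKA_TOPIC and PS_TOPIC
gcloud compute instances describe ${consumerVM} --zone=${zone} \
    --format="value(metadata.items)"
```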
13 changes: 13 additions & 0 deletions broker/consumer/swift/admin.properties
@@ -0,0 +1,13 @@
# Kafka Admin client configs
# This file is part of a workflow that creates an authenticated connection to the Kafka broker.
# In cases where we can connect without authentication (e.g., ZTF), this file is not used.
# For config options, see https://kafka.apache.org/documentation/#adminclientconfigs
# For Swift-specific options, see https://gcn.nasa.gov/docs/client#java

security.protocol=SASL_SSL
sasl.mechanism=OAUTHBEARER
sasl.login.callback.handler.class=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginCallbackHandler
sasl.oauthbearer.token.endpoint.url=https://auth.gcn.nasa.gov/oauth2/token
sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required \
clientId="CLIENT_ID" \
clientSecret="CLIENT_SECRET";
41 changes: 41 additions & 0 deletions broker/consumer/swift/ps-connector.properties
@@ -0,0 +1,41 @@
# Kafka Connect sink connector configs
# For config options, see https://docs.confluent.io/platform/current/installation/configuration/connect/sink-connect-configs.html
# For additional Pub/Sub-specific options, see https://github.com/googleapis/java-pubsub-group-kafka-connector?tab=readme-ov-file#sink-connector
#
# --------------------------------------------------------------------------
# This file is adapted from:
# https://github.com/googleapis/java-pubsub-group-kafka-connector/blob/main/config/cps-sink-connector.properties
# The original copyright and license are reproduced below.
#
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------

# Unique name for the Pub/Sub sink connector.
name=CPSSinkConnector
# The Java class for the Pub/Sub sink connector.
connector.class=com.google.pubsub.kafka.sink.CloudPubSubSinkConnector
# The maximum number of tasks that should be created for this connector.
tasks.max=1
Reviewer comment (Collaborator):
What is the alert rate you expect from this stream? I'm guessing that one task is probably fine, but it's worth checking.

Follow-up (Collaborator):
Now I see in the data listings PR, "less than 1 alert per week". So 1 task will be fine.

# Set the key converter for the Pub/Sub sink connector.
key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
# Set the value converter for the Pub/Sub sink connector.
value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
# Set the Kafka topic
topics=KAFKA_TOPIC
# Set the Pub/Sub configs
cps.project=PROJECT_ID
cps.topic=PS_TOPIC
# include Kafka topic, partition, offset, timestamp as msg attributes
metadata.publish=true
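
The KAFKA_TOPIC / PS_TOPIC / PROJECT_ID placeholders line up with the VM metadata set in the consumer README. A sketch of how a startup script might fill them (the exact mechanism in this PR may differ):

```bash
# query the VM metadata server for the values set via `gcloud compute instances add-metadata`
attr_url="http://metadata.google.internal/computeMetadata/v1/instance/attributes"
KAFKA_TOPIC=$(curl -s -H "Metadata-Flavor: Google" "${attr_url}/KAFKA_TOPIC")
PS_TOPIC=$(curl -s -H "Metadata-Flavor: Google" "${attr_url}/PS_TOPIC")
# substitute the placeholders in place
sed -i "s|KAFKA_TOPIC|${KAFKA_TOPIC}|g; s|PS_TOPIC|${PS_TOPIC}|g; s|PROJECT_ID|${GOOGLE_CLOUD_PROJECT}|g" ps-connector.properties
```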
35 changes: 35 additions & 0 deletions broker/consumer/swift/psconnect-worker-authenticated.properties
@@ -0,0 +1,35 @@
# Kafka Connect worker configuration
# This file is part of a workflow that creates an authenticated connection to the Kafka broker.
# For config options, see https://docs.confluent.io/platform/current/connect/references/allconfigs.html#worker-configuration-properties
# See also: https://kafka.apache.org/documentation/#adminclientconfigs

bootstrap.servers=kafka.gcn.nasa.gov:9092
plugin.path=/usr/local/share/kafka/plugins
offset.storage.file.filename=/tmp/connect.offsets
connections.max.idle.ms=5400000

# ByteArrayConverter provides a “pass-through” option that does no conversion.
key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
value.converter=org.apache.kafka.connect.converters.ByteArrayConverter

# workers need to use SASL
sasl.mechanism=OAUTHBEARER
sasl.kerberos.service.name=kafka
security.protocol=SASL_SSL
sasl.login.callback.handler.class=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginCallbackHandler
sasl.oauthbearer.token.endpoint.url=https://auth.gcn.nasa.gov/oauth2/token
sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required \
clientId="CLIENT_ID" \
clientSecret="CLIENT_SECRET";

# settings with `consumer.` prefixes are passed through to the Kafka consumer
consumer.group.id=GROUP_ID
consumer.auto.offset.reset=earliest
consumer.sasl.mechanism=OAUTHBEARER
consumer.sasl.kerberos.service.name=kafka
consumer.security.protocol=SASL_SSL
consumer.sasl.login.callback.handler.class=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginCallbackHandler
consumer.sasl.oauthbearer.token.endpoint.url=https://auth.gcn.nasa.gov/oauth2/token
consumer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required \
clientId="CLIENT_ID" \
clientSecret="CLIENT_SECRET";