Commits (49)
a6822ed
add directories and files
hernandezc1 Jun 17, 2025
8470a38
update classification field in bg table schema
hernandezc1 Jun 17, 2025
d9e5e1d
address codacy issue(s)
hernandezc1 Jun 17, 2025
415a33e
update `classification` and `properties` fields
hernandezc1 Jun 17, 2025
d768787
Merge branch 'develop' into u/ch/swift
hernandezc1 Jun 18, 2025
3562bce
Merge branch 'develop' into u/ch/swift
hernandezc1 Jun 19, 2025
8fa6c14
add new GCP resources
hernandezc1 Jun 26, 2025
d89a760
add ps_to_storage module for Swift
hernandezc1 Jun 26, 2025
241e4bc
update metadata key/value pairs
hernandezc1 Jun 26, 2025
d965999
address codacy issues
hernandezc1 Jun 26, 2025
dad2587
add IAM policy for BQ dataset
hernandezc1 Jun 27, 2025
08e9486
configure IAM policy for BQ dataset
hernandezc1 Jun 27, 2025
2633278
update script to accomodate swift alert schema
hernandezc1 Jun 27, 2025
1f8ec8f
ensures avro bucket does not exist before creating it
hernandezc1 Jun 27, 2025
d6fd04a
creates vm if it does not already exist
hernandezc1 Jun 27, 2025
03652f7
set up artifact registry
hernandezc1 Jun 27, 2025
86cdebc
update default ps topic
hernandezc1 Jun 27, 2025
e151221
update file metadata
hernandezc1 Jun 27, 2025
18072e8
use latest version of the kafka -> pubsub connector
hernandezc1 Jun 27, 2025
26547a1
update documentation
hernandezc1 Jun 27, 2025
48c7ae4
address codacy issue
hernandezc1 Jun 27, 2025
97d18b3
update documentation
hernandezc1 Jun 27, 2025
3a2655e
improve readability by updating parameter names
hernandezc1 Jun 29, 2025
9b746d2
update documentation
hernandezc1 Jun 30, 2025
f1d221e
update parameter names
hernandezc1 Jun 30, 2025
79805a5
squash bug
hernandezc1 Jun 30, 2025
face1ab
update documentation
hernandezc1 Jun 30, 2025
bf64911
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 1, 2025
76504c4
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 1, 2025
ae974ec
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 8, 2025
43ed80c
unpin requirements
hernandezc1 Jul 8, 2025
1f1f54b
rename GCP resources
hernandezc1 Jul 8, 2025
e710bd7
rename GCP resources
hernandezc1 Jul 8, 2025
efe7f18
squash bug and update resource names
hernandezc1 Jul 9, 2025
d4fbb19
use $PROJECT_ID directly where applicable
hernandezc1 Jul 9, 2025
2c25fd8
update documentation
hernandezc1 Jul 9, 2025
98f3ccf
add versiontag as an env_var
hernandezc1 Jul 9, 2025
7587e56
update resource name
hernandezc1 Jul 9, 2025
3c5a658
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
4bf33e5
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
1fca11f
assign dead letter topic
hernandezc1 Jul 17, 2025
cf3a4c7
update Pub/Sub dead letter topic name
hernandezc1 Jul 17, 2025
968493c
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
1122672
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 17, 2025
837f736
Merge branch 'develop' into u/ch/swift
hernandezc1 Jul 22, 2025
81bb094
update resource names
hernandezc1 Aug 6, 2025
45651e8
update resource names
hernandezc1 Aug 6, 2025
882187c
Merge branch 'develop' into u/ch/swift
hernandezc1 Sep 24, 2025
037ba42
Merge branch 'develop' into u/ch/swift
hernandezc1 Sep 26, 2025
21 changes: 21 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/Dockerfile
@@ -0,0 +1,21 @@
# Use the official lightweight Python image.
# https://hub.docker.com/_/python
FROM python:3.12-slim

# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED True

# Copy local code to the container image.
ENV APP_HOME /app
WORKDIR $APP_HOME
COPY . ./

# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Run the web service on container startup. Here we use the gunicorn
# webserver, with one worker process and 8 threads.
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
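
As a sketch only: a container built from this Dockerfile could be exercised locally before deploying. The image tag and IDs below are placeholders, and application default credentials would still need to be available for the storage and logging clients in main.py to initialize.

docker build -t swift-alerts-to-storage .
docker run --rm -p 8080:8080 \
  -e PORT=8080 -e GCP_PROJECT=my-project -e SURVEY=swift -e TESTID=mytest \
  swift-alerts-to-storage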
29 changes: 29 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/cloudbuild.yaml
@@ -0,0 +1,29 @@
# https://cloud.google.com/build/docs/deploying-builds/deploy-cloud-run
# containerize the module and deploy it to Cloud Run
steps:
# Build the image
- name: 'gcr.io/cloud-builders/docker'
args: ['build', '-t', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}', '.']
# Push the image to Artifact Registry
- name: 'gcr.io/cloud-builders/docker'
args: ['push', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}']
# Deploy image to Cloud Run
- name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
entrypoint: gcloud
args: ['run', 'deploy', '${_MODULE_NAME}', '--image', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}', '--region', '${_REGION}', '--set-env-vars', '${_ENV_VARS}']
images:
- '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_IMAGE_NAME}'
substitutions:
_SURVEY: 'swift'
_TESTID: 'testid'
_MODULE_NAME: '${_SURVEY}-alerts-to-storage-${_TESTID}'
_MODULE_IMAGE_NAME: 'gcr.io/${PROJECT_ID}/${_REPOSITORY}/${_MODULE_NAME}'
_REPOSITORY: 'cloud-run-services'
# cloud functions automatically sets the projectid env var using the name "GCP_PROJECT"
# use the same name here for consistency
# [TODO] PROJECT_ID is set in setup.sh. this is confusing and we should revisit the decision.
# i (Raen) think i didn't make it a substitution because i didn't want to set a default for it.
_ENV_VARS: 'GCP_PROJECT=${PROJECT_ID},SURVEY=${_SURVEY},TESTID=${_TESTID}'
_REGION: 'us-central1'
options:
dynamic_substitutions: true
95 changes: 95 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/deploy.sh
Collaborator:
There are a bunch of variables in here with names that include "avro" -- I'm sure this is a holdover from the ZTF and LSST modules. For reusability, we should change those names because not all surveys publish avro alerts. For Swift in particular, assuming they publish in json and not avro, having "avro" here is confusing.

@@ -0,0 +1,95 @@
#! /bin/bash
# Deploys or deletes broker Cloud Run service
# This script will not delete Cloud Run services that are in production

# "False" uses production resources
# any other string will be appended to the names of all resources
testid="${1:-test}"
# "True" tearsdown/deletes resources, else setup
teardown="${2:-False}"
# name of the survey this broker instance will ingest
survey="${3:-swift}"
region="${4:-us-central1}"
# get the environment variable
PROJECT_ID=$GOOGLE_CLOUD_PROJECT

MODULE_NAME="alerts-to-storage" # lower case required by cloud run
ROUTE_RUN="/" # url route that will trigger main.run()

define_GCP_resources() {
local base_name="$1"
local testid_suffix=""

if [ "$testid" != "False" ]; then
testid_suffix="-${testid}"
fi
echo "${base_name}${testid_suffix}"
}

#--- GCP resources used in this script
artifact_registry_repo=$(define_GCP_resources "${survey}-cloud-run-services")
cr_module_name=$(define_GCP_resources "${survey}-${MODULE_NAME}") # lower case required by cloud run
gcs_avro_bucket=$(define_GCP_resources "${PROJECT_ID}-${survey}_alerts")
ps_input_subscrip=$(define_GCP_resources "${survey}-alerts_raw") # pub/sub subscription used to trigger cloud run module
ps_subscription_avro=$(define_GCP_resources "${survey}-alert_avros-counter")
Collaborator:
Are you actually using these counter subscriptions? If not, get rid of this. I think ZTF is the only broker pipeline that uses them.

FWIW, ZTF uses them with an ancillary module that helps us track broker performance. The module is very useful, but the way I wrote it is complicated and makes it expensive, which is why we haven't added it to any other broker pipeline. We should rewrite it using bigquery subscriptions (which didn't exist when I wrote the original) -- #172.
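
For illustration, a sketch of what that rewrite might look like with a BigQuery subscription attached directly to the topic; the dataset/table name here is hypothetical and the table schema would need to match what Pub/Sub writes.

gcloud pubsub subscriptions create "${ps_subscription_avro}" \
  --topic="${ps_topic_avro}" \
  --bigquery-table="${PROJECT_ID}:${survey}_alerts.alert_avros" \
  --write-metadata  # also record messageId/publishTime so throughput can be computed in SQL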

ps_topic_avro=$(define_GCP_resources "projects/${PROJECT_ID}/topics/${survey}-alert_avros")
Collaborator:
I know this is the original name of the topic, but using "avros" is not generically good for the aforementioned reasons, and if Swift publishes json then we definitely shouldn't use it here. Let's think of something better. <survey>-alert_in_bucket? Just off the top of my head.

ps_trigger_topic=$(define_GCP_resources "${survey}-alerts_raw")
runinvoker_svcact="cloud-run-invoker@${PROJECT_ID}.iam.gserviceaccount.com"

if [ "${teardown}" = "True" ]; then
# ensure that we do not teardown production resources
if [ "${testid}" != "False" ]; then
echo
echo "Deleting resources for ${MODULE_NAME} module..."
gsutil rm -r "gs://${gcs_avro_bucket}"
gcloud pubsub topics delete "${ps_topic_avro}"
gcloud pubsub subscriptions delete "${ps_subscription_avro}"
gcloud pubsub subscriptions delete "${ps_input_subscrip}"
gcloud run services delete "${cr_module_name}" --region "${region}"
fi
else
echo
echo "Creating avro_bucket..."
Collaborator:
Let's revisit which scripts create and delete which resources. I know we had a whole discussion about it when we first created these deploy.sh scripts for individual modules and decided to put this bucket creation here, but this is the only public resource that we handle this way and I've been confused by it more than once. Every other resource gets created by setup_broker.sh. At a minimum, our handling of this bucket and the bigquery dataset/table should be consistent.

if ! gsutil ls -b "gs://${gcs_avro_bucket}" >/dev/null 2>&1; then
#--- Create the bucket that will store the alerts
gsutil mb -l "${region}" "gs://${gcs_avro_bucket}"
gsutil uniformbucketlevelaccess set on "gs://${gcs_avro_bucket}"
gsutil requesterpays set on "gs://${gcs_avro_bucket}"
gcloud storage buckets add-iam-policy-binding "gs://${gcs_avro_bucket}" \
--member="allUsers" \
--role="roles/storage.objectViewer"
Collaborator:
Similar to the bigquery policy, we should incorporate the permissions being granted here into our custom userPublic role so that we consolidate all the permissions we intend to grant to the public and we can apply the same role/policy to every resource. Also, we should really only give public access to production resources.

Collaborator Author:
I'd like to standardize permissions across broker instances for all surveys in a separate PR
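
One possible shape of that consolidation, as a rough sketch only: the permission list is an assumption (not the project's actual userPublic definition), and the binding would presumably be applied only when testid is "False".

gcloud iam roles create userPublic --project="${PROJECT_ID}" \
  --title="Public data access" \
  --permissions="storage.objects.get,storage.objects.list"
gcloud storage buckets add-iam-policy-binding "gs://${gcs_avro_bucket}" \
  --member="allUsers" \
  --role="projects/${PROJECT_ID}/roles/userPublic"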

else
echo "${gcs_avro_bucket} already exists."
fi

#--- Setup the Pub/Sub notifications on the Avro storage bucket
echo
echo "Configuring Pub/Sub notifications on GCS bucket..."
trigger_event=OBJECT_FINALIZE
format=json # json or none; if json, file metadata sent in message body
gsutil notification create \
-t "$ps_topic_avro" \
-e "$trigger_event" \
-f "$format" \
"gs://${gcs_avro_bucket}"
gcloud pubsub subscriptions create "${ps_subscription_avro}" --topic="${ps_topic_avro}"

#--- Deploy the Cloud Run service
echo
echo "Creating container image for ${MODULE_NAME} module and deploying to Cloud Run..."
moduledir="." # assumes deploying what's in our current directory
config="${moduledir}/cloudbuild.yaml"
url=$(gcloud builds submit --config="${config}" \
--substitutions="_SURVEY=${survey},_TESTID=${testid},_MODULE_NAME=${cr_module_name},_REPOSITORY=${artifact_registry_repo}" \
"${moduledir}" | sed -n 's/^Step #2: Service URL: \(.*\)$/\1/p')
echo
echo "Creating trigger subscription for ${MODULE_NAME} Cloud Run service..."
# WARNING: This is set to retry failed deliveries. If there is a bug in main.py this will
# retry indefinitely, until the message is deleted manually.
gcloud pubsub subscriptions create "${ps_input_subscrip}" \
--topic "${ps_trigger_topic}" \
--topic-project "${PROJECT_ID}" \
--ack-deadline=600 \
--push-endpoint="${url}${ROUTE_RUN}" \
--push-auth-service-account="${runinvoker_svcact}"
Collaborator:
Did you figure out how to limit the number of retries for cloud run services? If so, implement it here so we can drop this disaster waiting to happen 🥴.

Collaborator Author:
@troyraen the only solution I can think of is the following:

# add to line 43
ps_deadletter_topic_input_subscrip=$(define_GCP_resources "${survey}-upsilon-deadletter") 

# add to line 51
gcloud pubsub topics delete "${ps_deadletter_topic_input_subscrip}"

# add to line 61
gcloud pubsub topics create "${ps_deadletter_topic_input_subscrip}"

gcloud pubsub subscriptions create "${ps_input_subscrip}" \
  --topic="${ps_trigger_topic}" \
  --topic-project="${PROJECT_ID}" \
  --ack-deadline=600 \
  --push-endpoint="${url}${ROUTE_RUN}" \
  --push-auth-service-account="${runinvoker_svcact}" \
  --dead-letter-topic="${ps_deadletter_topic_input_subscrip}" \
  --max-delivery-attempts=5

Rather than retrying indefinitely, the message will be published to a dead letter topic after 5 delivery attempts

Collaborator:
Ok. What do you think about having only one deadletter topic that we use for every module? I'm resistant to having to manage a different one for each module but I also don't want to create a mess there if/when we actually need to use it. I don't have a clear sense of if/how we will actually want to work with messages that get delivered there. Do you? If we only have one deadletter topic and multiple modules start failing and dumping messages there, will we actually have a need to dig through them and sort out which ones came from which module? I think I would be more inclined to look at the bigquery tables to figure out which messages did/didn't make it through a given module. What do you think?
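
For illustration, one way a single shared dead-letter topic could still be sorted by module: Pub/Sub attaches attributes such as CloudPubSubDeadLetterSourceSubscription to forwarded messages, so a filtered subscription (or a one-off pull that inspects that attribute) could isolate one module's failures without per-module topics. The names below are hypothetical.

gcloud pubsub topics create "${survey}-deadletter"
gcloud pubsub subscriptions create "${survey}-deadletter-alerts-to-storage" \
  --topic="${survey}-deadletter" \
  --message-filter="attributes.CloudPubSubDeadLetterSourceSubscription = \"${ps_input_subscrip}\""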

fi
107 changes: 107 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/main.py
@@ -0,0 +1,107 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""This module stores Swift/BAT-GUANO alert data as a JSON file in Cloud Storage."""

import os
import flask
import pittgoogle
from google.cloud import logging, storage
from google.cloud.exceptions import PreconditionFailed

# [FIXME] Make this helpful or else delete it.
# Connect the python logger to the google cloud logger.
# By default, this captures INFO level and above.
# pittgoogle uses the python logger.
# We don't currently use the python logger directly in this script, but we could.
logging.Client().setup_logging()

PROJECT_ID = os.getenv("GCP_PROJECT")
TESTID = os.getenv("TESTID")
SURVEY = os.getenv("SURVEY")

# Variables for incoming data
# A url route is used in deploy.sh when the trigger subscription is created.
# It is possible to define multiple routes in a single module and trigger them using different subscriptions.
ROUTE_RUN = "/" # HTTP route that will trigger run(). Must match deploy.sh

# Variables for outgoing data
HTTP_204 = 204 # HTTP code: Success
HTTP_400 = 400 # HTTP code: Bad Request

# GCP resources used in this module
TOPIC_ALERTS_JSON = pittgoogle.Topic.from_cloud(
"alerts-json", survey=SURVEY, testid=TESTID, projectid=PROJECT_ID
)
Comment on lines 34 to 36
Collaborator:
Do these alerts come from Swift as json or avro? If json, I think the name of this topic can just be swift-alerts since we're not changing the serialization. Also, if this stays as swift-alerts-json does that mean we don't publish any topic that's just called swift-alerts? I think from a usability/consistency standpoint we should always publish a stream called <survey>-alerts that is just a pass through of the survey's full (but deduplicated) alert stream from Kafka (or whatever) into Pub/Sub.

If that seems confusing in comparison with our LSST streams where lsst-alerts is avro and lsst-alerts-json is json, maybe we consider changing those names so that lsst-alerts is the json version and we make the avro one called lsst-alerts-avro? Benefit of the current naming is that lsst-alerts is byte-for-byte the same as what Rubin publishes. That was my original intention for all of our <survey>-alerts streams, and in that sense using swift-alerts is consistent (assuming Swift really does publish these as json -- otherwise, sorry for this irrelevant tangent). But since all of our topics downstream of <survey>-alerts use json exclusively, I can see an argument for making all <survey>-alerts streams json as well.

Collaborator Author:
@troyraen the alerts from Swift are JSON serialized. I named the Pub/Sub resource that way because at the time it seemed more appropriate and descriptive, but I agree that having a <survey>-alerts topic that is just a pass through of the survey's full (deduplicated) alert stream is the convention this module should adopt

bucket_name = f"{PROJECT_ID}-{SURVEY}_alerts"
if TESTID != "False":
bucket_name = f"{bucket_name}-{TESTID}"

client = storage.Client()
bucket = client.get_bucket(client.bucket(bucket_name, user_project=PROJECT_ID))

app = flask.Flask(__name__)


@app.route(ROUTE_RUN, methods=["POST"])
def run():
Collaborator:
Suggested change
def run():
def run() -> tuple[str, int]:

"""Uploads alert data to a GCS bucket. Publishes a de-duplicated JSON-serialized "alerts" stream
(${survey}-alerts-json) containing the original alert bytes. A BigQuery subscription is used to write alert data to
the appropriate BigQuery table.
This module is intended to be deployed as a Cloud Run service. It will operate as an HTTP endpoint
triggered by Pub/Sub messages. This function will be called once for every message sent to this route.
It should accept the incoming HTTP request and return a response.
Returns
-------
response : tuple(str, int)
Tuple containing the response body (string) and HTTP status code (int). Flask will convert the
tuple into a proper HTTP response. Note that the response is a status message for the web server.
"""
# extract the envelope from the request that triggered the endpoint
# this contains a single Pub/Sub message with the alert to be processed
envelope = flask.request.get_json()
try:
alert = pittgoogle.Alert.from_cloud_run(envelope, schema_name="default")
except pittgoogle.exceptions.BadRequest as exc:
return str(exc), HTTP_400

blob = bucket.blob(_name_in_bucket(alert))
blob.metadata = _create_file_metadata(alert, event_id=envelope["message"]["messageId"])

# raise a PreconditionFailed exception if filename already exists in the bucket using "if_generation_match=0"
# let it raise. the message will be dropped.
Collaborator:
Suggested change
# let it raise. the message will be dropped.

This comment is a holdover from my original code but no longer makes sense because we've now implemented the try/except right here.

try:
blob.upload_from_string(alert.msg.data, if_generation_match=0)
except PreconditionFailed:
# this alert is a duplicate. drop it.
return "", HTTP_204

# publish the same alert as JSON
TOPIC_ALERTS_JSON.publish(alert)

return "", HTTP_204


def _create_file_metadata(alert: pittgoogle.Alert, event_id: str) -> dict:
"""Return key/value pairs to be attached to the file as metadata."""
# https://github.com/nasa-gcn/gcn-schema/blob/main/gcn/notices/swift/bat/Guano.example.json
metadata = {"file_origin_message_id": event_id}
metadata["_".join("alert_datetime")] = alert.dict["alert_datetime"]
metadata["_".join("alert_type")] = alert.dict["alert_type"]
metadata["_".join("classification")] = alert.dict["classification"]
metadata["_".join("id")] = alert.dict["id"]

return metadata


def _name_in_bucket(alert: pittgoogle.Alert) -> str:
"""Return the name of the file in the bucket."""
# not easily able to extract schema version, see:
# https://github.com/nasa-gcn/gcn-schema/blob/main/gcn/notices/swift/bat/Guano.example.json
_date = alert.dict["alert_datetime"][0:10]
_alert_type = alert.dict["alert_type"]
_id = alert.dict["id"][0]

return f"{_date}/{_alert_type}/{_id}.json"
Collaborator:
Let's figure out how to add the schema version. Otherwise it will be difficult to do things like figure out how the bucket organization maps to the dataset/table organization. The pipeline must know the version for things like naming the bigquery table, so perhaps the easiest solution is to add it as an env var to this module.
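
A minimal sketch of the env-var approach; the variable name and version string are assumptions and would need to track the GCN notice schema release. In deploy.sh (or via the cloudbuild substitutions) it could be set on the service, e.g.:

gcloud run services update "${cr_module_name}" --region="${region}" \
  --update-env-vars="SCHEMA_VERSION=1.0.0"

main.py could then read os.getenv("SCHEMA_VERSION") and include it in the path returned by _name_in_bucket, mirroring whatever versioned name the BigQuery table gets.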

15 changes: 15 additions & 0 deletions broker/cloud_run/swift/ps_to_storage/requirements.txt
@@ -0,0 +1,15 @@
# As explained here
# https://cloud.google.com/functions/docs/writing/specifying-dependencies-python
# dependencies for a Cloud Function must be specified in a `requirements.txt`
# file (or packaged with the function) in the same directory as `main.py`

google-cloud-logging
google-cloud-storage
pittgoogle-client>=0.3.15

# for Cloud Run
# https://cloud.google.com/run/docs/quickstarts/build-and-deploy/deploy-python-service
# pinned following quickstart example. [TODO] consider un-pinning
Flask==3.0.3
gunicorn==23.0.0
Werkzeug==3.0.6
Collaborator:
Suggested change
# for Cloud Run
# https://cloud.google.com/run/docs/quickstarts/build-and-deploy/deploy-python-service
# pinned following quickstart example. [TODO] consider un-pinning
Flask==3.0.3
gunicorn==23.0.0
Werkzeug==3.0.6
# for Cloud Run
# https://cloud.google.com/run/docs/quickstarts/build-and-deploy/deploy-python-service
Flask
gunicorn
Werkzeug

6 changes: 3 additions & 3 deletions broker/consumer/swift/vm_install.sh
@@ -1,6 +1,6 @@
#! /bin/bash
# Installs the software required to run the Kafka Consumer.
# Assumes a Debian 10 OS.
# Assumes a Debian 12 OS.

#--- Get metadata attributes
baseurl="http://metadata.google.internal/computeMetadata/v1"
@@ -33,7 +33,7 @@ snap install core
snap install yq

#--- Install Java and the dev kit
# see https://www.digitalocean.com/community/tutorials/how-to-install-java-with-apt-on-debian-10
# see https://www.digitalocean.com/community/tutorials/how-to-install-java-with-apt-on-debian-11
apt update
echo "Installing Java..."
apt install -y default-jre
@@ -61,7 +61,7 @@ echo "Done installing Confluent Platform."
echo "Installing the Kafka -> Pub/Sub connector"
(
plugindir=/usr/local/share/kafka/plugins
CONNECTOR_RELEASE="1.1.0"
CONNECTOR_RELEASE="1.3.2"
mkdir -p ${plugindir}
#- install the connector
cd ${plugindir}
2 changes: 1 addition & 1 deletion broker/consumer/swift/vm_startup.sh
@@ -23,7 +23,7 @@ fi

#--- GCP resources used in this script
broker_bucket="${PROJECT_ID}-${survey}-broker_files"
PS_TOPIC_DEFAULT="${survey}-alerts"
PS_TOPIC_DEFAULT="${survey}-alerts_raw"
# use test resources, if requested
if [ "$testid" != "False" ]; then
broker_bucket="${broker_bucket}-${testid}"
45 changes: 25 additions & 20 deletions broker/setup_broker/swift/create_vm.sh
@@ -2,15 +2,17 @@
# Creates or deletes the GCP VM instances needed by the broker.
# This script will not delete VMs that are in production


broker_bucket=$1 # name of GCS bucket where broker files are staged
# name of GCS bucket where broker files are staged
gcs_broker_bucket=$1
# "False" uses production resources
# any other string will be appended to the names of all resources
testid="${2:-test}"
# "False" uses production resources
# any other string will be appended to the names of all resources
teardown="${3:-False}" # "True" tearsdown/deletes resources, else setup
survey="${4:-swift}"
# "True" tearsdown/deletes resources, else setup
teardown="${3:-False}"
# name of the survey this broker instance will ingest
survey="${4:-swift}"
zone="${5:-us-central1-a}"
project_id="${6:-PROJECT_ID}"
Collaborator:
No need to define yet another name for this variable. Just use $PROJECT_ID directly where applicable. (GCP makes this variable confusing by using different names for it in different contexts -- PROJECT_ID and GOOGLE_CLOUD_PROJECT being most common, but there's at least one more. In our scripts we try to standardize on PROJECT_ID.)


#--- GCP resources used in this script
consumerVM="${survey}-consumer"
@@ -25,19 +27,22 @@ if [ "$teardown" = "True" ]; then
if [ "$testid" != "False" ]; then
gcloud compute instances delete "$consumerVM" --zone="$zone"
fi

#--- Create resources
#--- Setup resources if they do not exist
else
#--- Consumer VM
# create VM
machinetype=e2-custom-1-5632
# metadata
googlelogging="google-logging-enabled=true"
startupscript="startup-script-url=gs://${broker_bucket}/${survey}/vm_install.sh"
shutdownscript="shutdown-script-url=gs://${broker_bucket}/${survey}/vm_shutdown.sh"
gcloud compute instances create "$consumerVM" \
--zone="$zone" \
--machine-type="$machinetype" \
--scopes=cloud-platform \
--metadata="${googlelogging},${startupscript},${shutdownscript}"
if ! gcloud compute instances describe "${consumerVM}" --zone="${zone}" --project="${project_id}" >/dev/null 2>&1; then
machinetype=e2-custom-1-5632
Collaborator:
Do the installs actually succeed with a machine type this small? I recall needing a bigger machine for the set up and then making the machine type smaller for normal operations.

Collaborator Author (hernandezc1, Jul 8, 2025):
Do the installs actually succeed with a machine type this small?

It worked when I tested it!
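
If a larger machine does turn out to be needed for the installs, one option (a sketch, not what this PR does) is to create the VM at a bigger size and shrink it afterwards; the instance must be stopped to resize, and the target type below just reuses the current value from this script.

gcloud compute instances stop "${consumerVM}" --zone="${zone}"
gcloud compute instances set-machine-type "${consumerVM}" \
  --zone="${zone}" --machine-type=e2-custom-1-5632
gcloud compute instances start "${consumerVM}" --zone="${zone}"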

# metadata
googlelogging="google-logging-enabled=true"
startupscript="startup-script-url=gs://${gcs_broker_bucket}/${survey}/vm_install.sh"
shutdownscript="shutdown-script-url=gs://${gcs_broker_bucket}/${survey}/vm_shutdown.sh"
#--- Create VM
gcloud compute instances create "$consumerVM" \
--zone="$zone" \
--machine-type="$machinetype" \
--scopes=cloud-platform \
--metadata="${googlelogging},${startupscript},${shutdownscript}"
else
echo
echo "VM instance ${consumerVM} already exists in zone ${zone}."
fi
fi