-
Notifications
You must be signed in to change notification settings - Fork 0
Update SMT UDFs for LSST modules #302
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 10 commits
65f9362
3e0fc7b
4ca5dc5
44f5c76
499b771
98ebe09
029599d
7bc02c6
1c6efd6
1114322
e2ff196
2650b6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,10 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: UTF-8 -*- | ||
|
|
||
| """This module stores LSST alert data as an Avro file in Cloud Storage.""" | ||
| """This module stores LSST alert data as an Avro file in Cloud Storage and publishes it to various Pub/Sub topics.""" | ||
|
|
||
| import os | ||
| from typing import Any | ||
| import flask | ||
| import pittgoogle | ||
| from google.cloud import logging, storage | ||
|
|
@@ -28,6 +29,51 @@ | |
| # Variables for outgoing data | ||
| HTTP_204 = 204 # HTTP code: Success | ||
| HTTP_400 = 400 # HTTP code: Bad Request | ||
| LITE_FIELDS_CONFIG = { | ||
| "diaSource": { | ||
| "fields": { | ||
| "diaSourceId", | ||
|
Comment on lines
+32
to
+35
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fine for now, but it would be great to define this in a YAML file in pittgoogle-client instead. |
||
| "midpointMjdTai", | ||
| "ra", | ||
| "raErr", | ||
| "dec", | ||
| "decErr", | ||
| "psfFlux", | ||
| "psfFluxErr", | ||
| "band", | ||
| }, | ||
| "is_list": False, | ||
| }, | ||
| "prvDiaSources": { | ||
| "fields": { | ||
| "diaSourceId", | ||
| "midpointMjdTai", | ||
| "ra", | ||
| "raErr", | ||
| "dec", | ||
| "decErr", | ||
| "psfFlux", | ||
| "psfFluxErr", | ||
| "band", | ||
| }, | ||
| "is_list": True, | ||
| }, | ||
| "diaObject": { | ||
| "fields": { | ||
| "diaObjectId", | ||
| "lastDiaSourceMjdTai", | ||
| "firstDiaSourceMjdTai", | ||
| "nDiaSources", | ||
| "u_psfFluxErrMean", | ||
| "g_psfFluxErrMean", | ||
| "r_psfFluxErrMean", | ||
| "i_psfFluxErrMean", | ||
| "z_psfFluxErrMean", | ||
| "y_psfFluxErrMean", | ||
| }, | ||
| "is_list": False, | ||
| }, | ||
| } | ||
|
|
||
| # GCP resources used in this module | ||
| TOPIC_ALERTS = pittgoogle.Topic.from_cloud( | ||
|
|
@@ -88,12 +134,8 @@ def run(): | |
| TOPIC_ALERTS.publish(alert) | ||
| # publish the same alert as JSON. Data will be coerced to valid JSON by pittgoogle. | ||
| TOPIC_ALERTS_JSON.publish(alert, serializer="json") | ||
| # add top-level key for lite stream | ||
| alert_lite = pittgoogle.Alert.from_dict( | ||
| payload={"alert_lite": alert.dict}, | ||
| attributes={**alert.attributes}, | ||
| ) | ||
| TOPIC_LITE.publish(alert_lite, serializer="json") | ||
| # publish a lite version of the alert as JSON | ||
| TOPIC_LITE.publish(_create_lite_alert(alert), serializer="json") | ||
|
|
||
| return "", HTTP_204 | ||
|
|
||
|
|
@@ -109,3 +151,33 @@ def _create_file_metadata(alert: pittgoogle.Alert, event_id: str) -> dict: | |
| metadata["kafka.timestamp"] = alert.attributes["kafka.timestamp"] | ||
|
|
||
| return metadata | ||
|
|
||
|
|
||
def _create_lite_alert(alert: pittgoogle.Alert) -> pittgoogle.Alert:
    """Build the lite version of an alert.

    Starts from the dictionary returned by ``alert.drop_cutouts()`` and, for every entry
    named in ``LITE_FIELDS_CONFIG`` that is present in that dictionary, swaps the nested
    value for the trimmed version produced by ``_process_field``.

    Args:
        alert: The full alert to derive the lite alert from.

    Returns:
        A new Alert whose payload holds the trimmed dictionary under the top-level key
        ``"alert_lite"`` and whose attributes are a copy of the original alert's attributes.
    """
    lite_payload = alert.drop_cutouts()

    for name, field_config in LITE_FIELDS_CONFIG.items():
        # entries missing from the alert are left out rather than added as empty values
        if name not in lite_payload:
            continue
        lite_payload[name] = _process_field(lite_payload[name], field_config)

    return pittgoogle.Alert.from_dict(
        payload={"alert_lite": lite_payload},
        attributes=dict(alert.attributes),
    )
|
|
||
|
|
||
| def _process_field(original_value: Any, config: dict) -> Any: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this really accept and return `Any`? |
||
| """Filters a dictionary or a list of dictionaries based on the provided configuration.""" | ||
| whitelisted_fields = config["fields"] | ||
|
|
||
| if config["is_list"]: | ||
| return [_filter_dict(item, whitelisted_fields) for item in original_value or []] | ||
| return _filter_dict(original_value, whitelisted_fields) | ||
|
|
||
|
|
||
| def _filter_dict(alert_dict: dict, whitelisted_fields: set) -> dict: | ||
| """Creates a new dictionary containing only the keys specified in whitelisted_fields.""" | ||
|
|
||
| return {k: v for k, v in (alert_dict or {}).items() if k in whitelisted_fields} | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,21 +1,39 @@ | ||
| # https://cloud.google.com/pubsub/docs/smts/create-topic-smt#create | ||
| - javascriptUdf: | ||
| code: > | ||
| function addTopLevelFields(message, metadata) { | ||
| const data = JSON.parse(message.data); | ||
| const attrs = message.attributes || {}; | ||
| function addTopLevelFields(message, metadata) { | ||
| const attrs = message.attributes || {}; | ||
| const dataStr = message.data.toString(); | ||
|
|
||
| const payload = { | ||
| ...data, // spread the original JSON fields into the root | ||
| healpix9: attrs.healpix9 ? Number(attrs.healpix9) : null, | ||
| healpix19: attrs.healpix19 ? Number(attrs.healpix19) : null, | ||
| healpix29: attrs.healpix29 ? Number(attrs.healpix29) : null, | ||
| kafkaPublishTimestamp: attrs["kafka.timestamp"] ? Number(attrs["kafka.timestamp"]) * 1000 : null | ||
| }; | ||
| // Create an empty object to hold the new fields we want to inject into the JSON payload | ||
| const newFields = {}; | ||
|
|
||
| return { | ||
| data: JSON.stringify(payload), | ||
| attributes: attrs // preserve attributes | ||
| }; | ||
| // Extract the following attributes and add them to newFields | ||
| // We avoid casting fields as JavaScript numbers to prevent precision loss | ||
| if (attrs.healpix9) newFields.healpix9 = attrs.healpix9.toString(); | ||
| if (attrs.healpix19) newFields.healpix19 = attrs.healpix19.toString(); | ||
| if (attrs.healpix29) newFields.healpix29 = attrs.healpix29.toString(); | ||
| if (attrs["kafka.timestamp"]) { | ||
| newFields.kafkaPublishTimestamp = attrs["kafka.timestamp"] * 1000; | ||
| } | ||
|
|
||
| // Define the data as a set of key-value pairs to be added to the JSON payload | ||
| const newPairs = Object.entries(newFields) | ||
| .map(([k, v]) => `"${k}":${v}`); | ||
|
|
||
| if (newPairs.length === 0) { | ||
| // No new fields; return the original message | ||
| return message; | ||
| } | ||
|
|
||
| // Inject the new fields into the JSON payload | ||
| const newData = dataStr.endsWith("}") | ||
| ? dataStr.slice(0, -1) + "," + newPairs.join(",") + "}" | ||
| : dataStr; | ||
|
|
||
| return { | ||
| data: newData, | ||
| attributes: attrs | ||
| }; | ||
| } | ||
| functionName: addTopLevelFields |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,49 +1,29 @@ | ||
| # https://cloud.google.com/pubsub/docs/smts/create-topic-smt#create | ||
| - javascriptUdf: | ||
| code: > | ||
| function reformatValueAddedAlert(message, metadata) { | ||
| const data = JSON.parse(message.data); | ||
| const alertLite = data["alert_lite"] || {}; | ||
| const attrs = message.attributes || {}; | ||
|
|
||
| // Find the value_added key | ||
| const valueAddedKey = Object.keys(data).find(k => k !== "alert_lite"); | ||
| const valueAddedField = valueAddedKey ? (data[valueAddedKey] || {}) : {}; | ||
|
|
||
| // Whitelist fields | ||
| const diaObjectFields = ["diaObjectId"]; | ||
| const ssSourceFields = ["ssObjectId"]; | ||
| const diaSourceId = ["diaSourceId"]; | ||
|
|
||
| // Extract whitelisted fields | ||
| function extractFields(obj, fields) { | ||
| if (!obj) return obj; | ||
| const extracted = {}; | ||
| for (const f of fields) { | ||
| if (obj.hasOwnProperty(f)) { | ||
| extracted[f] = obj[f]; | ||
| } | ||
| } | ||
| return extracted; | ||
| } | ||
|
|
||
| const flattened = {}; | ||
|
|
||
| // Extract diaSourceId, diaObjectId and ssObjectId | ||
| Object.assign(flattened, extractFields(alertLite["diaObject"], diaObjectFields)); | ||
| Object.assign(flattened, extractFields(alertLite["ssSource"], ssSourceFields)); | ||
| Object.assign(flattened, extractFields(alertLite, diaSourceId)); | ||
|
|
||
| // Spread all fields from value_added into top-level | ||
| Object.assign(flattened, valueAddedField); | ||
|
|
||
| // Add top-level field | ||
| flattened.kafkaPublishTimestamp = attrs["kafka.timestamp"] ? Number(attrs["kafka.timestamp"]) * 1000 : null; | ||
|
|
||
| // Return transformed message and preserve attributes | ||
| return { | ||
| data: JSON.stringify(flattened), | ||
| attributes: message.attributes | ||
| }; | ||
| } | ||
| functionName: reformatValueAddedAlert | ||
function flattenValueAddedAlert(message, metadata) {
  const attributes = message.attributes || {};
  const parsed = JSON.parse(message.data);

  // The value_added dictionary sits under the first key that is not "alert_lite"
  const extraKey = Object.keys(parsed).find((name) => name !== "alert_lite");
  const valueAdded = extraKey ? (parsed[extraKey] || {}) : {};

  // Missing or empty attributes become null; everything else is emitted as a string
  const asStringOrNull = (value) => (value ? value.toString() : null);

  const kafkaTimestamp = attributes["kafka.timestamp"];

  // Start from the value_added fields, then layer the attribute-derived fields on top
  const flattened = Object.assign({}, valueAdded, {
    diaSourceId: asStringOrNull(attributes.diaSource_diaSourceId),
    diaObjectId: asStringOrNull(attributes.diaObject_diaObjectId),
    ssObjectId: asStringOrNull(attributes.ssSource_ssObjectId),
    kafkaPublishTimestamp: kafkaTimestamp ? kafkaTimestamp * 1000 : null
  });

  // Hand back the flattened payload along with the message's original attributes
  return {
    data: JSON.stringify(flattened),
    attributes: message.attributes
  };
}
| functionName: flattenValueAddedAlert |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At some point, double check that the combination of these three flags does what you expect.