From 1d9c3b9b997cfa6eec163a5ba2c75027122fe2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Taneli=20Lepp=C3=A4?= Date: Thu, 20 Oct 2022 09:04:26 +0200 Subject: [PATCH] pubsub2inbox: version 1.3.0 (#880) * pubsub2inbox: - Added Cloud Run support with Dockerfile and Terraform - Improved SCC finding output processor (expand fields better). - Fixed requirements.txt. - Added new get_gcp_resource filter for fetching information about arbitrary GCP resources. - Added example of sending Cloud IDS findings to SCC. - Bumped version to 1.3.0. * fix --- tools/pubsub2inbox/Dockerfile | 49 +++++ tools/pubsub2inbox/PROCESSORS.md | 3 + tools/pubsub2inbox/README.md | 32 +++- .../pubsub2inbox/examples/scc-cloud-ids.yaml | 63 +++++++ tools/pubsub2inbox/filters/__init__.py | 3 +- tools/pubsub2inbox/filters/gcp.py | 17 ++ tools/pubsub2inbox/helpers/base.py | 27 ++- tools/pubsub2inbox/main.py | 171 +++++++++++++++--- tools/pubsub2inbox/main.tf | 127 ++++++++++++- tools/pubsub2inbox/output/scc.py | 34 +++- tools/pubsub2inbox/requirements.txt | 4 +- .../pubsub2inbox/test/fixtures/cloud-ids.json | 13 ++ tools/pubsub2inbox/variables.tf | 27 ++- 13 files changed, 517 insertions(+), 53 deletions(-) create mode 100644 tools/pubsub2inbox/Dockerfile create mode 100644 tools/pubsub2inbox/examples/scc-cloud-ids.yaml create mode 100644 tools/pubsub2inbox/test/fixtures/cloud-ids.json diff --git a/tools/pubsub2inbox/Dockerfile b/tools/pubsub2inbox/Dockerfile new file mode 100644 index 0000000000..7d32acda50 --- /dev/null +++ b/tools/pubsub2inbox/Dockerfile @@ -0,0 +1,49 @@ +# Copyright 2022 Google, LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use the official lightweight Python image. +FROM python:3.10-slim + +# Allow statements and log messages to immediately appear in the Knative logs +ENV PYTHONUNBUFFERED=True +ENV CONFIG= +ENV SERVICE_ACCOUNT= +ENV LOG_LEVEL=10 +ENV WEBSERVER=1 +ENV PORT=8080 + +ENV APP_HOME /app +WORKDIR $APP_HOME +COPY main.py requirements.txt ./ +RUN mkdir {filters,output,processors,helpers} +COPY filters/*.py filters/ +COPY output/*.py output/ +COPY processors/*.py processors/ +COPY helpers/*.py helpers/ + +# Install some support packages +RUN apt-get update && apt-get install -y libmagic1 + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Run as a web service on using the gunicorn webserver, with one worker process and 8 threads. +# +# For environments with multiple CPU cores, increase the number of workers +# to be equal to the cores available. +# +# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle +# instance scaling. +CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app + diff --git a/tools/pubsub2inbox/PROCESSORS.md b/tools/pubsub2inbox/PROCESSORS.md index b20ea00876..758cba2797 100644 --- a/tools/pubsub2inbox/PROCESSORS.md +++ b/tools/pubsub2inbox/PROCESSORS.md @@ -115,6 +115,9 @@ Output parameters: Permissions: - Browser (`roles/browser`) to fetch project details. + - `roles/securitycenter.findingsEditor` and `roles/securitycenter.findingSecurityMarksWriter` for writing + findings to a custom SCC source. + - Network Viewer (`roles/compute.networkViewer`) for Cloud IDS network ID resolving. Output parameters: diff --git a/tools/pubsub2inbox/README.md b/tools/pubsub2inbox/README.md index 03c484572c..7c0c024d84 100644 --- a/tools/pubsub2inbox/README.md +++ b/tools/pubsub2inbox/README.md @@ -6,9 +6,9 @@ and output processors. Input processors can enrich the incoming messages with de (for example, fetching the budget from Cloud Billing Budgets API). Multiple output processors can be chained together. -Pubsub2Inbox is written in Python 3.8+ and can be deployed as a Cloud Function easily. -To guard credentials and other sensitive information, the tool can fetch its -YAML configuration from Google Cloud Secret Manager. +Pubsub2Inbox is written in Python 3.8+ and can be deployed as a Cloud Function or as a +Cloud Run function easily. To guard credentials and other sensitive information, the tool can +fetch its YAML configuration from Google Cloud Secret Manager. The tool also supports templating of emails, messages and other parameters through [Jinja2 templating](https://jinja.palletsprojects.com/en/2.10.x/templates/). @@ -24,6 +24,7 @@ Out of the box, you'll have the following functionality: - [How to set up programmatic notifications from billing budgets](https://cloud.google.com/billing/docs/how-to/budgets-programmatic-notifications) - [Cloud Security Command Center](https://cloud.google.com/security-command-center) - [Email notifications of findings](examples/scc-config.yaml) ([how to set up finding notifications from SCC](https://cloud.google.com/security-command-center/docs/how-to-notifications)) + - [Create findings from Cloud IDS](examples/scc-cloud-ids.yaml) - [Create custom findings](examples/scc-finding-config.yaml) - [Cloud Storage notifications](examples/storage-config.yaml) - [How to set up Cloud Storage notifications](https://cloud.google.com/storage/docs/reporting-changes) @@ -145,9 +146,12 @@ parameters in when using as a module: - `bucket_location` (string, optional): location of the bucket for Cloud Function archive (defaults to `EU`) - `helper_bucket_name` (string, optional): specify an additional Cloud Storage bucket where the service account is granted `storage.objectAdmin` on - `function_timeout` (number, optional): a timeout for the Cloud Function (defaults to `240` seconds) + - `retry_minimum_backoff` (string, optional): minimum backoff time for exponential backoff retries in Cloud Run. Defaults to 10s. + - `retry_maximum_backoff` (string, optional): maximum backoff time for exponential backoff retries in Cloud Run. Defaults to 600s. + - `cloud_run` (boolean, optional): deploy via Cloud Run instead of Cloud Function. Defaults to `false`. If set to `true`, also specify `cloud_run_container`. + - `cloud_run_container` (string, optional): container image to deploy on Cloud Run. See previous parameter. - -### Deploying manually +## Deploying manually First, we have the configuration in `config.yaml` and we're going to store the configuration for the function as a Cloud Secret Manager secret. @@ -207,6 +211,24 @@ gcloud functions deploy $FUNCTION_NAME \ --project $PROJECT_ID ``` +## Deploying via Cloud Run + +### Building the container + +A [`Dockerfile`](Dockerfile) has been provided for building the container. You can build the +image locally and push it to for example [Artifact Registry](https://cloud.google.com/artifact-registry). + +```sh +docker build -t europe-west4-docker.pkg.dev/$PROJECT_ID/pubsub2inbox/pubsub2inbox . +docker push europe-west4-docker.pkg.dev/$PROJECT_ID/pubsub2inbox/pubsub2inbox +``` + +### Deploying via Terraform + +The provided Terraform scripts can deploy the code as a Cloud Function or Cloud Run. To enable +Cloud Run deployment, build and push the image and set `cloud_run` and `cloud_run_container` +parameters (see the parameter descriptions above). + ### Running tests Run the command: diff --git a/tools/pubsub2inbox/examples/scc-cloud-ids.yaml b/tools/pubsub2inbox/examples/scc-cloud-ids.yaml new file mode 100644 index 0000000000..ba9288f322 --- /dev/null +++ b/tools/pubsub2inbox/examples/scc-cloud-ids.yaml @@ -0,0 +1,63 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Creates findings from Cloud IDS in Cloud Security Command Center. You'll have to use the API +# to create a source first (its identifier looks like organizations/123/sources/456), +# see here for an example: https://cloud.google.com/security-command-center/docs/how-to-api-create-manage-security-sources#creating_a_source +# +# You'll also need the scc_writer permission (if deploying via Terraform). This includes the +# compute.networkViewer role, which is required to turn the network names into IDs for SCC. +# +# Create a Pub/Sub topic and use a log sink with a filter like: +# logName:"ids.googleapis.com%2Fthreat" +# +retryPeriod: 3 day ago + +processors: + - genericjson + +outputs: + - type: scc + source: organizations/382949788687/sources/5355536199717451283 + finding_id: "{{ data.insertId|hash_string('md5') }}" + finding: # + resourceName: | + //compute.googleapis.com/{{ (data.jsonPayload.network|get_gcp_resource("compute", "compute")).selfLinkWithId|replace("https://www.googleapis.com/compute/v1/", "") }} + state: "ACTIVE" + description: | + {{ data.jsonPayload.name }} + + {{ data.jsonPayload.details }} + category: "{{ data.jsonPayload.category|replace('-', '_')|upper }}" + externalUri: "https://console.cloud.google.com/logs/query;cursorTimestamp={{ data.timestamp }};query=timestamp%3D%22{{ data.timestamp }}%22%0AinsertId%3D%22{{ data.insertId }}%22" + indicator: + ipAddresses: + - "{{ data.jsonPayload.source_ip_address }}" + - "{{ data.jsonPayload.destination_ip_address }}" + sourceProperties: + application: "{{ data.jsonPayload.application }}" + direction: "{{ data.jsonPayload.direction }}" + ipProtocol: "{{ data.jsonPayload.ip_protocol }}" + destinationIpAddress: "{{ data.jsonPayload.destination_ip_address }}" + destinationPort: "{{ data.jsonPayload.destination_port }}" + sourceIpAddress: "{{ data.jsonPayload.source_ip_address }}" + sourcePort: "{{ data.jsonPayload.source_port }}" + vulnerability: | + {% if data.jsonPayload.cves is iterable %}{% set cve = {"id":data.jsonPayload.cves[0]} %}{{ {"cve":cve}|json_encode }}{% endif %} + eventTime: "{{ data.jsonPayload.alert_time }}" + createTime: "{{ ''|utc_strftime('%Y-%m-%dT%H:%M:%SZ') }}" + severity: "{{ data.jsonPayload.alert_severity }}" + findingClass: "{{ data.jsonPayload.type|upper }}" + \ No newline at end of file diff --git a/tools/pubsub2inbox/filters/__init__.py b/tools/pubsub2inbox/filters/__init__.py index 017be0917d..82d17abec4 100644 --- a/tools/pubsub2inbox/filters/__init__.py +++ b/tools/pubsub2inbox/filters/__init__.py @@ -15,7 +15,7 @@ from .lists import split, index, merge_dict from .strings import add_links, urlencode, generate_signed_url, json_encode, json_decode, b64decode, csv_encode, re_escape, html_table_to_xlsx, make_list, read_gcs_object, filemagic, hash_string from .date import strftime, utc_strftime, recurring_date -from .gcp import format_cost, get_cost +from .gcp import format_cost, get_cost, get_gcp_resource from .tests import test_contains @@ -42,6 +42,7 @@ def get_jinja_filters(): 'html_table_to_xlsx': html_table_to_xlsx, 'format_cost': format_cost, 'get_cost': get_cost, + 'get_gcp_resource': get_gcp_resource, 'recurring_date': recurring_date, 'make_list': make_list, 'read_gcs_object': read_gcs_object, diff --git a/tools/pubsub2inbox/filters/gcp.py b/tools/pubsub2inbox/filters/gcp.py index 6aed9c3a3e..e4480dad03 100644 --- a/tools/pubsub2inbox/filters/gcp.py +++ b/tools/pubsub2inbox/filters/gcp.py @@ -11,6 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from helpers.base import get_branded_http +from googleapiclient.http import HttpRequest +import json + + def format_cost(cost, decimals=2): _format = '%%.%df %%s' % decimals return _format % ( @@ -20,3 +26,14 @@ def format_cost(cost, decimals=2): def get_cost(cost): return (float(cost['units']) + (float(cost['nanos']) / 1000000000.0)) + + +def get_gcp_resource(resource, api_domain, api_endpoint, api_version='v1'): + uri = "https://%s.googleapis.com/%s/%s/%s" % (api_domain, api_endpoint, + api_version, resource) + req = HttpRequest(get_branded_http(), lambda resp, content: content, uri) + response = req.execute() + if response: + parsed_response = json.loads(response.decode('utf-8')) + return parsed_response + return None diff --git a/tools/pubsub2inbox/helpers/base.py b/tools/pubsub2inbox/helpers/base.py index 00e556d497..c43481f575 100644 --- a/tools/pubsub2inbox/helpers/base.py +++ b/tools/pubsub2inbox/helpers/base.py @@ -21,7 +21,7 @@ import google.auth from googleapiclient import http -PUBSUB2INBOX_VERSION = '1.2.0' +PUBSUB2INBOX_VERSION = '1.3.0' class NoCredentialsException(Exception): @@ -133,4 +133,29 @@ def _jinja_expand_dict(self, _var, _tpl='config'): _var[k] = self._jinja_expand_string(v) else: _var[k] = self._jinja_expand_dict(_var[k]) + return _var + + def _jinja_expand_dict_all(self, _var, _tpl='config'): + if not isinstance(_var, dict): + return _var + for k, v in _var.items(): + if not isinstance(v, dict): + if isinstance(v, str): + _var[k] = self._jinja_expand_string(v) + if isinstance(v, list): + for idx, lv in enumerate(_var[k]): + if isinstance(lv, dict): + _var[k][idx] = self._jinja_expand_dict_all(lv) + if isinstance(lv, str): + _var[k][idx] = self._jinja_expand_string(lv) + else: + _var[k] = self._jinja_expand_dict_all(_var[k]) + return _var + + def _jinja_expand_list(self, _var, _tpl='config'): + if not isinstance(_var, list): + return _var + for idx, v in enumerate(_var): + if isinstance(v, str): + _var[idx] = self._jinja_expand_string(v) return _var \ No newline at end of file diff --git a/tools/pubsub2inbox/main.py b/tools/pubsub2inbox/main.py index e181d28efe..a1390d6c18 100644 --- a/tools/pubsub2inbox/main.py +++ b/tools/pubsub2inbox/main.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ def load_configuration(file_name): - if os.getenv('CONFIG'): + if os.getenv('CONFIG') and os.getenv('CONFIG') != '': logger = logging.getLogger('pubsub2inbox') secret_manager_url = os.getenv('CONFIG') if secret_manager_url.startswith('projects/'): @@ -96,6 +96,14 @@ class NoDataFieldException(Exception): pass +class NoMessageReceivedException(Exception): + pass + + +class InvalidMessageFormatException(Exception): + pass + + def process_message(config, data, event, context): logger = logging.getLogger('pubsub2inbox') @@ -263,11 +271,16 @@ def process_message(config, data, event, context): try: output_instance.output() except Exception as exc: - logger.error('Output processor %s failed, trying next...' % - (output_type), - extra={'exception': traceback.format_exc()}) - if 'allOutputsMustSucceed' in config and config[ - 'allOutputsMustSucceed']: + if len(config['outputs']) > 1: + logger.error('Output processor %s failed, trying next...' % + (output_type), + extra={'exception': traceback.format_exc()}) + if 'allOutputsMustSucceed' in config and config[ + 'allOutputsMustSucceed']: + raise exc + else: + logger.error('Output processor %s failed.' % (output_type), + extra={'exception': traceback.format_exc()}) raise exc else: @@ -298,7 +311,7 @@ def decode_and_process(logger, config, event, context): extra={'event_id': context.event_id}) -def process_pubsub(event, context): +def process_pubsub(event, context, message_too_old_exception=False): """Function that is triggered by Pub/Sub incoming message. Args: event (dict): The dictionary with data specific to this type of @@ -333,13 +346,79 @@ def process_pubsub(event, context): 'error': str(exc), }) raise exc - except MessageTooOldException: - pass + except MessageTooOldException as mtoe: + if not message_too_old_exception: + pass + else: + raise (mtoe) + + +class CloudRunServer: + + def on_get(self, req, res): + try: + import falcon + + res.content_type = falcon.MEDIA_TEXT + res.status = falcon.HTTP_400 + res.text = 'Bad Request: expecting POST' + except ImportError: + logger.error( + 'Falcon is required for web server mode, run: pip install falcon' + ) + + def on_post(self, req, res): + try: + import falcon + res.content_type = falcon.MEDIA_TEXT + + try: + envelope = req.media + except falcon.MediaNotFoundError: + raise NoMessageReceivedException('No Pub/Sub message received') + except falcon.MediaMalformedError: + raise InvalidMessageFormatException('Invalid Pub/Sub JSON') + + if not isinstance(envelope, dict) or 'message' not in envelope: + raise InvalidMessageFormatException( + 'Invalid Pub/Sub message format') + + event = { + 'data': + envelope['message']['data'], + 'attributes': + envelope['message']['attributes'] + if 'attributes' in envelope['message'] else {} + } + + context = Context(eventId=envelope['message']['messageId'], + timestamp=envelope['message']['publishTime']) + process_pubsub(event, context, message_too_old_exception=True) + res.status = falcon.HTTP_200 + res.text = 'Message processed.' + except (NoMessageReceivedException, + InvalidMessageFormatException) as me: + # Do not attempt to retry malformed messages + logger.error('%s' % (me), + extra={'exception': traceback.format_exc()}) + res.status = falcon.HTTP_204 + res.text = 'Bad Request: %s' % (str(me)) + except MessageTooOldException as mtoe: + res.status = falcon.HTTP_202 + res.text = 'Message ignored: %s' % (mtoe) + except ImportError: + logger.error( + 'Falcon is required for web server mode, run: pip install falcon' + ) + except Exception as e: + traceback.print_exc() + res.status = falcon.HTTP_500 + res.text = 'Internal Server Error: %s' % (e) def setup_logging(): logger = logging.getLogger('pubsub2inbox') - if os.getenv('LOG_LEVEL'): + if os.getenv('LOG_LEVEL') and os.getenv('LOG_LEVEL') != '': logger.setLevel(int(os.getenv('LOG_LEVEL'))) else: logger.setLevel(logging.INFO) @@ -350,6 +429,31 @@ def setup_logging(): return logger +def run_webserver(run_locally=False): + global logger + if not logger: + logger = setup_logging() + try: + import falcon + app = falcon.App() + server = CloudRunServer() + app.add_route('/', server) + if run_locally: + from waitress import serve + port = 8080 if not os.getenv('PORT') or os.getenv( + 'PORT') == '' else int(os.getenv('PORT')) + serve(app, listen='*:%d' % (port)) + return app + except ImportError: + logger.error( + 'Falcon and waitress is required for web server mode, run: pip install falcon waitress' + ) + + +app = None +if os.getenv('WEBSERVER') == '1': + app = run_webserver() + if __name__ == '__main__': arg_parser = argparse.ArgumentParser( description='Pub/Sub to Inbox, turn Pub/Sub messages into emails') @@ -358,26 +462,37 @@ def setup_logging(): '--ignore-period', action='store_true', help='Ignore the message timestamp (for skipping retry period)') + arg_parser.add_argument('--webserver', + action='store_true', + help='Run the function as a web server') arg_parser.add_argument('message', type=str, + nargs='?', help='JSON file containing the message(s)') args = arg_parser.parse_args() if args.config: config_file_name = args.config - with open(args.message) as f: - contents = f.read() - messages = json.loads(contents) - for message in messages: - event = { - 'data': - message['message']['data'], - 'attributes': - message['message']['attributes'] - if 'attributes' in message['message'] else {} - } - context = Context(eventId=message['message']['messageId'], - timestamp=message['message']['publishTime']) - if args.ignore_period: - context.timestamp = datetime.utcnow().strftime( - '%Y-%m-%dT%H:%M:%S.%fZ') - process_pubsub(event, context) + if args.webserver or os.getenv('WEBSERVER') == '1': + run_webserver(True) + else: + if not args.message: + print( + 'Specify a file containing the message to process on the command line.' + ) + with open(args.message) as f: + contents = f.read() + messages = json.loads(contents) + for message in messages: + event = { + 'data': + message['message']['data'], + 'attributes': + message['message']['attributes'] + if 'attributes' in message['message'] else {} + } + context = Context(eventId=message['message']['messageId'], + timestamp=message['message']['publishTime']) + if args.ignore_period: + context.timestamp = datetime.utcnow().strftime( + '%Y-%m-%dT%H:%M:%S.%fZ') + process_pubsub(event, context) diff --git a/tools/pubsub2inbox/main.tf b/tools/pubsub2inbox/main.tf index e6f2a202a7..6ff1d36629 100644 --- a/tools/pubsub2inbox/main.tf +++ b/tools/pubsub2inbox/main.tf @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,6 +29,9 @@ provider "google" { provider "archive" { } +data "google_project" "project" { +} + # Secret Manager secret for the function resource "google_secret_manager_secret" "config-secret" { secret_id = var.secret_id != "" ? var.secret_id : var.function_name @@ -59,7 +62,7 @@ locals { # If you specify function_roles, the Terraform code will grant the service account some # privileges required for the particular functionalities. - default_apis = ["cloudfunctions.googleapis.com", "cloudbuild.googleapis.com"] + default_apis = [var.cloud_run ? "run.googleapis.com" : "cloudfunctions.googleapis.com", "cloudbuild.googleapis.com"] iam_permissions = { scc = { @@ -67,6 +70,11 @@ locals { project = [] apis = ["cloudresourcemanager.googleapis.com"] } + scc_writer = { + org = ["roles/browser", "roles/securitycenter.findingsEditor", "roles/securitycenter.findingSecurityMarksWriter", "roles/compute.networkViewer"] + project = [] + apis = ["cloudresourcemanager.googleapis.com"] + } budgets = { org = ["roles/browser", "roles/billing.viewer"] project = [] @@ -115,8 +123,8 @@ resource "google_project_service" "service-account-apis" { # Activate the Secrets Manager API resource "google_project_service" "secret-manager-api" { - project = var.project_id - service = "secretmanager.googleapis.com" + project = var.project_id + service = "secretmanager.googleapis.com" disable_on_destroy = false } @@ -158,13 +166,19 @@ resource "google_secret_manager_secret_iam_member" "config-secret-iam" { member = format("serviceAccount:%s", google_service_account.service-account.email) } +## Cloud Function + resource "random_id" "bucket-suffix" { + count = !var.cloud_run ? 1 : 0 + byte_length = 8 } # Bucket for storing the function archive resource "google_storage_bucket" "function-bucket" { - name = format("%s-%s", var.bucket_name, random_id.bucket-suffix.hex) + count = !var.cloud_run ? 1 : 0 + + name = format("%s-%s", var.bucket_name, random_id.bucket-suffix[0].hex) location = var.bucket_location uniform_bucket_level_access = true } @@ -176,6 +190,8 @@ locals { } data "archive_file" "function-zip" { + count = !var.cloud_run ? 1 : 0 + type = "zip" output_path = "${path.module}/index.zip" dynamic "source" { @@ -188,11 +204,13 @@ data "archive_file" "function-zip" { } resource "google_storage_bucket_object" "function-archive" { + count = !var.cloud_run ? 1 : 0 + name = format("index-%s.zip", md5(join(",", local.function_file_hashes))) - bucket = google_storage_bucket.function-bucket.name + bucket = google_storage_bucket.function-bucket[0].name source = format("%s/index.zip", path.root) depends_on = [ - data.archive_file.function-zip + data.archive_file.function-zip.0 ] } @@ -202,6 +220,8 @@ resource "google_storage_bucket_object" "function-archive" { # Workaround is to use "terraform taint google_cloudfunctions_function.function" # before plan/apply. resource "google_cloudfunctions_function" "function" { + count = !var.cloud_run ? 1 : 0 + name = var.function_name description = "Pubsub2Inbox" runtime = "python38" @@ -209,8 +229,8 @@ resource "google_cloudfunctions_function" "function" { service_account_email = google_service_account.service-account.email available_memory_mb = 256 - source_archive_bucket = google_storage_bucket.function-bucket.name - source_archive_object = google_storage_bucket_object.function-archive.name + source_archive_bucket = google_storage_bucket.function-bucket[0].name + source_archive_object = google_storage_bucket_object.function-archive[0].name entry_point = "process_pubsub" timeout = var.function_timeout @@ -230,3 +250,92 @@ resource "google_cloudfunctions_function" "function" { SERVICE_ACCOUNT = google_service_account.service-account.email } } + +## Cloud Run + +# Service account for Pub/Sub invoker +resource "google_service_account" "invoker-service-account" { + count = var.cloud_run ? 1 : 0 + + account_id = var.service_account != "" ? format("%s-invoker", var.service_account) : format("%s-invoker", var.function_name) + display_name = format("%s Cloud Run invoker Service Account", title(var.function_name)) +} + +# Allow the invoker service account to run the Cloud Run function +resource "google_cloud_run_service_iam_member" "pubsub-invoker" { + count = var.cloud_run ? 1 : 0 + + location = google_cloud_run_service.function[0].location + service = google_cloud_run_service.function[0].name + role = "roles/run.invoker" + member = format("serviceAccount:%s", google_service_account.invoker-service-account[0].email) +} + +# Grant Pub/Sub P4SA to create auth tokens for the invoker service account +resource "google_service_account_iam_member" "pubsub-token-creator" { + count = var.cloud_run ? 1 : 0 + + service_account_id = google_service_account.invoker-service-account[0].name + role = "roles/iam.serviceAccountTokenCreator" + member = format("serviceAccount:service-%d@gcp-sa-pubsub.iam.gserviceaccount.com", data.google_project.project.number) +} + +# Create a Pub/Sub push subscription that calls the Cloud Run function +resource "google_pubsub_subscription" "pubsub-subscription" { + count = var.cloud_run ? 1 : 0 + + name = format("%s-subscription", var.function_name) + topic = var.pubsub_topic + push_config { + push_endpoint = google_cloud_run_service.function[0].status[0].url + oidc_token { + service_account_email = google_service_account.invoker-service-account[0].email + } + attributes = { + x-goog-version = "v1" + } + } + retry_policy { + minimum_backoff = var.retry_minimum_backoff + maximum_backoff = var.retry_maximum_backoff + } + depends_on = [ + google_service_account_iam_member.pubsub-token-creator[0], + google_cloud_run_service_iam_member.pubsub-invoker[0] + ] +} + +resource "google_cloud_run_service" "function" { + count = var.cloud_run ? 1 : 0 + + name = var.function_name + location = var.region + + template { + spec { + containers { + image = var.cloud_run_container + + env { + name = "CONFIG" + value = google_secret_manager_secret_version.config-secret-version.name + } + env { + name = "LOG_LEVEL" + value = 10 + } + env { + name = "SERVICE_ACCOUNT" + value = google_service_account.service-account.email + } + } + service_account_name = google_service_account.service-account.email + container_concurrency = 8 + timeout_seconds = var.function_timeout + } + } + traffic { + percent = 100 + latest_revision = true + } +} diff --git a/tools/pubsub2inbox/output/scc.py b/tools/pubsub2inbox/output/scc.py index 63dc9f687e..d890e1ddd6 100644 --- a/tools/pubsub2inbox/output/scc.py +++ b/tools/pubsub2inbox/output/scc.py @@ -48,13 +48,33 @@ def output(self): finding = self._jinja_expand_dict(self.output_config['finding'], 'finding') finding['name'] = '%s/findings/%s' % (source, finding_id) - if 'sourceProperties' in finding: - if not isinstance(finding['sourceProperties'], dict): - try: - props = json.loads(finding['sourceProperties']) - finding['sourceProperties'] = props - except Exception: - pass + + json_fields = [ + 'sourceProperties', 'indicator', 'vulnerability', 'connections', + 'processes', 'compliances', 'iamBindings', 'containers' + ] + for json_field in json_fields: + if json_field in finding: + if not isinstance(finding[json_field], dict) and not isinstance( + finding[json_field], list): + try: + props = json.loads(finding[json_field]) + finding[json_field] = props + except Exception: + pass + elif isinstance(finding[json_field], dict): + finding[json_field] = self._jinja_expand_dict_all( + finding[json_field], 'finding') + elif isinstance(finding[json_field], list): + finding[json_field] = self._jinja_expand_list( + finding[json_field], 'finding') + + self.logger.debug('Sending finding to Security Command Center.', + extra={ + 'source': source, + 'finding_id': finding_id, + 'finding': finding + }) scc_service = discovery.build('securitycenter', 'v1', diff --git a/tools/pubsub2inbox/requirements.txt b/tools/pubsub2inbox/requirements.txt index c68a6436fb..0bb6cbf021 100644 --- a/tools/pubsub2inbox/requirements.txt +++ b/tools/pubsub2inbox/requirements.txt @@ -15,7 +15,7 @@ google-cloud-storage~=1.42.0 grpcio~=1.41.0 functions-framework~=2.2.0 google-cloud-iam~=2.3.0 -google-cloud-bigquery~=2.27.0 +google-cloud-bigquery~=2.34.0 google-cloud-recommender~=2.3.0 openpyxl~=3.0.0 tablepyxl~=0.6.0 @@ -28,3 +28,5 @@ requests~=2.26.0 responses~=0.18.0 markupsafe~=2.0.0 twilio~=7.9.0 +falcon~=3.1.0 +waitress~=2.1.0 \ No newline at end of file diff --git a/tools/pubsub2inbox/test/fixtures/cloud-ids.json b/tools/pubsub2inbox/test/fixtures/cloud-ids.json new file mode 100644 index 0000000000..f58e661b95 --- /dev/null +++ b/tools/pubsub2inbox/test/fixtures/cloud-ids.json @@ -0,0 +1,13 @@ +[ + { + "ackId": "RFAGFixdRkhRNxkIaFEOT14jPzUgKEURAgZPAihdeTFYMkFYcmhRDRlyfWB9aAwSVwJMVH8OURsHaE5tdR-n_5XaS0NVb14QBAFCUXZcWxMLbV5ccC-1zZu1q-qXU0AvOfS339Zpe5G9pqhvZiM9XhJLLD5-IDBFQV5AEkwrBURJUytDCypYEU4EISE-MD5FUw", + "message": { + "attributes": { + "logging.googleapis.com/timestamp": "2022-08-31T03:27:29Z" + }, + "data": "eyJpbnNlcnRJZCI6IjVlNzgxMTM5Y2QzZjcxYWQxZDNmZWJhYThmOTkxMmQ4LTFAYTEiLCJqc29uUGF5bG9hZCI6eyJhbGVydF9zZXZlcml0eSI6IkNSSVRJQ0FMIiwiYWxlcnRfdGltZSI6IjIwMjItMDgtMzFUMDM6Mjc6MjlaIiwiYXBwbGljYXRpb24iOiJ3ZWItYnJvd3NpbmciLCJjYXRlZ29yeSI6ImNvZGUtZXhlY3V0aW9uIiwiY3ZlcyI6WyJDVkUtMjAxOC0yMDA2MiIsIkNWRS0yMDE5LTkwODIiXSwiZGVzdGluYXRpb25faXBfYWRkcmVzcyI6IjEwLjMuMi4yIiwiZGVzdGluYXRpb25fcG9ydCI6IjgxODEiLCJkZXRhaWxzIjoiVGhpbmtQSFAgNS4xIGlzIHByb25lIHRvIGEgcmVtb3RlIGNvZGUgZXhlY3V0aW9uIHZ1bG5lcmFiaWxpdHkgd2hpbGUgcGFyc2luZyBjZXJ0YWluIGNyYWZ0ZWQgSFRUUCByZXF1ZXN0cy4gVGhlIHZ1bG5lcmFiaWxpdHkgaXMgZHVlIHRvIHRoZSBsYWNrIG9mIHByb3BlciBjaGVja3Mgb24gcmVxdWVzdCBwYXJhbWV0ZXJzIGluIEhUVFAgcmVxdWVzdHMsIGxlYWRpbmcgdG8gcmVtb3RlIGNvZGUgZXhlY3V0aW9uLiBBbiBhdHRhY2tlciBjb3VsZCBleHBsb2l0IHRoZSB2dWxuZXJhYmlsaXR5IGJ5IHNlbmRpbmcgYSBjcmFmdGVkIEhUVFAgcmVxdWVzdC4gQSBzdWNjZXNzZnVsIGF0dGFjayBjb3VsZCBsZWFkIHRvIHJlbW90ZSBjb2RlIGV4ZWN1dGlvbiB3aXRoIHRoZSBwcml2aWxlZ2VzIG9mIHRoZSBzZXJ2ZXIuIiwiZGlyZWN0aW9uIjoiY2xpZW50LXRvLXNlcnZlciIsImlwX3Byb3RvY29sIjoidGNwIiwibmFtZSI6IlRoaW5rUEhQIFJlbW90ZSBDb2RlIEV4ZWN1dGlvbiBWdWxuZXJhYmlsaXR5IiwibmV0d29yayI6InByb2plY3RzL3NmYW5zLWh1Yi1wcm9qZWN0LWQ2NDcvZ2xvYmFsL25ldHdvcmtzL2dpdGxhYiIsInJlcGVhdF9jb3VudCI6IjEiLCJzZXNzaW9uX2lkIjoiNTgwNTMiLCJzb3VyY2VfaXBfYWRkcmVzcyI6IjM1LjE5MS4wLjYwIiwic291cmNlX3BvcnQiOiI2MTYzNCIsInRocmVhdF9pZCI6IjU0ODI1IiwidHlwZSI6InZ1bG5lcmFiaWxpdHkiLCJ1cmlfb3JfZmlsZW5hbWUiOiJpbmRleC5waHAifSwibG9nTmFtZSI6InByb2plY3RzL3NmYW5zLWh1Yi1wcm9qZWN0LWQ2NDcvbG9ncy9pZHMuZ29vZ2xlYXBpcy5jb20lMkZ0aHJlYXQiLCJyZWNlaXZlVGltZXN0YW1wIjoiMjAyMi0wOC0zMVQwMzoyNzozMS41Mjg2ODkzNjNaIiwicmVzb3VyY2UiOnsibGFiZWxzIjp7ImlkIjoiaWRzIiwibG9jYXRpb24iOiJldXJvcGUtd2VzdDQtYyIsInJlc291cmNlX2NvbnRhaW5lciI6InByb2plY3RzLzkwOTQxNTY4MDg2MSJ9LCJ0eXBlIjoiaWRzLmdvb2dsZWFwaXMuY29tL0VuZHBvaW50In0sInRpbWVzdGFtcCI6IjIwMjItMDgtMzFUMDM6Mjc6MjlaIn0=", + "messageId": "5435372830865205", + "publishTime": "2022-08-31T03:27:32.808Z" + } + } +] diff --git a/tools/pubsub2inbox/variables.tf b/tools/pubsub2inbox/variables.tf index 1519f95374..c6745d1b8c 100644 --- a/tools/pubsub2inbox/variables.tf +++ b/tools/pubsub2inbox/variables.tf @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -84,8 +84,33 @@ variable "helper_bucket_name" { description = "Helper bucket name for granting IAM permission (storage.objectAdmin)" default = "" } + variable "function_timeout" { type = number description = "Cloud Function timeout (maximum 540 seconds)" default = 240 } + +variable "retry_minimum_backoff" { + type = string + description = "Minimum retry backoff (value between 0-600 seconds, suffixed with s, default 10s, Cloud Run only)" + default = "10s" +} + +variable "retry_maximum_backoff" { + type = string + description = "Maximum retry backoff (value between 0-600 seconds, suffixed with s, default 600s, Cloud Run Only)" + default = "600s" +} + +variable "cloud_run" { + type = bool + description = "Deploy via Cloud Run" + default = false +} + +variable "cloud_run_container" { + type = string + description = "Container URL when deploying via Cloud Run" + default = "" +}