Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retrieve the CR3 download secrets with 1PW #1262

Merged
merged 5 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions .github/workflows/build_docker_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ on:
- "atd-etl/cris_import/**"
- "atd-etl/cr3_extract_diagram/**"
- "atd-etl/socrata_export/**"
- "atd-etl/cr3_download/**"
pull_request:
branches:
- master
Expand All @@ -24,7 +23,6 @@ on:
- "atd-etl/cris_import/**"
- "atd-etl/cr3_extract_diagram/**"
- "atd-etl/socrata_export/**"
- "atd-etl/cr3_download/**"
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

Expand Down Expand Up @@ -55,8 +53,6 @@ jobs:
- 'atd-etl/cr3_extract_diagram/**'
socrata_export:
- 'atd-etl/socrata_export/**'
cr3_download:
- 'atd-etl/cr3_download/**'

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
Expand Down Expand Up @@ -96,12 +92,3 @@ jobs:
context: atd-etl/socrata_export
push: true
tags: atddocker/vz-socrata-export:${{ github.ref == 'refs/heads/master' && 'production' || 'latest' }}

- name: Build and push CR3 download image
if: steps.changes.outputs.cr3_download == 'true'
uses: docker/build-push-action@v4
with:
platforms: linux/amd64,linux/arm64
context: atd-etl/cr3_download
push: true
tags: atddocker/vz-cr3-download:${{ github.ref == 'refs/heads/master' && 'production' || 'latest' }}
2 changes: 2 additions & 0 deletions atd-etl/cr3_download/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ WORKDIR /app
COPY . /app

RUN cd /app && pip install -r requirements.txt

CMD python /app/cr3_download.py
11 changes: 6 additions & 5 deletions atd-etl/cr3_download/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
### Invocation

After creating an .env file using the variables listed in the env_template file, you can run this script with Docker Compose.

If you are running this for the first time or developing this script, you will need to build and run:
```
$ docker compose run -it cr3_download python cr3_download.py
$ docker compose run --build cr3_download
```

If you are developing, you may find that you need to run and build:
Otherwise, you can run:
```
$ docker compose run --build -it cr3_download python cr3_download.py
$ docker compose run cr3_download
```

In production, they will be run from a DAG which handles starting the containers with
the needed environment and other parameters.
The script will prompt for the cookie and then download any pending CR3s.
74 changes: 70 additions & 4 deletions atd-etl/cr3_download/cr3_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,87 @@
any CR3 files associated.
"""

import os
import time
import json

from process.config import ATD_ETL_CONFIG
from process.helpers_cr3 import *

from onepasswordconnectsdk.client import Client, new_client
import onepasswordconnectsdk

# Start timer
start = time.time()


# Get 1Password secrets from environment
ONEPASSWORD_CONNECT_HOST = os.getenv("OP_CONNECT")
ONEPASSWORD_CONNECT_TOKEN = os.getenv("OP_API_TOKEN")
VAULT_ID = os.getenv("OP_VAULT_ID")

# Setup 1Password server connection
one_password_client = new_client(ONEPASSWORD_CONNECT_HOST, ONEPASSWORD_CONNECT_TOKEN)

# Get secrets from 1Password
REQUIRED_SECRETS = {
"HASURA_ENDPOINT": {
"opitem": "Vision Zero graphql-engine Endpoints",
"opfield": "production.GraphQL Endpoint",
"opvault": VAULT_ID,
},
"HASURA_ADMIN_KEY": {
"opitem": "Vision Zero graphql-engine Endpoints",
"opfield": "production.Admin Key",
"opvault": VAULT_ID,
},
"AWS_ACCESS_KEY_ID": {
"opitem": "CR3 Download IAM Access Key and Secret",
"opfield": "production.accessKeyId",
"opvault": VAULT_ID,
},
"AWS_SECRET_ACCESS_KEY": {
"opitem": "CR3 Download IAM Access Key and Secret",
"opfield": "production.accessSecret",
"opvault": VAULT_ID,
},
"AWS_DEFAULT_REGION": {
"opitem": "CR3 Download IAM Access Key and Secret",
"opfield": "production.awsDefaultRegion",
"opvault": VAULT_ID,
},
"ATD_CRIS_CR3_URL": {
"opitem": "Vision Zero CRIS CR3 Download",
"opfield": "production.ATD_CRIS_CR3_URL",
"opvault": VAULT_ID,
},
"AWS_CRIS_CR3_BUCKET_NAME": {
"opitem": "Vision Zero CRIS CR3 Download",
"opfield": "production.AWS_CRIS_CR3_BUCKET_NAME",
"opvault": VAULT_ID,
},
"AWS_CRIS_CR3_BUCKET_PATH": {
"opitem": "Vision Zero CRIS CR3 Download",
"opfield": "production.AWS_CRIS_CR3_BUCKET_PATH",
"opvault": VAULT_ID,
},
}

env_vars = onepasswordconnectsdk.load_dict(one_password_client, REQUIRED_SECRETS)

# Set secrets from 1Password in environment
for key, value in env_vars.items():
os.environ[key] = value

#
# We now need to request a list of N number of records
# that do not have a CR3. For each record we must download
# the CR3 pdf, upload to S3
#

# ask user for a set of valid cookies for requests to the CRIS website
CRIS_BROWSER_COOKIES = input(
"Please login to CRIS and extract the contents of the Cookie: header and please paste it here:"
)
Comment on lines +89 to +92
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🥲


print("Preparing download loop.")

Expand All @@ -40,8 +106,8 @@
crashes_list_without_skips = []

try:
print("Hasura endpoint: '%s' " % ATD_ETL_CONFIG["HASURA_ENDPOINT"])
downloads_per_run = ATD_ETL_CONFIG["ATD_CRIS_CR3_DOWNLOADS_PER_RUN"]
print("Hasura endpoint: '%s' " % os.getenv("HASURA_ENDPOINT"))
downloads_per_run = os.getenv("ATD_CRIS_CR3_DOWNLOADS_PER_RUN")
downloads_per_run = 2000
print("Downloads Per This Run: %s" % str(downloads_per_run))

Expand All @@ -67,7 +133,7 @@
for crash_record in crashes_list_without_skips:
process_crash_cr3(
crash_record,
ATD_ETL_CONFIG["CRIS_CR3_DOWNLOAD_COOKIE"],
CRIS_BROWSER_COOKIES,
skipped_uploads_and_updates,
)

Expand Down
15 changes: 3 additions & 12 deletions atd-etl/cr3_download/env_template
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
AWS_DEFAULT_REGION=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
# HASURA
HASURA_ENDPOINT=
HASURA_ADMIN_KEY=
# CRIS
CRIS_CR3_DOWNLOAD_COOKIE=
# CR3
ATD_CRIS_CR3_URL=
AWS_CRIS_CR3_BUCKET_NAME=
AWS_CRIS_CR3_BUCKET_PATH=
OP_API_TOKEN=
OP_CONNECT=
OP_VAULT_ID=
33 changes: 0 additions & 33 deletions atd-etl/cr3_download/process/config.py

This file was deleted.

10 changes: 5 additions & 5 deletions atd-etl/cr3_download/process/helpers_cr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
https://pypi.org/project/requests/
"""

import os
import requests
import base64
import subprocess
Expand All @@ -18,8 +19,7 @@

import magic

# We need to import our configuration, and the run_query method
from .config import ATD_ETL_CONFIG
# We need the run_query method
from .request import run_query


Expand Down Expand Up @@ -49,7 +49,7 @@ def download_cr3(crash_id, cookies):
crash_id_encoded = base64.b64encode(
str("CrashId=" + crash_id).encode("utf-8")
).decode("utf-8")
url = ATD_ETL_CONFIG["ATD_CRIS_CR3_URL"] + crash_id_encoded
url = os.getenv("ATD_CRIS_CR3_URL") + crash_id_encoded
download_path = "/tmp/" + "%s.pdf" % crash_id

print("Downloading (%s): '%s' from %s" % (crash_id, download_path, url))
Expand All @@ -66,8 +66,8 @@ def upload_cr3(crash_id):
"""
file = "/tmp/%s.pdf" % crash_id
destination = "s3://%s/%s/%s.pdf" % (
ATD_ETL_CONFIG["AWS_CRIS_CR3_BUCKET_NAME"],
ATD_ETL_CONFIG["AWS_CRIS_CR3_BUCKET_PATH"],
os.getenv("AWS_CRIS_CR3_BUCKET_NAME"),
os.getenv("AWS_CRIS_CR3_BUCKET_PATH"),
crash_id,
)

Expand Down
20 changes: 9 additions & 11 deletions atd-etl/cr3_download/process/request.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#
# Request Helper - Makes post requests to a Hasura/GraphQL endpoint.
#
import os
import time
import requests
from .config import ATD_ETL_CONFIG

MAX_ATTEMPTS = ATD_ETL_CONFIG["MAX_ATTEMPTS"]
RETRY_WAIT_TIME = ATD_ETL_CONFIG["RETRY_WAIT_TIME"]
MAX_ATTEMPTS = int(os.getenv("MAX_ATTEMPTS", "5"))
RETRY_WAIT_TIME = int(os.getenv("RETRY_WAIT_TIME", "5"))


def run_query(query):
Expand All @@ -16,23 +16,21 @@ def run_query(query):
:return: object - A Json dictionary directly from Hasura
"""
# Build Header with Admin Secret
headers = {
"x-hasura-admin-secret": ATD_ETL_CONFIG["HASURA_ADMIN_KEY"]
}
headers = {"x-hasura-admin-secret": os.getenv("HASURA_ADMIN_KEY")}

# Try up to n times as defined by max_attempts
for current_attempt in range(MAX_ATTEMPTS):
# Try making the request via POST
try:
return requests.post(ATD_ETL_CONFIG["HASURA_ENDPOINT"],
json={'query': query},
headers=headers).json()
return requests.post(
os.getenv("HASURA_ENDPOINT"), json={"query": query}, headers=headers
).json()
except Exception as e:
print("Exception, could not insert: " + str(e))
print("Query: '%s'" % query)
response = {
"errors": "Exception, could not insert: " + str(e),
"query": query
"query": query,
}

# If the current attempt is equal to MAX_ATTEMPTS, then exit with failure
Expand All @@ -41,6 +39,6 @@ def run_query(query):

# If less than 5, then wait 5 seconds and try again
else:
print("Attempt (%s out of %s)" % (current_attempt+1, MAX_ATTEMPTS))
print("Attempt (%s out of %s)" % (current_attempt + 1, MAX_ATTEMPTS))
print("Trying again in %s seconds..." % RETRY_WAIT_TIME)
time.sleep(RETRY_WAIT_TIME)
1 change: 1 addition & 0 deletions atd-etl/cr3_download/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ awscli==1.*
boto3==1.*
python-magic==0.*
requests==2.*
onepasswordconnectsdk==1.*
Loading