|
1 |
| -name: Create Source Data |
| 1 | +name: Create Dumps from DBs |
2 | 2 | on:
|
3 | 3 | workflow_dispatch:
|
| 4 | + schedule: |
| 5 | + - cron: '0 10 * * 2,6' |
4 | 6 | jobs:
|
5 | 7 | create-runner:
|
6 | 8 | permissions:
|
@@ -28,16 +30,77 @@ jobs:
|
28 | 30 | with:
|
29 | 31 | token: ${{ steps.app-token.outputs.token }}
|
30 | 32 | image_project: ubuntu-os-cloud
|
31 |
| - image_family: ubuntu-2204-lts |
| 33 | + image_family: ubuntu-2404-lts-amd64 |
32 | 34 | machine_zone: europe-west4-b
|
33 | 35 | machine_type: e2-standard-4
|
34 | 36 | runner_service_account: ${{ vars.RUNNER_GCP_SERVICE_ACCOUNT }}
|
35 | 37 | preemptible: true
|
36 | 38 | ephemeral: true
|
37 | 39 | boot_disk_type: pd-ssd
|
38 | 40 | disk_size: 70GB
|
39 |
| - test: |
| 41 | + export-pgdumps: |
40 | 42 | needs: create-runner
|
41 | 43 | runs-on: ${{ needs.create-runner.outputs.label }}
|
42 | 44 | steps:
|
43 |
| - - run: echo "This runs on the GCE VM" |
| 45 | + # We are running on a barebones VM, so there is more scripting involved |
| 46 | + # than needed if we were running on a standard GitHub Actions runner. |
| 47 | + - name: Checkout source |
| 48 | + run: | |
| 49 | + mkdir src |
| 50 | + cd src |
| 51 | + git init |
| 52 | + git remote add origin $GITHUB_SERVER_URL/$GITHUB_REPOSITORY.git |
| 53 | + git fetch origin $GITHUB_REF |
| 54 | + git reset --hard FETCH_HEAD |
| 55 | + cd .. |
| 56 | + - name: Set up PostgreSQL |
| 57 | + run: | |
| 58 | + sudo apt-get --yes install postgresql |
| 59 | + while ! pg_isready; do |
| 60 | + echo "waiting for postgres..." |
| 61 | + sleep 1 |
| 62 | + done |
| 63 | + sudo -u postgres psql -c "ALTER USER postgres PASSWORD '12345';" |
| 64 | + - name: Setup DuckDB |
| 65 | + run: | |
| 66 | + sudo apt-get install --yes unzip |
| 67 | + curl -L https://github.com/duckdb/duckdb/releases/download/v1.1.0/duckdb_cli-linux-amd64.zip > duckdb.zip |
| 68 | + unzip duckdb.zip duckdb |
| 69 | + sudo mv duckdb /usr/local/bin |
| 70 | + export HOME=$(pwd) |
| 71 | + duckdb :memory: 'INSTALL postgres;' |
| 72 | + - name: Restore databases |
| 73 | + run: | |
| 74 | + function restore { |
| 75 | + local BACKUP="$(gcloud storage ls gs://$1 | sort -r | head -n 1)" |
| 76 | + gcloud storage cp "$BACKUP" . |
| 77 | + psql -c "CREATE DATABASE $2;" |
| 78 | + time zstdcat "$(basename "$BACKUP")" \ |
| 79 | + | pg_restore -d postgres --clean --create --no-owner --no-privileges |
| 80 | + } |
| 81 | +
|
| 82 | + restore "$REPLAY_BACKUPS_GCS_BUCKET" bar & |
| 83 | + restore "$TEISERVER_BACKUPS_GCS_BUCKET" teiserver_prod & |
| 84 | +
|
| 85 | + wait %1 %2 |
| 86 | + env: |
| 87 | + REPLAY_BACKUPS_GCS_BUCKET: ${{ vars.REPLAY_BACKUPS_GCS_BUCKET }} |
| 88 | + TEISERVER_BACKUPS_GCS_BUCKET: ${{ vars.TEISERVER_BACKUPS_GCS_BUCKET }} |
| 89 | + PGPASSWORD: 12345 |
| 90 | + PGHOST: 127.0.0.1 |
| 91 | + PGUSER: postgres |
| 92 | + - name: Export parquet files |
| 93 | + run: | |
| 94 | + mkdir data_export |
| 95 | + export HOME=$(pwd) |
| 96 | + duckdb < src/scripts/export_prod_data_source.sql |
| 97 | + env: |
| 98 | + PGPASSWORD: 12345 |
| 99 | + PGHOST: 127.0.0.1 |
| 100 | + PGUSER: postgres |
| 101 | + - name: Save data export in GCS bucket |
| 102 | + run: | |
| 103 | + gcloud config set storage/parallel_composite_upload_compatibility_check False |
| 104 | + gcloud storage rsync data_export/ gs://$DATA_MART_GCS_BUCKET/pgdumps --recursive --delete-unmatched-destination-objects |
| 105 | + env: |
| 106 | + DATA_MART_GCS_BUCKET: ${{ vars.DATA_MART_GCS_BUCKET }} |
0 commit comments