diff --git a/.github/workflows/create-source-data.yaml b/.github/workflows/create-source-data.yaml
index 2904b34..d848518 100644
--- a/.github/workflows/create-source-data.yaml
+++ b/.github/workflows/create-source-data.yaml
@@ -28,7 +28,7 @@ jobs:
         with:
           token: ${{ steps.app-token.outputs.token }}
           image_project: ubuntu-os-cloud
-          image_family: ubuntu-2204-lts
+          image_family: ubuntu-2404-lts-amd64
           machine_zone: europe-west4-b
           machine_type: e2-standard-4
           runner_service_account: ${{ vars.RUNNER_GCP_SERVICE_ACCOUNT }}
@@ -40,4 +40,64 @@ jobs:
     needs: create-runner
     runs-on: ${{ needs.create-runner.outputs.label }}
    steps:
-      - run: echo "This runs on the GCE VM"
+      # We are running on barebones VM, so there is more scripting involved
+      # than needed if we were running on standard GitHub Actions runner.
+      - name: Checkout source
+        run: |
+          mkdir src
+          cd src
+          git init
+          git remote add origin $GITHUB_SERVER_URL/$GITHUB_REPOSITORY.git
+          git fetch origin $GITHUB_REF
+          git reset --hard FETCH_HEAD
+          cd ..
+      - name: Set up PostgreSQL
+        run: |
+          sudo apt-get --yes install postgresql
+          while ! pg_isready; do
+            echo "waiting for postgres..."
+            sleep 1
+          done
+          sudo -u postgres psql -c "ALTER USER postgres PASSWORD '12345';"
+      - name: Setup DuckDB
+        run: |
+          sudo apt-get install --yes unzip
+          curl -L https://github.com/duckdb/duckdb/releases/download/v1.1.0/duckdb_cli-linux-amd64.zip > duckdb.zip
+          unzip duckdb.zip duckdb
+          sudo mv duckdb /usr/local/bin
+      - name: Restore replay db
+        run: |
+          REPLAY_BACKUP="$(gcloud storage ls gs://${REPLAY_BACKUPS_GCS_BUCKET} | sort -r | head -n 1)"
+          gcloud storage cp "$REPLAY_BACKUP" .
+          psql -c "CREATE DATABASE bar;"
+          time zstdcat "$(basename "$REPLAY_BACKUP")" | pg_restore -d postgres --clean --create --no-owner --no-privileges
+        env:
+          REPLAY_BACKUPS_GCS_BUCKET: ${{ vars.REPLAY_BACKUPS_GCS_BUCKET }}
+          PGPASSWORD: "12345"
+          PGHOST: 127.0.0.1
+          PGUSER: postgres
+      - name: Restore teiserver db
+        run: |
+          TEI_BACKUP="$(gcloud storage ls gs://${TEISERVER_BACKUPS_GCS_BUCKET} | sort -r | head -n 1)"
+          gcloud storage cp "$TEI_BACKUP" .
+          psql -c "CREATE DATABASE teiserver_prod;"
+          time zstdcat "$(basename "$TEI_BACKUP")" | pg_restore -d postgres --clean --create --no-owner --no-privileges
+        env:
+          TEISERVER_BACKUPS_GCS_BUCKET: ${{ vars.TEISERVER_BACKUPS_GCS_BUCKET }}
+          PGPASSWORD: "12345"
+          PGHOST: 127.0.0.1
+          PGUSER: postgres
+      - name: Export parquet files
+        run: |
+          mkdir data_export
+          duckdb < src/scripts/export_prod_data_source.sql
+        env:
+          PGPASSWORD: "12345"
+          PGHOST: 127.0.0.1
+          PGUSER: postgres
+      - name: Save data export in GCS bucket
+        run: |
+          gcloud config set storage/parallel_composite_upload_compatibility_check False
+          gcloud storage rsync data_export/ gs://$DATA_MART_GCS_BUCKET/pgdumps --recursive --delete-unmatched-destination-objects
+        env:
+          DATA_MART_GCS_BUCKET: ${{ vars.DATA_MART_GCS_BUCKET }}
diff --git a/scripts/export_prod_data_source.sql b/scripts/export_prod_data_source.sql
new file mode 100644
index 0000000..2da8a80
--- /dev/null
+++ b/scripts/export_prod_data_source.sql
@@ -0,0 +1,14 @@
+-- noqa: disable=all
+
+ATTACH 'dbname=teiserver_prod' AS teiserver (TYPE POSTGRES, READ_ONLY);
+
+COPY teiserver.public.teiserver_battle_matches TO 'data_export/teiserver_battle_matches.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
+COPY teiserver.public.teiserver_battle_match_memberships TO 'data_export/teiserver_battle_match_memberships.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
+COPY teiserver.public.teiserver_game_rating_logs TO 'data_export/teiserver_game_rating_logs.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
+
+ATTACH 'dbname=bar' AS replay (TYPE POSTGRES, READ_ONLY);
+
+COPY replay.public.Demos TO 'data_export/replay_demos.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
+COPY replay.public.AllyTeams TO 'data_export/replay_ally_teams.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
+COPY replay.public.Players TO 'data_export/replay_players.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
+COPY replay.public.Maps TO 'data_export/replay_maps.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);