large oltp benchmark #141
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: large oltp benchmark | |
on: | |
# uncomment to run on push for debugging your PR | |
#push: | |
# branches: [ bodobolero/synthetic_oltp_workload ] | |
schedule: | |
# * is a special character in YAML so you have to quote this string | |
# ┌───────────── minute (0 - 59) | |
# │ ┌───────────── hour (0 - 23) | |
# │ │ ┌───────────── day of the month (1 - 31) | |
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) | |
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) | |
- cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC | |
workflow_dispatch: # adds ability to run this manually | |
defaults: | |
run: | |
shell: bash -euxo pipefail {0} | |
concurrency: | |
# Allow only one workflow globally because we need dedicated resources which only exist once | |
group: large-oltp-bench-workflow | |
cancel-in-progress: false | |
permissions: | |
contents: read | |
jobs: | |
oltp: | |
strategy: | |
fail-fast: false # allow other variants to continue even if one fails | |
matrix: | |
include: | |
# test only read-only custom scripts in new branch without database maintenance | |
- target: new_branch | |
custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 | |
test_maintenance: false | |
# test all custom scripts in new branch with database maintenance | |
- target: new_branch | |
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100 | |
test_maintenance: true | |
# test all custom scripts in reuse branch with database maintenance | |
- target: reuse_branch | |
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100 | |
test_maintenance: true | |
max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK) | |
permissions: | |
contents: write | |
statuses: write | |
id-token: write # aws-actions/configure-aws-credentials | |
env: | |
TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h | |
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }} | |
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install | |
PG_VERSION: 16 # pre-determined by pre-determined project | |
TEST_OUTPUT: /tmp/test_output | |
BUILD_TYPE: remote | |
PLATFORM: ${{ matrix.target }} | |
runs-on: [ self-hosted, us-east-2, x64 ] | |
container: | |
image: ghcr.io/neondatabase/build-tools:pinned-bookworm | |
credentials: | |
username: ${{ github.actor }} | |
password: ${{ secrets.GITHUB_TOKEN }} | |
options: --init | |
# Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time | |
# (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions | |
# in one run vacuum didn't finish within 12 hours | |
timeout-minutes: 2880 | |
steps: | |
- name: Harden the runner (Audit all outbound calls) | |
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 | |
with: | |
egress-policy: audit | |
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | |
- name: Configure AWS credentials # necessary to download artefacts | |
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 | |
with: | |
aws-region: eu-central-1 | |
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} | |
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role | |
- name: Download Neon artifact | |
uses: ./.github/actions/download | |
with: | |
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact | |
path: /tmp/neon/ | |
prefix: latest | |
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} | |
- name: Create Neon Branch for large tenant | |
if: ${{ matrix.target == 'new_branch' }} | |
id: create-neon-branch-oltp-target | |
uses: ./.github/actions/neon-branch-create | |
with: | |
project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }} | |
api_key: ${{ secrets.NEON_STAGING_API_KEY }} | |
- name: Set up Connection String | |
id: set-up-connstr | |
run: | | |
case "${{ matrix.target }}" in | |
new_branch) | |
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} | |
;; | |
reuse_branch) | |
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} | |
;; | |
*) | |
echo >&2 "Unknown target=${{ matrix.target }}" | |
exit 1 | |
;; | |
esac | |
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}" | |
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT | |
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT | |
- name: Delete rows from prior runs in reuse branch | |
if: ${{ matrix.target == 'reuse_branch' }} | |
env: | |
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }} | |
PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config | |
PSQL: /tmp/neon/pg_install/v16/bin/psql | |
PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib | |
run: | | |
echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs" | |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH} | |
${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';" | |
echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs" | |
- name: Benchmark pgbench with custom-scripts | |
uses: ./.github/actions/run-python-test-set | |
with: | |
build_type: ${{ env.BUILD_TYPE }} | |
test_selection: performance | |
run_in_parallel: false | |
save_perf_report: true | |
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench | |
pg_version: ${{ env.PG_VERSION }} | |
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} | |
env: | |
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} | |
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" | |
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" | |
- name: Benchmark database maintenance | |
if: ${{ matrix.test_maintenance }} | |
uses: ./.github/actions/run-python-test-set | |
with: | |
build_type: ${{ env.BUILD_TYPE }} | |
test_selection: performance | |
run_in_parallel: false | |
save_perf_report: true | |
extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance | |
pg_version: ${{ env.PG_VERSION }} | |
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} | |
env: | |
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }} | |
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" | |
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" | |
- name: Delete Neon Branch for large tenant | |
if: ${{ always() && matrix.target == 'new_branch' }} | |
uses: ./.github/actions/neon-branch-delete | |
with: | |
project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }} | |
branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }} | |
api_key: ${{ secrets.NEON_STAGING_API_KEY }} | |
- name: Configure AWS credentials # again because prior steps could have exceeded 5 hours | |
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 | |
with: | |
aws-region: eu-central-1 | |
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} | |
role-duration-seconds: 18000 # 5 hours | |
- name: Create Allure report | |
id: create-allure-report | |
if: ${{ !cancelled() }} | |
uses: ./.github/actions/allure-report-generate | |
with: | |
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} | |
- name: Post to a Slack channel | |
if: ${{ github.event.schedule && failure() }} | |
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1 | |
with: | |
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream | |
slack-message: | | |
Periodic large oltp perf testing: ${{ job.status }} | |
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> | |
<${{ steps.create-allure-report.outputs.report-url }}|Allure report> | |
env: | |
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} |