Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/custom_docker_builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
- docker-image: ./images/cache-indexer
image-tags: ghcr.io/spack/cache-indexer:0.0.6
- docker-image: ./analytics
image-tags: ghcr.io/spack/django:0.5.1
image-tags: ghcr.io/spack/django:0.5.2
- docker-image: ./images/ci-prune-buildcache
image-tags: ghcr.io/spack/ci-prune-buildcache:0.0.4
- docker-image: ./images/protected-publish
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
from concurrent.futures import Future, ThreadPoolExecutor, wait
from dataclasses import dataclass
from datetime import timedelta
import itertools
import re

from django.db import connections
import djclick as click

from analytics.core.models import JobFact
from analytics.job_processor.utils import get_gitlab_handle

# The URL of the webhook handler service specified in the GitLab project settings.
# This is the URL in the web_hook_logs table in the GitLab DB.
WEBHOOK_URL = "http://webhook-handler.custom.svc.cluster.local"


@dataclass
class WebhookEvent:
created_at: str
build_id: int
project_id: int
webhook_id: int
webhook_event_id: int

def __str__(self) -> str:
return f"[{self.created_at}] build_id: {self.build_id}, project_id: {self.project_id}, webhook_id: {self.webhook_id}, webhook_event_id: {self.webhook_event_id}"


def retry_webhook(webhook_event: WebhookEvent, dry_run: bool) -> None:
if dry_run:
click.echo(f"Would retry webhook {webhook_event}")
return

click.echo(f"Retrying webhook {webhook_event}")
gl = get_gitlab_handle()

# https://docs.gitlab.com/ee/api/project_webhooks.html#resend-a-project-webhook-event
retry_url = f"/projects/{webhook_event.project_id}/hooks/{webhook_event.webhook_id}/events/{webhook_event.webhook_event_id}/resend"
gl.http_post(retry_url)
Comment on lines +30 to +40
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: try fetching the webhook payload from the web_hook_logs table and invoke the ingest function directly instead of sending a webhook



@click.command()
@click.option(
"--seconds",
type=int,
default=timedelta(days=1).total_seconds(),
help="Retry webhooks that failed in the last N seconds",
)
@click.option(
"--dry-run",
is_flag=True,
default=False,
help="Print the webhooks that would be retried without actually retrying them",
)
def retry_failed_job_webhooks(seconds: int, dry_run: bool) -> None:
with connections["gitlab"].cursor() as cursor:
cursor.execute("BEGIN;")

cursor.execute(
"""
DECLARE
webhook_cursor
CURSOR FOR
SELECT
created_at,
request_data,
web_hook_id,
id
FROM
public.web_hook_logs
WHERE
url = %s AND
created_at > NOW() - INTERVAL %s;
""",
[WEBHOOK_URL, f"{seconds} seconds"],
)

futures: list[Future] = []

with ThreadPoolExecutor() as executor:
while True:
# Fetch a batch of rows from the cursor
cursor.execute("FETCH FORWARD %s FROM webhook_cursor", [5000])
rows = cursor.fetchall()
if not rows:
break

webhook_events = [
WebhookEvent(
created_at=row[0],
build_id=int(re.search(r"build_id: (\d+)", row[1]).group(1)),
project_id=int(re.search(r"project_id: (\d+)", row[1]).group(1)),
webhook_id=row[2],
webhook_event_id=row[3],
)
for row in rows
]

# We only want to retry webhooks for builds that have finished (i.e.
# status is 'success' or 'failed'). Skipped or cancelled builds are
# not stored in the analytics DB.
cursor.execute(
"""
SELECT
id
FROM
ci_builds
WHERE
id IN %s AND
status IN ('success', 'failed');
""",
[tuple(event.build_id for event in webhook_events)],
)
finished_jobs: set[int] = set(itertools.chain.from_iterable(cursor.fetchall()))

# Build a mapping of build ID to webhook event object for fast lookup by build ID
build_id_to_webhook_mapping: dict[int, WebhookEvent] = {
event.build_id: event
for event in webhook_events
if event.build_id in finished_jobs
}

# Collect all build IDs
build_ids: set[int] = set(build_id_to_webhook_mapping.keys())

# Filter out build IDs that already have a corresponding analytics DB record
existing_build_ids: set[int] = set(
JobFact.objects.filter(job_id__in=build_ids).values_list("job_id", flat=True)
)

# Calculate the missing build IDs
missing_build_ids: set[int] = build_ids - existing_build_ids

# Retry the webhooks for the missing build IDs
for build_id in missing_build_ids:
futures.append(
executor.submit(
retry_webhook, build_id_to_webhook_mapping[build_id], dry_run
)
)

cursor.execute("CLOSE webhook_cursor;")
cursor.execute("COMMIT;")

wait(futures)
4 changes: 2 additions & 2 deletions k8s/production/custom/webhook-handler/deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ spec:
serviceAccountName: webhook-handler
containers:
- name: webhook-handler
image: ghcr.io/spack/django:0.5.1
image: ghcr.io/spack/django:0.5.2
imagePullPolicy: Always
resources:
requests:
Expand Down Expand Up @@ -146,7 +146,7 @@ spec:
serviceAccountName: webhook-handler
containers:
- name: webhook-handler-worker
image: ghcr.io/spack/django:0.5.1
image: ghcr.io/spack/django:0.5.2
command:
[
"celery",
Expand Down
Loading