|
| 1 | +from collections import deque |
| 2 | +from concurrent.futures import Future, ThreadPoolExecutor, wait |
| 3 | +from dataclasses import dataclass |
| 4 | +from datetime import timedelta |
| 5 | +import re |
| 6 | + |
| 7 | +from django.db import connections |
| 8 | +import djclick as click |
| 9 | + |
| 10 | +from analytics.core.models import JobDataDimension |
| 11 | +from analytics.job_processor.utils import get_gitlab_handle |
| 12 | + |
| 13 | +# The URL of the webhook handler service specified in the GitLab project settings. |
| 14 | +# This is the URL in the web_hook_logs table in the GitLab DB. |
| 15 | +WEBHOOK_URL = "http://webhook-handler.custom.svc.cluster.local" |
| 16 | + |
| 17 | + |
| 18 | +@dataclass |
| 19 | +class WebhookEvent: |
| 20 | + created_at: str |
| 21 | + build_id: int |
| 22 | + project_id: int |
| 23 | + webhook_id: int |
| 24 | + webhook_event_id: int |
| 25 | + |
| 26 | + def __str__(self) -> str: |
| 27 | + return f"[{self.created_at}] build_id: {self.build_id}, project_id: {self.project_id}, webhook_id: {self.webhook_id}, webhook_event_id: {self.webhook_event_id}" |
| 28 | + |
| 29 | + |
| 30 | +def retry_webhook(webhook_event: WebhookEvent, dry_run: bool) -> None: |
| 31 | + if dry_run: |
| 32 | + click.echo(f"Would retry webhook {webhook_event}") |
| 33 | + return |
| 34 | + |
| 35 | + click.echo(f"Retrying webhook {webhook_event}") |
| 36 | + gl = get_gitlab_handle() |
| 37 | + |
| 38 | + # https://docs.gitlab.com/ee/api/project_webhooks.html#resend-a-project-webhook-event |
| 39 | + retry_url = f"/projects/{webhook_event.project_id}/hooks/{webhook_event.webhook_id}/events/{webhook_event.webhook_event_id}/resend" |
| 40 | + gl.http_post(retry_url) |
| 41 | + |
| 42 | + |
| 43 | +@click.command() |
| 44 | +@click.option( |
| 45 | + "--seconds", |
| 46 | + type=int, |
| 47 | + default=timedelta(days=1).total_seconds(), |
| 48 | + help="Retry webhooks that failed in the last N seconds", |
| 49 | +) |
| 50 | +@click.option( |
| 51 | + "--dry-run", |
| 52 | + is_flag=True, |
| 53 | + default=False, |
| 54 | + help="Print the webhooks that would be retried without actually retrying them", |
| 55 | +) |
| 56 | +def retry_failed_job_webhooks(seconds: int, dry_run: bool) -> None: |
| 57 | + with connections["gitlab"].cursor() as cursor: |
| 58 | + cursor.execute("BEGIN;") |
| 59 | + |
| 60 | + cursor.execute( |
| 61 | + """ |
| 62 | + DECLARE webhook_cursor CURSOR FOR |
| 63 | + SELECT created_at, request_data, web_hook_id, id |
| 64 | + FROM public.web_hook_logs |
| 65 | + WHERE url = %s |
| 66 | + AND created_at > NOW() - INTERVAL %s; |
| 67 | + """, |
| 68 | + [WEBHOOK_URL, f"{seconds} seconds"], |
| 69 | + ) |
| 70 | + |
| 71 | + build_ids: deque[int] = deque([]) |
| 72 | + |
| 73 | + futures: list[Future] = [] |
| 74 | + |
| 75 | + with ThreadPoolExecutor() as executor: |
| 76 | + while True: |
| 77 | + # Fetch a batch of rows from the cursor |
| 78 | + cursor.execute("FETCH FORWARD %s FROM webhook_cursor", [5000]) |
| 79 | + rows = cursor.fetchall() |
| 80 | + if not rows: |
| 81 | + break |
| 82 | + |
| 83 | + webhook_events = [ |
| 84 | + WebhookEvent( |
| 85 | + created_at=row[0], |
| 86 | + build_id=int(re.search(r"build_id: (\d+)", row[1]).group(1)), |
| 87 | + project_id=int(re.search(r"project_id: (\d+)", row[1]).group(1)), |
| 88 | + webhook_id=row[2], |
| 89 | + webhook_event_id=row[3], |
| 90 | + ) |
| 91 | + for row in rows |
| 92 | + ] |
| 93 | + |
| 94 | + # Build a mapping of build ID to webhook event object for fast lookup by build ID |
| 95 | + build_id_to_webhook_mapping: dict[int, WebhookEvent] = { |
| 96 | + event.build_id: event for event in webhook_events |
| 97 | + } |
| 98 | + |
| 99 | + # Collect all build IDs |
| 100 | + build_ids: set[int] = set(build_id_to_webhook_mapping.keys()) |
| 101 | + |
| 102 | + # Filter out build IDs that already have a corresponding JobDataDimension record |
| 103 | + existing_build_ids: set[int] = set( |
| 104 | + JobDataDimension.objects.filter(job_id__in=build_ids).values_list( |
| 105 | + "job_id", flat=True |
| 106 | + ) |
| 107 | + ) |
| 108 | + |
| 109 | + # Calculate the missing build IDs |
| 110 | + missing_build_ids: set[int] = build_ids - existing_build_ids |
| 111 | + |
| 112 | + # Retry the webhooks for the missing build IDs |
| 113 | + for build_id in missing_build_ids: |
| 114 | + futures.append( |
| 115 | + executor.submit( |
| 116 | + retry_webhook, build_id_to_webhook_mapping[build_id], dry_run |
| 117 | + ) |
| 118 | + ) |
| 119 | + |
| 120 | + cursor.execute("CLOSE webhook_cursor;") |
| 121 | + cursor.execute("COMMIT;") |
| 122 | + |
| 123 | + wait(futures) |
0 commit comments