|
| 1 | +import logging |
| 2 | +import time |
| 3 | + |
| 4 | +import redis_lock |
| 5 | +from django.conf import settings |
| 6 | +from django.core.management.base import BaseCommand, CommandError |
| 7 | +from django.core.management.commands.migrate import Command as MigrateCommand |
| 8 | +from django.db import connections |
| 9 | +from django.db import transaction as django_transaction |
| 10 | +from django.db.utils import IntegrityError, ProgrammingError |
| 11 | + |
| 12 | +from services.redis import get_redis_connection |
| 13 | + |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Name of the Redis lock that guards migration runs (passed to `redis_lock.Lock`).
# NOTE(review): "djang" looks like a typo for "django", but this string is the
# runtime lock key. Presumably it must match the name used by any other service
# contending for the same lock (e.g. codecov-api) -- confirm before changing.
MIGRATION_LOCK_NAME = "djang-migrations-lock"
| 17 | + |
| 18 | + |
class MockLock:
    """No-op stand-in exposing the `release()` method of a real lock."""

    def release(self) -> None:
        """Do nothing; there is no underlying lock to release."""
        return None
| 22 | + |
| 23 | + |
class Command(MigrateCommand):
    """
    Django `migrate` command that serializes migration runs behind a Redis lock.

    We need to override the migrate command to block on acquiring a lock in Redis.
    Otherwise, concurrent worker and api deploys could attempt to run migrations
    at the same time which is not safe.

    This class is copied from `codecov-api` except it omits logic about faking
    certain migrations. When the `legacy_migrations` app is moved to `shared`
    and installed in `worker`, which is a prerequisite for core models, we can
    delete this.
    """

    def _obtain_lock(self):
        """
        Acquire the migrations lock, waiting for it if necessary.

        In certain environments we might be running multiple servers that will
        try and run the migrations at the same time. This is not safe to do. So
        we have the command obtain a lock to try and run the migration. If it
        cannot get a lock, it will wait until it is able to do so before
        continuing to run. We need to wait for the lock instead of hard exiting
        on seeing another server running the migrations because we write code in
        such a way that the server expects for migrations to be applied before
        new code is deployed (but the opposite of new db with old code is fine).

        Returns:
            A `MockLock` when `settings.IS_DEV` is truthy; otherwise the
            acquired `redis_lock.Lock`, or `None` if the lock could not be
            acquired within the timeout.
        """
        # If we're running in a non-server environment, we don't need to worry
        # about acquiring a lock.
        if settings.IS_DEV:
            return MockLock()

        lock = redis_lock.Lock(
            get_redis_connection(),
            MIGRATION_LOCK_NAME,
            expire=180,
            auto_renewal=True,
        )
        log.info("Trying to acquire migrations lock...")
        if not lock.acquire(timeout=180):
            return None
        return lock

    def handle(self, *args, **options):
        """
        Run the stock `migrate` command while holding the migrations lock.

        Forces `run_syncdb` off, obtains the lock, delegates to the parent
        `handle`, then commits the transaction on the selected database. The
        lock is always released once it has been obtained.

        Raises:
            CommandError: if the lock could not be obtained.
            BaseException: anything raised by the underlying migration or the
                commit is logged and re-raised unchanged.
        """
        log.info("Codecov is starting migrations...")
        database = options["database"]
        options["run_syncdb"] = False

        lock = self._obtain_lock()

        # Failed to acquire lock due to timeout
        if lock is None:
            log.error("Potential deadlock detected in api migrations.")
            # CommandError is the conventional failure signal for management
            # commands; it subclasses Exception, so existing handlers still work.
            raise CommandError("Failed to obtain lock for api migration.")

        try:
            super().handle(*args, **options)

            # Autocommit is disabled in worker
            django_transaction.commit(database)
        except BaseException:
            # BaseException (not Exception) so SystemExit / KeyboardInterrupt
            # are still reported here before propagating; log with traceback
            # so the cause of the failure is visible.
            log.exception("Codecov migrations failed.")
            raise
        else:
            log.info("Codecov migrations succeeded.")
        finally:
            lock.release()
0 commit comments