Skip to content

backup retention logic in /scripts/postgres_backup.sh is broken if basebackup frequency is low #425

@georgebarbarosie

Description

@georgebarbarosie

If base backup is infrequently executed (less than daily) the logic in postgres_backup will fail to ever execute any cleanup. The LAST variable counts only backups that are inside the retention interval, which for less than daily backups means less than $DAYS_TO_RETAIN; even when there are enough backups in total available, the delete operation never gets called. Here's a better logic IMO:

--- postgres_backup.sh	2020-04-01 02:09:12.005762700 +0100
+++ postgres_backup_new2.sh	2020-04-01 02:59:55.458355000 +0100
@@ -38,33 +38,50 @@
     POOL_SIZE="--pool-size ${POOL_SIZE}"
 fi
 
-BEFORE=""
-LEFT=0
+BEFORE_BY_TIME=""
+BEFORE_BY_COUNT=""
+INDEX=0
+# count how many backups 
+BACKUPS_IN_TIME=""
 
 readonly NOW=$(date +%s -u)
-while read name last_modified rest; do
+while read last_modified name; do
     last_modified=$(date +%s -ud "$last_modified")
+    ((INDEX=INDEX+1))
     if [ $(((NOW-last_modified)/86400)) -ge $DAYS_TO_RETAIN ]; then
-        if [ -z "$BEFORE" ] || [ "$last_modified" -gt "$BEFORE_TIME" ]; then
+        [ -z "$BACKUPS_IN_TIME" ] && ((BACKUPS_IN_TIME=INDEX))
+        if [ -z "$BEFORE_BY_TIME" ] || [ "$last_modified" -gt "$BEFORE_TIME" ]; then
             BEFORE_TIME=$last_modified
-            BEFORE=$name
+            BEFORE_BY_TIME=$name
+
+        fi
+        if [ $INDEX -ge $DAYS_TO_RETAIN ] || [ -z "$BEFORE_BY_COUNT" ]; then
+            # this is the last backup to keep
+            BEFORE_BY_COUNT=$name
         fi
-    else
-        # count how many backups will remain after we remove everything up to certain date
-        ((LEFT=LEFT+1))
     fi
-done < <($WAL_E backup-list 2> /dev/null | sed '0,/^name\s*last_modified\s*/d')
+done < <($WAL_E backup-list 2> /dev/null | tail -n +2 | awk '{ print $2 " " $1 }' | sort -r)
+
+BEFORE=""
+if [ -z "$BEFORE_BY_COUNT" ]; then
+  log "Less than $DAYS_TO_RETAIN backups are present, not deleting any"
+else
+  if [ -z "$BACKUPS_IN_TIME" ] || [ $BACKUPS_IN_TIME -lt $DAYS_TO_RETAIN ]; then
+      BEFORE=$BEFORE_BY_COUNT
+  else
+      BEFORE=$BEFORE_BY_TIME
+  fi
+fi
 
-# we want keep at least N backups even if the number of days exceeded
-if [ ! -z "$BEFORE" ] && [ $LEFT -ge $DAYS_TO_RETAIN ]; then
+if [ ! -z "$BEFORE" ]; then
     if [[ "$USE_WALG_BACKUP" == "true" ]]; then
-        $WAL_E delete before FIND_FULL "$BEFORE" --confirm
+        echo $WAL_E delete before FIND_FULL "$BEFORE_BY_TIME" --confirm
     else
-        $WAL_E delete --confirm before "$BEFORE"
+        echo $WAL_E delete --confirm before "$BEFORE_BY_TIME"
     fi
 fi
 
 # push a new base backup
 log "producing a new backup"
 # We reduce the priority of the backup for CPU consumption
-exec nice -n 5 $WAL_E backup-push "${PGDATA}" $POOL_SIZE
+echo exec nice -n 5 $WAL_E backup-push "${PGDATA}" $POOL_SIZE

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions