|
# Start the server with pg_ctl as the postgres user and wait (-w) for it to
# come up. The wait is capped by pg_ctl_timeout (3600 seconds when unset).
# Runs only when the registered pg_ctl status result has return code 3,
# which pg_ctl documents as "server is not running".
- name: Prepare PostgreSQL | start PostgreSQL on Master
  ansible.builtin.command: >-
    {{ postgresql_bin_dir }}/pg_ctl start
    -D {{ postgresql_data_dir }}
    -w -t {{ pg_ctl_timeout | default(3600) }}
  become: true
  become_user: postgres
  when:
    - pg_ctl_status_result.rc == 3
|
512 | 512 |
|
513 | 513 | - name: Prepare PostgreSQL | check PostgreSQL is accepting connections
|
|
# Fast-mode shutdown through pg_ctl as the postgres user, waiting up to
# pg_ctl_timeout seconds (3600 when unset). Runs only when the result
# registered as checkpoint_result carries a return code and that code is 0.
- name: Prepare PostgreSQL | stop PostgreSQL (will be managed by patroni)
  ansible.builtin.command: >-
    {{ postgresql_bin_dir }}/pg_ctl stop
    -D {{ postgresql_data_dir }}
    -m fast -w -t {{ pg_ctl_timeout | default(3600) }}
  become: true
  become_user: postgres
  when:
    - checkpoint_result.rc is defined
    - checkpoint_result.rc == 0
|
589 | 589 |
|
590 | 590 | - name: Prepare PostgreSQL | check PostgreSQL is stopped
|
|
598 | 598 | tags: patroni, patroni_start_master
|
599 | 599 |
|
600 | 600 | - block: # PITR (custom bootstrap)
|
601 |
| - # Prepare (install pexpect, ruamel.yaml) |
| 601 | + # Prepare (install pexpect, ruamel.yaml) |
602 | 602 | - name: Prepare | Make sure the ansible required python library is exist
|
603 | 603 | ansible.builtin.pip:
|
604 | 604 | name: "{{ item }}"
|
|
612 | 612 | environment:
|
613 | 613 | PATH: "{{ ansible_env.PATH }}:/usr/local/bin:/usr/bin"
|
614 | 614 | PIP_BREAK_SYSTEM_PACKAGES: "1"
|
615 |
| - # Run PITR |
| 615 | + |
| 616 | + # Run PITR |
616 | 617 | - name: Stop patroni service on the Replica servers (if running)
|
617 | 618 | ansible.builtin.systemd:
|
618 | 619 | name: patroni
|
|
625 | 626 | state: stopped
|
626 | 627 | when: is_master | bool
|
627 | 628 |
|
# Probe the server state with `pg_ctl status`. The task never fails and never
# reports a change; its return code is stored in pg_ctl_status_result so the
# following stop task can decide whether there is anything to shut down.
- name: Check that PostgreSQL is stopped
  ansible.builtin.command: >-
    {{ postgresql_bin_dir }}/pg_ctl status -D {{ postgresql_data_dir }}
  register: pg_ctl_status_result
  failed_when: false
  changed_when: false
  become: true
  become_user: postgres
| 636 | + |
# Fast-mode shutdown, waiting up to pg_ctl_timeout seconds (3600 when unset).
# Skipped when the status probe returned 3 or 4, which pg_ctl documents as
# "server not running" and "no accessible data directory" respectively.
- name: Stop PostgreSQL
  ansible.builtin.command: >-
    {{ postgresql_bin_dir }}/pg_ctl stop
    -D {{ postgresql_data_dir }}
    -m fast -w -t {{ pg_ctl_timeout | default(3600) }}
  become: true
  become_user: postgres
  when:
    - pg_ctl_status_result.rc is defined
    - pg_ctl_status_result.rc not in [3, 4]
| 643 | + |
628 | 644 | - name: Remove patroni cluster "{{ patroni_cluster_name }}" from DCS (if exist)
|
629 | 645 | become: true
|
630 | 646 | become_user: postgres
|
|
648 | 664 | ansible.builtin.command: >
|
649 | 665 | {{ pgbackrest_patroni_cluster_restore_command }}
|
650 | 666 | {{ '--target-action=promote' if pgbackrest_patroni_cluster_restore_command is search('--type=') else '' }}
|
651 |
| - async: 86400 # timeout 24 hours |
| 667 | + async: "{{ cluster_restore_timeout | default(86400) }}" # timeout 24 hours |
652 | 668 | poll: 0
|
653 | 669 | register: pgbackrest_restore_master
|
654 | 670 | when: is_master | bool
|
|
658 | 674 | ansible.builtin.command: >
|
659 | 675 | {{ pgbackrest_patroni_cluster_restore_command }}
|
660 | 676 | {{ '--target-action=shutdown' if pgbackrest_patroni_cluster_restore_command is search('--type=') else '' }}
|
661 |
| - async: 86400 # timeout 24 hours |
| 677 | + async: "{{ cluster_restore_timeout | default(86400) }}" # timeout 24 hours |
662 | 678 | poll: 0
|
663 | 679 | register: pgbackrest_restore_replica
|
664 | 680 | when: not is_master | bool and 'pgbackrest' in patroni_create_replica_methods
|
|
673 | 689 | label: "{{ item.changed }}"
|
674 | 690 | register: pgbackrest_restore_jobs_result
|
675 | 691 | until: pgbackrest_restore_jobs_result.finished
|
676 |
| - retries: 2880 # timeout 24 hours |
| 692 | + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours |
677 | 693 | delay: 30
|
678 | 694 | when: item.ansible_job_id is defined
|
679 | 695 |
|
|
685 | 701 | when: not keep_patroni_dynamic_json|bool
|
686 | 702 |
|
# Launch PostgreSQL in the background (async + poll: 0) so the play does not
# block on a long WAL replay; the job handle is kept in pg_ctl_start_result.
# Server options passed through -o:
#   - explicit config file under postgresql_conf_dir
#   - hot_standby=off
#   - restore_command via pgbackrest archive-get (PostgreSQL 12 and newer only)
#   - archive_command=/bin/true
# Server output goes to a per-day log file under /tmp.
# The condition is the simplified form of
# "master, or (not master and pgbackrest is a replica creation method)".
- name: Start PostgreSQL for Recovery
  ansible.builtin.command: >-
    {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -w -t {{ pg_ctl_timeout | default(3600) }}
    -o '--config-file={{ postgresql_conf_dir }}/postgresql.conf'
    -o '-c hot_standby=off'
    {% if postgresql_version | int >= 12 %}
    -o '-c restore_command="pgbackrest --stanza={{ pgbackrest_stanza }} archive-get %f %p"'
    {% endif %}
    -o '-c archive_command=/bin/true'
    -l /tmp/pg_recovery_{{ ansible_date_time.date }}.log
  register: pg_ctl_start_result
  async: "{{ pg_ctl_timeout | default(3600) }}" # upper bound for the background job
  poll: 0
  when: is_master | bool or 'pgbackrest' in patroni_create_replica_methods
|
690 | 717 |
|
691 |
| - - name: Waiting for PostgreSQL Recovery to complete (WAL apply) |
# Poll the background pg_ctl job every 10 seconds until it reports finished.
# The retry count is derived from pg_ctl_timeout (3600 seconds when unset).
# The first condition guards the jid lookup: when the start task produced no
# job id (skipped, or not executed in check mode) this task is skipped instead
# of failing on an undefined variable.
- name: Wait for the PostgreSQL start command to complete
  ansible.builtin.async_status:
    jid: "{{ pg_ctl_start_result.ansible_job_id }}"
  register: pg_ctl_start_job_result
  until: pg_ctl_start_job_result.finished
  retries: "{{ (pg_ctl_timeout | default(3600) | int) // 10 }}"
  delay: 10
  when:
    - pg_ctl_start_result.ansible_job_id is defined
    - is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods)
| 726 | + |
# Master only: ask the server every 30 seconds whether it is still in
# recovery and stop polling once psql prints exactly "f".
# The retry count is cluster_restore_timeout (86400 seconds when unset)
# divided by the 30-second delay. The task never fails the play, even when
# the retries are exhausted (failed_when: false).
- name: Wait for PostgreSQL recovery to complete (WAL apply)
  ansible.builtin.command: >-
    {{ postgresql_bin_dir }}/psql
    -p {{ postgresql_port }}
    -U {{ patroni_superuser_username }}
    -d postgres
    -tAXc "select pg_is_in_recovery()"
  register: pg_is_in_recovery
  until: pg_is_in_recovery.stdout == "f"
  retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours
  delay: 30
  changed_when: false
  failed_when: false
  when:
    - is_master | bool
| 738 | + |
# Master only: pull the "recovery stopping" line and the two lines after it
# from the per-day recovery log. A missing match or missing file is not an
# error (failed_when: false); the output is kept in pg_recovery_result.
- name: Check PostgreSQL recovery log
  ansible.builtin.command: >-
    grep -A2 'recovery stopping'
    /tmp/pg_recovery_{{ ansible_date_time.date }}.log
  register: pg_recovery_result
  failed_when: false
  changed_when: false
  when:
    - is_master | bool
| 745 | + |
# Print whatever the recovery-log grep captured. Hosts where that task was
# skipped have no stdout_lines, so they skip this one as well.
- name: PostgreSQL recovery details
  ansible.builtin.debug:
    msg: "{{ pg_recovery_result.stdout_lines }}"
  when:
    - pg_recovery_result.stdout_lines is defined
702 | 750 |
|
703 | 751 | - name: Check that PostgreSQL is stopped
|
704 | 752 | ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl status -D {{ postgresql_data_dir }}"
|
|
707 | 755 | failed_when: false
|
708 | 756 |
|
# Fast-mode shutdown once recovery is done, waiting up to pg_ctl_timeout
# seconds (3600 when unset). Skipped when the preceding status probe returned
# 3 or 4, which pg_ctl documents as "server not running" and "no accessible
# data directory" respectively.
- name: Stop PostgreSQL
  ansible.builtin.command: >-
    {{ postgresql_bin_dir }}/pg_ctl stop
    -D {{ postgresql_data_dir }}
    -m fast -w -t {{ pg_ctl_timeout | default(3600) }}
  when:
    - pg_ctl_status_result.rc is defined
    - pg_ctl_status_result.rc not in [3, 4]
|
712 | 761 | when: patroni_cluster_bootstrap_method == "pgbackrest"
|
713 | 762 | become: true
|
|
718 | 767 | tags: patroni, point_in_time_recovery
|
719 | 768 |
|
# After a custom (non-initdb) bootstrap, neutralise archive_command so the
# restored cluster does not archive WAL yet. Two places are handled:
#   - patroni.dynamic.json in the data directory: removed unless
#     keep_patroni_dynamic_json is set, edited in place when it is kept
#   - bootstrap.dcs section of /etc/patroni/patroni.yml
# The whole block runs as the postgres user and only for new clusters that
# have pgbackrest or wal-g installed.
- block: # PITR (custom bootstrap) - disable archive_command
    # Always stat the file: both follow-up tasks need to know whether it exists.
    - name: Check if patroni.dynamic.json exists
      ansible.builtin.stat:
        path: "{{ postgresql_data_dir }}/patroni.dynamic.json"
      register: patroni_dynamic_json

    - name: Remove patroni.dynamic.json file
      ansible.builtin.file:
        path: "{{ postgresql_data_dir }}/patroni.dynamic.json"
        state: absent
      when:
        - patroni_dynamic_json.stat.exists | default(false)
        - not keep_patroni_dynamic_json | bool

    # Edit only a file that exists and is being kept, so yedit is never
    # pointed at a file that was just removed or was never there.
    - name: Edit patroni.dynamic.json | disable archive_command (if enabled)
      yedit:
        src: "{{ postgresql_data_dir }}/patroni.dynamic.json"
        key: postgresql.parameters.archive_command
        value: "cd ." # not doing anything yet with WAL-s
        content_type: json
      when:
        - patroni_dynamic_json.stat.exists | default(false)
        - keep_patroni_dynamic_json | bool
        - disable_archive_command | bool

    - name: Edit patroni.yml | disable archive_command (if enabled)
      yedit:
        src: /etc/patroni/patroni.yml
        key: bootstrap.dcs.postgresql.parameters.archive_command
        value: "cd ." # not doing anything yet with WAL-s
      when: disable_archive_command | bool
  when: patroni_cluster_bootstrap_method != "initdb" and
        (pgbackrest_install | bool or wal_g_install | bool) and
        (existing_pgcluster is not defined or not existing_pgcluster | bool)
  become: true
  become_user: postgres
  tags: patroni, point_in_time_recovery
|
|
791 | 842 | "select pg_is_in_recovery()"
|
792 | 843 | register: pg_is_in_recovery
|
793 | 844 | until: pg_is_in_recovery.stdout == "f"
|
794 |
| - retries: 1200 # timeout 10 hours |
| 845 | + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours |
795 | 846 | delay: 30
|
796 | 847 | changed_when: false
|
797 | 848 | failed_when: false
|
798 | 849 | when: patroni_cluster_bootstrap_method == "wal-g"
|
799 | 850 |
|
800 |
| - - name: Check PostgreSQL is started and accepting connections on Master |
# Standby-cluster deployments only: poll this host's Patroni REST API
# /standby-leader endpoint every 30 seconds until it answers HTTP 200.
# The retry count is cluster_restore_timeout (86400 seconds when unset)
# divided by the 30-second delay. The request bypasses any proxy for this
# host, and the task is skipped in check mode.
- name: Wait for the Standby cluster initialization to complete
  ansible.builtin.uri:
    url: "http://{{ inventory_hostname }}:{{ patroni_restapi_port }}/standby-leader"
    status_code: 200
  environment:
    no_proxy: "{{ inventory_hostname }}"
  register: standby_leader_result
  until: standby_leader_result.status == 200
  retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours
  delay: 30
  when:
    - patroni_standby_cluster.host is defined
    - patroni_standby_cluster.host | length > 0
    - not ansible_check_mode
| 864 | + |
| 865 | + - name: Check PostgreSQL is started and accepting connections |
801 | 866 | become: true
|
802 | 867 | become_user: postgres
|
803 | 868 | ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_isready -p {{ postgresql_port }}"
|
|
853 | 918 | {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
|
854 | 919 | "select pg_is_in_recovery()"
|
855 | 920 | register: pg_is_in_recovery
|
856 |
| - until: pg_is_in_recovery.stdout != "t" |
857 |
| - retries: 1200 # timeout 10 hours |
| 921 | + until: pg_is_in_recovery.stdout == "f" |
| 922 | + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours |
858 | 923 | delay: 30
|
859 | 924 | changed_when: false
|
860 | 925 | when: is_master | bool
|
|
961 | 1026 | status_code: 200
|
962 | 1027 | register: replica_result
|
963 | 1028 | until: replica_result.status == 200
|
964 |
| - retries: 1200 # timeout 10 hours |
| 1029 | + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours |
965 | 1030 | delay: 30
|
966 | 1031 | environment:
|
967 | 1032 | no_proxy: "{{ inventory_hostname }}"
|
|
0 commit comments