-- https://www.postgresql.org/docs/14/app-pgrewind.html -- start gcloud beta compute --project=celtic-house-266612 instances create postgres --zone=us-central1-a --machine-type=e2-medium --subnet=default --network-tier=PREMIUM --maintenance-policy=MIGRATE --service-account=933982307116-compute@developer.gserviceaccount.com --scopes=https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring.write,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append --image-family=ubuntu-2004-lts --image-project=ubuntu-os-cloud --boot-disk-size=10GB --boot-disk-type=pd-ssd --boot-disk-device-name=postgres --no-shielded-secure-boot --shielded-vtpm --shielded-integrity-monitoring --reservation-affinity=any gcloud compute ssh postgres -- поставим докер -- https://docs.docker.com/engine/install/ubuntu/ curl -fsSL https://get.docker.com -o get-docker.sh && sudo sh get-docker.sh && rm get-docker.sh && sudo usermod -aG docker $USER sudo apt-get install -y uidmap dockerd-rootless-setuptool.sh install -- 1 terminal docker run -it --rm -p 5551:5432 --name=db1 --hostname=db1 ubuntu:20.04 bash -- 2 docker run -it --rm -p 5552:5432 --name=db2 --hostname=db2 ubuntu:20.04 bash -- on both containers -- postgres 12 - как думаете почему установится 12 версия?) ln -snf /usr/share/zoneinfo/UTC /etc/localtime && echo UTC > /etc/timezone && apt update && apt install -y postgresql postgresql-contrib iputils-ping sudo vim netcat echo "listen_addresses = '*'" >> /etc/postgresql/12/main/postgresql.conf # необходимо для pg_rewind echo "wal_log_hints = on" >> /etc/postgresql/12/main/postgresql.conf echo "archive_mode = on" >> /etc/postgresql/12/main/postgresql.conf echo "archive_command = 'test ! -f /archive/%f && cp %p /archive/%f'" >> /etc/postgresql/12/main/postgresql.conf # echo "archive_cleanup_command = 'pg_archivecleanup /archive %r'" >> /etc/postgresql/12/main/postgresql.conf # echo "restore_command = 'cp /archive/%f %p'" >> /etc/postgresql/12/main/postgresql.conf echo "host replication replica 0.0.0.0/0 md5" >> /etc/postgresql/12/main/pg_hba.conf echo "host all rewind 0.0.0.0/0 md5" >> /etc/postgresql/12/main/pg_hba.conf mkdir /archive chown -R postgres:postgres /archive -- on host machine # db1 - 172.17.0.2 docker inspect db1 | grep IPAddress # db2 - 172.17.0.3 docker inspect db2 | grep IPAddress -- on first container -- check connectivity by sending pings ping 172.17.0.3 -- 1 terminal -- start postgres pg_ctlcluster 12 main start -- create replica and rewind users with password test123 sudo -u postgres psql -c "CREATE USER replica with replication encrypted password 'test123'" sudo -u postgres psql -c "CREATE USER rewind SUPERUSER encrypted PASSWORD 'test123'" -- create sample database and fill it sudo -u postgres psql -c "create database sample" sudo -u postgres pgbench -i -s 10 sample -- on a second container -- cleanup data directory sudo -u postgres rm -rf /var/lib/postgresql/12/main -- ensure first container is listening on expected port nc -vz 172.17.0.2 5432 -- MANUAL restore cluster from master (it will ask for test123 password of replica user) -- test123 sudo -u postgres pg_basebackup --host=172.17.0.2 --port=5432 --username=replica --pgdata=/var/lib/postgresql/12/main/ --progress --write-recovery-conf --create-slot --slot=replica1 -- waiting for checkpoint -- notes: -- it will ask for test123 password of replica user created earlier -- it might take some time to backup restore 500mb of data -- it will wait for a checkpoint before starting, so run on a master sudo -u postgres psql -c "checkpoint" -- on 2 container -- make sure that connection info is saved cat /var/lib/postgresql/12/main/postgresql.auto.conf -- and that you have standby.signal file in place (existence of this file will force postgres to run as slave) ls -la /var/lib/postgresql/12/main/ | grep standby -- start postgres pg_ctlcluster 12 main start -- and make sure it is up and running - see online,recovery pg_lsclusters -- on a first container - check replication slots sudo -u postgres psql -c "select * from pg_replication_slots" sudo -u postgres psql -c "select * from pg_stat_replication" -- lets create table and fill it with some dummy data sudo -u postgres psql sample -c "create table messages(message text)" sudo -u postgres psql sample -c "insert into messages values('hello')" sudo -u postgres psql sample -c "select * from messages" -- almost immediatelly you should see that table and message on a replica -- Failover -- on a second container -- lets pretend that we lose our master - promote second container as a new master sudo pg_ctlcluster 12 main promote -- standby file should be removed automatically ls -la /var/lib/postgresql/12/main/ | grep standby -- Connection info in postgres.auto.conf will left inact, but it is ok, until there is no standby file cat /var/lib/postgresql/12/main/postgresql.auto.conf -- write records now to 2 server sudo -u postgres psql sample -c "insert into messages values('world')" sudo -u postgres psql sample -c "select * from messages" -- on a 1 container - master is still alive and received one more update sudo -u postgres psql sample -c "insert into messages values('contoso')" sudo -u postgres psql sample -c "select * from messages" -- has splitbrain -- stop 1 postgres pg_ctlcluster 12 main stop -- rewind sudo -u postgres /usr/lib/postgresql/12/bin/pg_rewind --target-pgdata /var/lib/postgresql/12/main --source-server="postgresql://rewind:test123@172.17.0.3:5432/sample" --progress -- if error -- rewind might complain with error like: pg_rewind: error: could not open file "/var/lib/postgresql/12/main/pg_wal/00000001000000000000000A": No such file or directory you gonna need to copy this file from /archive to pg_wal, e.g.: ls -la /archive/ cp /archive/00000001000000000000000A /var/lib/postgresql/12/main/pg_wal/ chown postgres:postgres /var/lib/postgresql/12/main/pg_wal/00000001000000000000000A sudo -u postgres /usr/lib/postgresql/12/bin/pg_rewind --target-pgdata /var/lib/postgresql/12/main --source-server="postgresql://rewind:test123@172.17.0.3:5432/sample" --progress -- now, when we rewinded lets make it slave -- create standy signal touch /var/lib/postgresql/12/main/standby.signal -- and add replication info echo "primary_conninfo = 'user=replica password=test123 host=172.17.0.3 port=5432 sslmode=prefer sslcompression=0 gssencmode=prefer krbsrvname=postgres target_session_attrs=any'" >> /var/lib/postgresql/12/main/postgresql.auto.conf -- adding another relication slot echo "primary_slot_name = 'replica2'" >> /var/lib/postgresql/12/main/postgresql.auto.conf -- create slot on new master -- 2 container sudo -u postgres psql -c "select * from pg_create_physical_replication_slot('replica2')" sudo -u postgres psql -c "select * from pg_replication_slots" -- start postgres on 1 container pg_ctlcluster 12 main start pg_lsclusters -- check that data is synced sudo -u postgres psql sample -c "select * from messages" -- important: just in case check that old replica1 is not here and removed sudo -u postgres psql -c "select * from pg_replication_slots" sudo -u postgres psql -c "select * from pg_drop_replication_slot('replica1')" -- exit from docker sudo docker ps -- удалим наш проект gcloud compute instances delete postgres