diff --git a/.gitignore b/.gitignore index 349c501..0810037 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ *.o repmgr repmgrd +README.htm* +README.pdf diff --git a/COPYRIGHT b/COPYRIGHT index 5bef748..f5aae95 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,4 +1,4 @@ -Copyright (c) 2009-2010, 2ndQuadrant Limited +Copyright (c) 2010-2011, 2ndQuadrant Limited All rights reserved. This program is free software: you can redistribute it and/or modify diff --git a/CREDITS b/CREDITS index f8b33cd..78e707b 100644 --- a/CREDITS +++ b/CREDITS @@ -5,3 +5,8 @@ Simon Riggs Greg Smith Robert J. Noles Gabriele Bartolini +Bas van Oostveen +Hannu Krosing +Cédric Villemain +Charles Duffy +Daniel Farina diff --git a/HISTORY b/HISTORY index bbeada8..3a6b891 100644 --- a/HISTORY +++ b/HISTORY @@ -1,3 +1,33 @@ -1.0.0 2010-12-05 First public release +1.0.0 2010-12-05 + First public release + +1.1.0b1 2011-02-24 + Fix missing "--force" option in help (Greg Smith) + Correct warning message for wal_keep_segments (Bas van Oostveen) + Add Debian build/usage docs (Bas, Hannu Krosing, Cedric Villemain) + Add Debian .deb packaging (Hannu) + Move configuration data into a structure (Bas, Gabriele Bartolini) + Make rsync options configurable (Bas) + Add syslog as alternate logging destination (Gabriele) + Change from using malloc to static memory allocations (Gabriele) + Add debugging messages after every query (Gabriele) + Parameterize schema name used for repmgr (Gabriele) + Avoid buffer overruns by using snprintf etc. 
(Gabriele) + Fix use of database query after close (Gabriele) + Add information about progress during "standby clone" (Gabriele) + Fix double free errors in repmgrd (Charles Duffy, Greg) + Make repmgr exit with an error code when encountering an error (Charles) + Standardize on error return codes, use in repmgrd too (Greg) + Add [un]install actions/SQL like most contrib modules (Daniel Farina) + Wrap all string construction and produce error on overflow (Daniel) + Correct freeing of memory from first_wal_segment (Daniel) + Allow creating recovery.conf file with a password (Daniel) + Inform when STANDBY CLONE sees an unused config file (Daniel) + Use 64-bit computation for WAL apply_lag (Greg) + Add info messages for database and general work done (Greg) + Map old verbose flag into a useful setting for the new logger (Greg) + Document repmgrd startup restrictions and log info about them (Greg) + +1.1.0 2011-03-09 + Make options -U, -R and -p not mandatory (Jaime) -1.0.1 2010-12-XX Fix missing "--force" option in help diff --git a/Makefile b/Makefile index 655467a..d225da1 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,11 @@ # # Makefile -# Copyright (c) 2ndQuadrant, 2010 +# Copyright (c) 2ndQuadrant, 2010-2011 -repmgrd_OBJS = dbutils.o config.o repmgrd.o -repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o +repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o +repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o + +DATA = repmgr.sql uninstall_repmgr.sql PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) @@ -26,11 +28,27 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now +# is overriding pgxs install. 
install: $(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)' $(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)' +ifneq (,$(DATA)$(DATA_built)) + @for file in $(addprefix $(srcdir)/, $(DATA)) $(DATA_built); do \ + echo "$(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'"; \ + $(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'; \ + done +endif + clean: rm -f *.o rm -f repmgrd rm -f repmgr + +deb: repmgrd repmgr + mkdir -p ./debian/usr/bin + cp repmgrd repmgr ./debian/usr/bin/ + dpkg-deb --build debian + mv debian.deb ../postgresql-repmgr-9.0_1.0.0.deb + diff --git a/README.rst b/README.rst index 18192c9..80b2681 100644 --- a/README.rst +++ b/README.rst @@ -13,20 +13,67 @@ PostgreSQL, the user is expected to manage the high availability part of it. repmgr allows you to monitor and manage your replicated PostgreSQL -databases as a single cluster. - -repmgr includes two components: +databases as a single cluster. repmgr includes two components: * repmgr: command program that performs tasks and then exits + * repmgrd: management and monitoring daemon that watches the cluster + and can automate remote actions. Requirements ------------ repmgr is currently aimed for installation on UNIX-like systems that include -development tools such as gcc and gmake. It also requires that the +development tools such as ``gcc`` and ``gmake``. It also requires that the ``rsync`` utility is available in the PATH of the user running the repmgr -programs. +programs. Some operations also require PostgreSQL components such +as ``pg_config`` and ``pg_ctl`` be in the PATH. + +Introduction to repmgr commands +=============================== + +Suppose we have 3 nodes: node1 (the initial master), node2 and node3. 
+To make node2 and node3 be standbys of node1, execute this on both nodes +(node2 and node3):: + + repmgr -D /var/lib/pgsql/9.0 standby clone node1 + +In order to get full monitoring and easier state transitions, +you register each of the nodes, by creating a ``repmgr.conf`` file +and executing commands like this on the appropriate nodes:: + + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf --verbose master register + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf --verbose standby register + +Once everything is registered, you start the repmgrd daemon. It +will maintain a view showing the state of all the nodes in the cluster, +including how far they are lagging behind the master. + +If you lose node1 you can then run this on node2:: + + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf standby promote + +To make node2 the new master. Then on node3 run:: + + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf standby follow + +To make node3 follow node2 (rather than node1). + +If now we want to add a new node, we can prepare a new server (node4) +and run:: + + repmgr -D /var/lib/pgsql/9.0 standby clone node2 + +And if a previously failed node becomes available again, such as +the lost node1 above, you can get it to resynchronize by only copying +over changes made while it was down. That happens with what's +called a forced clone, which overwrites existing data rather than +assuming it starts with an empty database directory tree:: + + repmgr -D /var/lib/pgsql/9.0 --force standby clone node1 + +This can be much faster than creating a brand new node that must +copy over every file in the database. Installation Outline ==================== @@ -126,6 +173,97 @@ path either. 
The following recipe should work:: sudo PATH="/usr/pgsql-9.0/bin:$PATH" make USE_PGXS=1 install +Issues with 32 and 64 bit RPMs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If when building, you receive a series of errors of this form:: + + /usr/bin/ld: skipping incompatible /usr/pgsql-9.0/lib/libpq.so when searching for -lpq + +This is likely because you have both the 32 and 64 bit versions of the +``postgresql90-devel`` package installed. You can check that like this:: + + rpm -qa --queryformat '%{NAME}\t%{ARCH}\n' | grep postgresql90-devel + +And if two packages appear, one for i386 and one for x86_64, that's not supposed +to be allowed. + +This can happen when using the PGDG repo to install that package; +here is an example sessions demonstrating the problem case appearing:: + + # yum install postgresql-devel + .. + Setting up Install Process + Resolving Dependencies + --> Running transaction check + ---> Package postgresql90-devel.i386 0:9.0.2-2PGDG.rhel5 set to be updated + ---> Package postgresql90-devel.x86_64 0:9.0.2-2PGDG.rhel5 set to be updated + --> Finished Dependency Resolution + + Dependencies Resolved + + ========================================================================= + Package Arch Version Repository Size + ========================================================================= + Installing: + postgresql90-devel i386 9.0.2-2PGDG.rhel5 pgdg90 1.5 M + postgresql90-devel x86_64 9.0.2-2PGDG.rhel5 pgdg90 1.6 M + +Note how both the i386 and x86_64 platform architectures are selected for +installation. Your main PostgreSQL package will only be compatible with one of +those, and if the repmgr build finds the wrong postgresql90-devel these +"skipping incompatible" messages appear. + +In this case, you can temporarily remove both packages, then just install the +correct one for your architecture. 
Example:: + + rpm -e postgresql90-devel --allmatches + yum install postgresql90-devel-9.0.2-2PGDG.rhel5.x86_64 + +Instead just deleting the package from the wrong platform might not leave behind +the correct files, due to the way in which these accidentally happen to interact. +If you already tried to build repmgr before doing this, you'll need to do:: + + make USE_PGXS=1 clean + +To get rid of leftover files from the wrong architecture. + +Notes on Ubuntu, Debian or other Debian-based Builds +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Debian packages of PostgreSQL put ``pg_config`` into the development package +called ``postgresql-server-dev-$version``. + +When building repmgr against a Debian packages build, you may discover that some +development packages are needed as well. You will need the following development +packages installed:: + + sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev + +If you're using Debian packages for PostgreSQL and are building repmgr with the +USE_PGXS option you also need to install the corresponding development package:: + + sudo apt-get install postgresql-server-dev-9.0 + +If you build and install repmgr manually it will not be on the system path. The +binaries will be installed in /usr/lib/postgresql/$version/bin/ which is not on +the default path. The reason behind this is that Ubuntu/Debian systems manage +multiple installed versions of PostgreSQL on the same system through a wrapper +called pg_wrapper and repmgr is not (yet) known to this wrapper. 
+ +You can solve this in many different ways, the most Debian like is to make an +alternate for repmgr and repmgrd:: + + sudo update-alternatives --install /usr/bin/repmgr repmgr /usr/lib/postgresql/9.0/bin/repmgr 10 + sudo update-alternatives --install /usr/bin/repmgrd repmgrd /usr/lib/postgresql/9.0/bin/repmgrd 10 + +You can also make a deb package of repmgr using:: + + make USE_PGXS=1 deb + +This will build a Debian package one level up from where you build, normally the +same directory that you have your repmgr/ directory in. + Confirm software was built correctly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -135,13 +273,18 @@ is available by checking its version:: repmgr --version repmgrd --version - -You may need to include -the full path of the binary instead, such as this RHEL example:: + +You may need to include the full path of the binary instead, such as this +RHEL example:: /usr/pgsql-9.0/bin/repmgr --version /usr/pgsql-9.0/bin/repmgrd --version +Or in this Debian example:: + + /usr/lib/postgresql/9.0/bin/repmgr --version + /usr/lib/postgresql/9.0/bin/repmgrd --version + Below this binary installation base directory is referred to as PGDIR. Set up trusted copy between postgres accounts @@ -154,7 +297,7 @@ on their partner node without a password. First generate a ssh key, using an empty passphrase, and copy the resulting keys and a maching authorization file to a privledged user on the other system:: - [postgres@db1]$ ssh-keygen -t rsa + [postgres@node1]$ ssh-keygen -t rsa Generating public/private rsa key pair. Enter file in which to save the key (/var/lib/pgsql/.ssh/id_rsa): Enter passphrase (empty for no passphrase): @@ -163,25 +306,25 @@ keys and a maching authorization file to a privledged user on the other system:: Your public key has been saved in /var/lib/pgsql/.ssh/id_rsa.pub. 
The key fingerprint is: aa:bb:cc:dd:ee:ff:aa:11:22:33:44:55:66:77:88:99 postgres@db1.domain.com - [postgres@db1]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys - [postgres@db1]$ chmod go-rwx ~/.ssh/* - [postgres@db1]$ cd ~/.ssh - [postgres@db1]$ scp id_rsa.pub id_rsa authorized_keys user@db2: + [postgres@node1]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys + [postgres@node1]$ chmod go-rwx ~/.ssh/* + [postgres@node1]$ cd ~/.ssh + [postgres@node1]$ scp id_rsa.pub id_rsa authorized_keys postgres@node2: Login as a user on the other system, and install the files into the postgres user's account:: - [user@db2 ~]$ sudo chown postgres.postgres authorized_keys id_rsa.pub id_rsa - [user@db2 ~]$ sudo mkdir -p ~postgres/.ssh - [user@db2 ~]$ sudo chown postgres.postgres ~postgres/.ssh - [user@db2 ~]$ sudo mv authorized_keys id_rsa.pub id_rsa ~postgres/.ssh - [user@db2 ~]$ sudo chmod -R go-rwx ~postgres/.ssh + [user@node2 ~]$ sudo chown postgres.postgres authorized_keys id_rsa.pub id_rsa + [user@node2 ~]$ sudo mkdir -p ~postgres/.ssh + [user@node2 ~]$ sudo chown postgres.postgres ~postgres/.ssh + [user@node2 ~]$ sudo mv authorized_keys id_rsa.pub id_rsa ~postgres/.ssh + [user@node2 ~]$ sudo chmod -R go-rwx ~postgres/.ssh Now test that ssh in both directions works. You may have to accept some new known hosts in the process. Primary server configuration -============================ +---------------------------- PostgreSQL should have been previously built and installed on the system. Here is a sample of changes to the ``postgresql.conf`` file:: @@ -214,263 +357,295 @@ them as well to allow automatic login. In this case you might use the Don't forget to restart the database server after making all these changes. -Configuration File -================== +Usage walkthrough +================= -``repmgr.conf`` is looked for in the directory repmgrd or repmgr exists. 
-The configuration file should have 3 lines: +This assumes you've already followed the steps in "Installation Outline" to +install repmgr and repmgrd on the system. -It should have these three parameters: +A normal production installation of ``repmgr`` will normally involve two +different systems running on the same port, typically the default of 5432, +with both using files owned by the ``postgres`` user account. This +walkthrough assumes the following setup: -1. cluster: A string (single quoted) that identify the cluster we are on +* A primary (master) server called "node1," running as the "postgres" user + who is also the owner of the files. This server is operating on port 5432. This + server will be known as "node1" in the cluster "test". -2. node: An integer that identify our node in the cluster +* A secondary (standby) server called "node2," running as the "postgres" user + who is also the owner of the files. This server is operating on port 5432. This + server will be known as "node2" in the cluster "test". -3. conninfo: A string (single quoted) specifying how we can connect to this node's PostgreSQL service +* Another standby server called "node3" with a similar configuration to "node2". -Command line syntax -=================== +* The PostgreSQL installation in each of the above is defined as $PGDATA, + which is represented here as ``/var/lib/pgsql/9.0/data`` + +Creating some sample data +------------------------- -The current supported syntax for the program can be seen using:: +If you already have a database with useful data to replicate, you can +skip this step and use it instead. But if you do not already have +data in this cluster to replicate, you can create some like this:: - repmgr --help - -The output from this program looks like this:: + createdb pgbench + pgbench -i -s 10 pgbench + +Examples below will use the database name ``pgbench`` to match this. +Substitute the name of your database instead. 
Note that the standby +nodes created here will include information for every database in the +cluster, not just the specified one. Needing the database name is +mainly for user authentication purposes. - repmgr: Replicator manager - Usage: - repmgr [OPTIONS] master {register} - repmgr [OPTIONS] standby {register|clone|promote|follow} +Setting up a repmgr user +------------------------ - General options: - --help show this help, then exit - --version output version information, then exit - --verbose output verbose activity information +Make sure that the "standby" user has a role in the database, "pgbench" in this +case, and can login. On "node1":: - Connection options: - -d, --dbname=DBNAME database to connect to - -h, --host=HOSTNAME database server host or socket directory - -p, --port=PORT database server port - -U, --username=USERNAME database user name to connect as + createuser --login --superuser repmgr - Configuration options: - -D, --data-dir=DIR local directory where the files will be copied to - -f, --config_file=PATH path to the configuration file - -R, --remote-user=USERNAME database server username for rsync - -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000) - -F, --force force potentially dangerous operations to happen +Alternately you could start ``psql`` on the pgbench database on "node1" and at +the node1b# prompt type:: - repmgr performs some tasks like clone a node, promote it or making follow another node and then exits. - COMMANDS: - master register - registers the master in a cluster - standby register - registers a standby in a cluster - standby clone [node] - allows creation of a new standby - standby promote - allows manual promotion of a specific standby into a new master in the event of a failover - standby follow - allows the standby to re-point itself to a new master + CREATE ROLE repmgr SUPERUSER LOGIN; -The ``--verbose`` option can be useful in troubleshooting issues with -the program. 
+The main advantage of the latter is that you can do it remotely to any +system you already have superuser access to. -Commands -======== +Clearing the PostgreSQL installation on the Standby +--------------------------------------------------- -Not all of these commands need the ``repmgr.conf`` file, but they need to be able to -connect to the remote and local databases. +To set up a new streaming replica, start by removing any PostgreSQL +installation on the existing standby nodes. -You can teach it which is the remote database by using the -h parameter or -as a last parameter in standby clone and standby follow. If you need to specify -a port different then the default 5432 you can specify a -p parameter. -Standby is always considered as localhost and a second -p parameter will indicate -its port if is different from the default one. +* Stop any server on "node2" and "node3". You can confirm the database + servers running using a command like this:: + + ps -eaf | grep postgres + + And looking for the various database server processes: server, logger, + wal writer, and autovacuum launcher. + +* Go to "node2" and "node3" database directories and remove the PostgreSQL installation:: -* master register + cd $PGDATA + rm -rf * - * Registers a master in a cluster, it needs to be executed before any node is - registered + This will delete the entire database installation in ``/var/lib/pgsql/9.0/data``. + Be careful that $PGDATA is defined here; executing ``ls`` to confirm you're + in the right place is always a good idea before executing ``rm``. 
-* standby register +Testing remote access to the master +----------------------------------- - * Registers a standby in a cluster, it needs to be executed before any repmgrd - is executed +On the "node2" server, first test that you can connect to "node1" the +way repmgr will by executing:: -* standby clone [node to be cloned] + psql -h node1 -U repmgr -d pgbench - * Does a backup via ``rsync`` of the data directory of the primary. And it - creates the recovery file we need to start a new hot standby server. - It doesn't need the ``repmgr.conf`` so it can be executed anywhere on the - new node. You can change to the directory you want the new database - cluster at and execute:: +Possible sources for a problem here include: - ./repmgr standby clone 10.68.1.161 +* Login role specified was not created on "node1" - or run from wherever you are with a full path:: +* The database configuration on "node1" is not listening on a TCP/IP port. + That could be because the ``listen_addresses`` parameter was not updated, + or if it was but the server wasn't restarted afterwards. You can + test this on "node1" itself the same way:: - ./repmgr -D /path/to/new/data/directory standby clone 10.68.1.161 + psql -h node1 -U repmgr -d pgbench - That will make a backup of the primary then you only need to start the server - using a command like:: + With the "-h" parameter forcing a connnection over TCP/IP, rather + than the default UNIX socket method. - pg_ctl -D /your_data_directory_path start +* There is a firewall setup that prevents incoming access to the + PostgreSQL port (defaulting to 5432) used to access "node1". In + this situation you would be able to connect to the "node1" server + on itself, but not from any other host, and you'd just get a timeout + when trying rather than a proper error message. + +* The ``pg_hba.conf`` file does not list appropriate statements to allow + this user to login. 
In this case you should connect to the server, + but see an error message mentioning the ``pg_hba.conf``. - Note that some installations will also redirect the output log file when - executing ``pg_ctl``. +Cloning the standby +------------------- -* standby promote +With "node1" server running, we want to use the ``clone standby`` command +in repmgr to copy over the entire PostgreSQL database cluster onto the +"node2" server. Execute the clone process with:: - * Allows manual promotion of a specific standby into a new primary in the - event of a failover. This needs to be executed on the same directory - where the ``repmgr.conf`` is in the standby, or you can use the ``-f`` option - to indicate where the ``repmgr.conf`` is at. It doesn't need any - additional arguments:: + repmgr -D $PGDATA -d pgbench -p 5432 -U repmgr -R postgres --verbose standby clone node1 - ./repmgr standby promote +Here "-U" specifies the database user to connect to the master as, while +"-R" specifies what user to run the rsync command as. Potentially you +could leave out one or both of these, in situations where the user and/or +role setup is the same on each node. - That will restart your standby postgresql service. +If this fails with an error message about accessing the master database, +you should return to the previous step and confirm access to "node1" +from "node2" with ``psql``, using the same parameters given to repmgr. -* standby follow +NOTE: you need to have $PGDIR/bin (where the PostgreSQL binaries are installed) +in your path for the above to work. If you don't want that as a permanent +setting, you can temporarily set it before running individual commands like +this:: - * Allows the standby to base itself to the new primary passed as a - parameter. This needs to be executed on the same directory where the - ``repmgr.conf`` is in the standby, or you can use the ``-f`` option - to indicate where the ``repmgr.conf`` is at. Example:: + PATH=$PGDIR/bin:$PATH repmgr -D $PGDATA ... 
- ./repmgr standby follow +Setup repmgr configuration file +------------------------------- -Examples -======== +Create a directory to store each repmgr configuration in for each node. +In that, there needs to be a ``repmgr.conf`` file for each node in the cluster. +For each node we'll assume this is stored in ``/var/lib/pgsql/repmgr/repmgr.conf`` +following the standard directory structure of a RHEL system. It should contain:: -Suppose we have 3 nodes: node1 (the initial master), node2 and node3 + cluster=test + node=1 + conninfo='host=node1 user=repmgr dbname=pgbench' -To make node2 and node3 be standbys of node1, execute this on both nodes -(node2 and node3):: +On "node2" create the file ``/var/lib/pgsql/repmgr/repmgr.conf`` with:: - repmgr -D /var/lib/postgresql/9.0 standby clone node1 + cluster=test + node=2 + conninfo='host=node2 user=repmgr dbname=pgbench' -If we lose node1 we can run on node2:: +The STANDBY CLONE process should have created a recovery.conf file on +"node2" in the $PGDATA directory that reads as follows:: - repmgr -f /home/postgres/repmgr.conf standby promote + standby_mode = 'on' + primary_conninfo = 'host=node1 port=5432' -Which makes node2 the new master. We then run on node3:: +Registering the master and standby +---------------------------------- - repmgr standby follow +First, register the master by typing on "node1":: -To make node3 follow node2 (rather than node1) + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf --verbose master register -If now we want to add a new node we can a prepare a new server (node4) -and run:: +Then start the "standby" server. - repmgr -D /var/lib/postgresql/9.0 standby clone node2 +You could now register the standby by typing on "node2":: -NOTE: you need to have $PGDIR/bin (where the PostgreSQL binaries are installed) -in your path for the above to work. 
If you don't want that as a permanent -setting, you can temporarily set it before running individual commands like -this:: + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf --verbose standby register - PATH=$PGDIR/bin:$PATH repmgr standby promote +However, you can instead start repmgrd:: -repmgr Daemon -============= + repmgrd -f /var/lib/pgsql/repmgr/repmgr.conf --verbose > /var/lib/pgsql/repmgr/repmgr.log 2>&1 -Command line syntax -------------------- +Which will automatically register your standby system. And eventually +you need repmgrd running anyway, to save lag monitoring information. +repmgrd will log the daemon activity to the listed file. You can +watch what it is doing with:: -The current supported syntax for the program can be seen using:: + tail -f /var/lib/pgsql/repmgr/repmgr.log - repmgrd --help - -The output from this program looks like this:: +Hit control-C to exit this tail command when you are done. - repmgrd: Replicator manager daemon - Usage: - repmgrd [OPTIONS] - - Options: - --help show this help, then exit - --version output version information, then exit - --verbose output verbose activity information - -f, --config_file=PATH database to connect to - - repmgrd monitors a cluster of servers. +Monitoring and testing +---------------------- -The ``--verbose`` option can be useful in troubleshooting issues with -the program. +At this point, you have a functioning primary on "node1" and a functioning +standby server running on "node2". 
You can confirm the master knows +about the standby, and that it is keeping it current, by looking at +``repl_status``:: -Setup ------ + postgres@node2 $ psql -x -d pgbench -c "SELECT * FROM repmgr_test.repl_status" + -[ RECORD 1 ]-------------+------------------------------ + primary_node | 1 + standby_node | 2 + last_monitor_time | 2011-02-23 08:19:39.791974-05 + last_wal_primary_location | 0/1902D5E0 + last_wal_standby_location | 0/1902D5E0 + replication_lag | 0 bytes + apply_lag | 0 bytes + time_lag | 00:26:13.30293 -To use the repmgrd (repmgr daemon) to monitor standby so we know how is going -the replication and how far they are from primary, you need to execute the -``repmgr.sql`` script in the postgres database. +Some tests you might do at this point include: -You also need to add a row for every node in the ``repl_node`` table. This work -may be done for you by the daemon itself, as described below. +* Insert some records into the primary server here, confirm they appear + very quickly (within milliseconds) on the standby, and that the + repl_status view advances accordingly. -Lag monitoring --------------- +* Verify that you can run queries against the standby server, but + cannot make insertions into the standby database. -To look at the current lag between primary and each node listed -in ``repl_node``, consult the ``repl_status`` view:: +Simulating the failure of the primary server +-------------------------------------------- - psql -d postgres -c "SELECT * FROM repl_status" +To simulate the loss of the primary server, simply stop the "node1" server. +At this point, the standby contains the database as it existed at the time of +the "failure" of the primary server. If looking at ``repl_status`` on +"node2", you should see the time_lag value increase the longer "node1" +is down. -This view shows the latest monitor info from every node. - -* replication_lag: in bytes. This is how far the latest xlog record - we have received is from master. 
+Promoting the Standby to be the Primary +--------------------------------------- -* apply_lag: in bytes. This is how far the latest xlog record - we have applied is from the latest record we have received. +Now you can promote the standby server to be the primary, to allow +applications to read and write to the database again, by typing:: -* time_lag: in seconds. How many seconds behind the master is this node. + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf --verbose standby promote -Usage ------ +The server restarts and now has read/write ability. -repmgrd reads the ``repmgr.conf`` file in current directory, or as indicated with -f -parameter. It checks if the standby is in repl_nodes and adds it if not. +Bringing the former Primary up as a Standby +------------------------------------------- -Before you can run the repmgr daemon (repmgrd) you need to register a master -and at least a standby in a cluster using the ``MASTER REGISTER`` and -``STANDBY REGISTER`` commands. +To make the former primary act as a standby, which is necessary before +restoring the original roles, type the following on node1:: -For example, following last example and assuming that ``repmgr.conf`` is in postgres -home directory you will run this on the master:: + repmgr -D $PGDATA -d pgbench -p 5432 -U repmgr -R postgres --verbose --force standby clone node2 - repmgr -f /home/postgres/repmgr.conf master register +Then start the "node1" server, which is now acting as a standby server. +Check -and the same in the standby. +Make sure the record(s) inserted in the earlier step are still available on the +now standby ("node1"). Confirm the database on "node1" is read-only. -The repmgr daemon creates 2 connections: one to the master and another to the -standby. 
+Restoring the original roles of prime to primary and standby to standby +----------------------------------------------------------------------- -Detailed walkthrough -==================== +Now restore to the original configuration by stopping +"node2" (now acting as a primary), promoting "node1" again to be the +primary server, then bringing up "node2" as a standby with a valid +``recovery.conf`` file. -This assumes you've already followed the steps in "Installation Outline" to -install repmgr and repmgr on the system. - -The following scenario involves two PostgreSQL installations on the same server -hardware, so that additional systems aren't needed for testing. A normal -production installation of ``repmgr`` will normally involve two different -systems running on the same port, typically the default of 5432, -with both using files owned by the ``postgres`` user account. In places where -``127.0.0.1`` is used as a host name below, you would instead use the name of -the relevant host for that parameter. You can usually leave out changes -to the port number in this case too. - -The test setup assumes you might be using the default installation of +Stop the "node2" server:: + + repmgr -f /var/lib/pgsql/repmgr/repmgr.conf standby promote + +Now the original primary, "node1" is acting again as primary. + +Start the "node2" server and type this on "node1":: + + repmgr standby clone --force -h node2 -p 5432 -U postgres -R postgres --verbose + +Verify the roles have reversed by attempting to insert a record on "node2" +and on "node1". + +The servers are now again acting as primary on "node1" and standby on "node2". + +Alternate setup: both servers on one host +========================================== + +Another test setup assumes you might be using the default installation of PostgreSQL on port 5432 for some other purpose, and instead relocates these -instances onto different ports running as different users: +instances onto different ports running as different users. 
In places where +``127.0.0.1`` is used as a host name, a more traditional configuration +would instead use the name of the relevant host for that parameter. +You can usually leave out changes to the port number in this case too. -* A primary (master) server called “prime," with a user as “prime," who is +* A primary (master) server called "prime," with a user as "prime," who is also the owner of the files. This server is operating on port 5433. This - server will be known as “node1" in the cluster “test" + server will be known as "node1" in the cluster "test" -* A standby server called “standby", with a user of “standby", who is the +* A standby server called "standby", with a user of "standby", who is the owner of the files. This server is operating on port 5434. This server - will be known and “node2" on the cluster “test." + will be known and "node2" on the cluster "test." -* A database exists on “prime" called “testdb." +* A database exists on "prime" called "testdb." * The Postgress installation in each of the above is defined as $PGDATA, which is represented here with ``/data/prime`` as the "prime" server and @@ -509,7 +684,7 @@ Setup a streaming replica, strip away any PostgreSQL installation on the existin * Stop both servers. -* Go to “standby" database directory and remove the PostgreSQL installation:: +* Go to "standby" database directory and remove the PostgreSQL installation:: cd $PGDATA rm -rf * @@ -521,33 +696,33 @@ Building the standby Create a directory to store each repmgr configuration in for each node. In that, there needs to be a ``repmgr.conf`` file for each node in the cluster. 
-For “prime" we'll assume this is stored in ``/home/prime/repmgr`` +For "prime" we'll assume this is stored in ``/home/prime/repmgr`` and it should contain:: cluster=test node=1 - conninfo='host=127.0.0.1 dbname=dbtest' + conninfo='host=127.0.0.1 dbname=testdb' -On “standby" create the file ``/home/standby/repmgr/repmgr.conf`` with:: +On "standby" create the file ``/home/standby/repmgr/repmgr.conf`` with:: cluster=test node=2 - conninfo='host=127.0.0.1 dbname=dbtest' + conninfo='host=127.0.0.1 dbname=testdb' -Next, with “prime" server running, we want to use the ``clone standby`` command +Next, with "prime" server running, we want to use the ``clone standby`` command in repmgr to copy over the entire PostgreSQL database cluster onto the -“standby" server. On the “standby" server, type:: +"standby" server. On the "standby" server, type:: repmgr -D $PGDATA -p 5433 -U prime -R prime --verbose standby clone localhost -Next, we need a recovery.conf file on “standby" in the $PGDATA directory +Next, we need a recovery.conf file on "standby" in the $PGDATA directory that reads as follows:: standby_mode = 'on' primary_conninfo = 'host=127.0.0.1 port=5433' -Make sure that standby has a qualifying role in the database, “testdb" in this -case, and can login. Start ``psql`` on the testdb database on “prime" and at +Make sure that standby has a qualifying role in the database, "testdb" in this +case, and can login. Start ``psql`` on the testdb database on "prime" and at the testdb# prompt type:: CREATE ROLE standby SUPERUSER LOGIN @@ -555,30 +730,40 @@ the testdb# prompt type:: Registering the master and standby ---------------------------------- -First, register the master by typing on “prime":: +First, register the master by typing on "prime":: repmgr -f /home/prime/repmgr/repmgr.conf --verbose master register -On “standby," edit the ``postgresql.conf`` file and change the port to 5434. +On "standby," edit the ``postgresql.conf`` file and change the port to 5434. 
-Start the “standby" server. +Start the "standby" server. -Register the standby by typing on “standby":: +Register the standby by typing on "standby":: repmgr -f /home/standby/repmgr/repmgr.conf --verbose standby register -At this point, you have a functioning primary on “prime" and a functioning -standby server running on “standby." It's recommended that you insert some -records into the primary server here, then confirm they appear very quickly -(within milliseconds) on the standby. Also verify that one can make queries -against the standby server and cannot make insertions into the standby database. +At this point, you have a functioning primary on "prime" and a functioning +standby server running on "standby." You can confirm the master knows +about the standby, and that it is keeping it current, by running the +following on the master:: + + psql -x -d pgbench -c "SELECT * FROM repmgr_test.repl_status" + +Some tests you might do at this point include: + +* Insert some records into the primary server here, confirm they appear + very quickly (within milliseconds) on the standby, and that the + repl_status view advances accordingly. + +* Verify that you can run queries against the standby server, but + cannot make insertions into the standby database. Simulating the failure of the primary server -------------------------------------------- -To simulate the loss of the primary server, simply stop the “prime" server. +To simulate the loss of the primary server, simply stop the "prime" server. At this point, the standby contains the database as it existed at the time of -the “failure" of the primary server. +the "failure" of the primary server. 
Promoting the Standby to be the Primary --------------------------------------- @@ -596,44 +781,254 @@ Bringing the former Primary up as a Standby To make the former primary act as a standby, which is necessary before restoring the original roles, type:: - repmgr -U standby -R prime -h 127.0.0.1 -p 5433 -d dbtest --force --verbose standby clone + repmgr -U standby -R prime -h 127.0.0.1 -p 5433 -d testdb --force --verbose standby clone -Stop and restart the “prime" server, which is now acting as a standby server. +Stop and restart the "prime" server, which is now acting as a standby server. Make sure the record(s) inserted the earlier step are still available on the -now standby (prime). Confirm the database on “prime" is read-only. +now standby (prime). Confirm the database on "prime" is read-only. Restoring the original roles of prime to primary and standby to standby ----------------------------------------------------------------------- Now restore to the original configuration by stopping the -“standby" (now acting as a primary), promoting “prime" again to be the -primary server, then bringing up “standby" as a standby with a valid -``recovery.conf`` file on “standby". +"standby" (now acting as a primary), promoting "prime" again to be the +primary server, then bringing up "standby" as a standby with a valid +``recovery.conf`` file on "standby". -Stop the “standby" server:: +Stop the "standby" server:: repmgr -f /home/prime/repmgr/repmgr.conf standby promote -Now the original primary, “prime" is acting again as primary. +Now the original primary, "prime" is acting again as primary. -Start the “standby" server and type this on “prime":: +Start the "standby" server and type this on "prime":: repmgr standby clone --force -h 127.0.0.1 -p 5434 -U prime -R standby --verbose -Stop the “standby" and change the port to be 5434 in the ``postgresql.conf`` +Stop the "standby" and change the port to be 5434 in the ``postgresql.conf`` file. 
-Verify the roles have reversed by attempting to insert a record on “standby" -and on “prime." +Verify the roles have reversed by attempting to insert a record on "standby" +and on "prime." -The servers are now again acting as primary on “prime" and standby on “standby". +The servers are now again acting as primary on "prime" and standby on "standby". + +Configuration and command reference +=================================== + +Configuration File +------------------ + +``repmgr.conf`` is looked for in the directory repmgrd or repmgr exists in. +The configuration file should have 3 lines: + +1. cluster: A string (single quoted) that identify the cluster we are on + +2. node: An integer that identify our node in the cluster + +3. conninfo: A string (single quoted) specifying how we can connect to this node's PostgreSQL service + +repmgr +------ + +Command line syntax +~~~~~~~~~~~~~~~~~~~ + +The current supported syntax for the program can be seen using:: + + repmgr --help + +The output from this program looks like this:: + + repmgr: Replicator manager + Usage: + repmgr [OPTIONS] master {register} + repmgr [OPTIONS] standby {register|clone|promote|follow} + + General options: + --help show this help, then exit + --version output version information, then exit + --verbose output verbose activity information + + Connection options: + -d, --dbname=DBNAME database to connect to + -h, --host=HOSTNAME database server host or socket directory + -p, --port=PORT database server port + -U, --username=USERNAME database user name to connect as + + Configuration options: + -D, --data-dir=DIR local directory where the files will be copied to + -f, --config_file=PATH path to the configuration file + -R, --remote-user=USERNAME database server username for rsync + -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000) + -F, --force force potentially dangerous operations to happen + + repmgr performs some tasks like clone a node, promote it or making 
follow another node and then exits. + COMMANDS: + master register - registers the master in a cluster + standby register - registers a standby in a cluster + standby clone [node] - allows creation of a new standby + standby promote - allows manual promotion of a specific standby into a new master in the event of a failover + standby follow - allows the standby to re-point itself to a new master + +The ``--verbose`` option can be useful in troubleshooting issues with +the program. + +repmgr commands +--------------- + +Not all of these commands need the ``repmgr.conf`` file, but they need to be able to +connect to the remote and local databases. + +You can teach it which is the remote database by using the -h parameter or +as a last parameter in standby clone and standby follow. If you need to specify +a port different than the default 5432 you can specify a -p parameter. +Standby is always considered as localhost and a second -p parameter will indicate +its port if it is different from the default one. + +* master register + + * Registers a master in a cluster, it needs to be executed before any + standby nodes are registered + +* standby register + + * Registers a standby in a cluster, it needs to be executed before + repmgrd will function on the node. + +* standby clone [node to be cloned] + + * Does a backup via ``rsync`` of the data directory of the primary. And it + creates the recovery file we need to start a new hot standby server. + It doesn't need the ``repmgr.conf`` so it can be executed anywhere on the + new node.
You can change to the directory you want the new database + cluster at and execute:: + + ./repmgr standby clone node1 + + or run from wherever you are with a full path:: + + ./repmgr -D /path/to/new/data/directory standby clone node1 + + That will make a backup of the primary then you only need to start the server + using a command like:: + + pg_ctl -D /your_data_directory_path start + + Note that some installations will also redirect the output log file when + executing ``pg_ctl``; check the server startup script you are using + and try to match what it does. + +* standby promote + + * Allows manual promotion of a specific standby into a new primary in the + event of a failover. This needs to be executed on the same directory + where the ``repmgr.conf`` is in the standby, or you can use the ``-f`` option + to indicate where the ``repmgr.conf`` is at. It doesn't need any + additional arguments:: + + ./repmgr standby promote + + That will restart your standby postgresql service. + +* standby follow + + * Allows the standby to base itself to the new primary passed as a + parameter. This needs to be executed on the same directory where the + ``repmgr.conf`` is in the standby, or you can use the ``-f`` option + to indicate where the ``repmgr.conf`` is at. Example:: + + ./repmgr standby follow + +repmgrd Daemon +-------------- + +Command line syntax +~~~~~~~~~~~~~~~~~~~ + +The current supported syntax for the program can be seen using:: + + repmgrd --help + +The output from this program looks like this:: + + repmgrd: Replicator manager daemon + Usage: + repmgrd [OPTIONS] + + Options: + --help show this help, then exit + --version output version information, then exit + --verbose output verbose activity information + -f, --config_file=PATH database to connect to + + repmgrd monitors a cluster of servers. + +The ``--verbose`` option can be useful in troubleshooting issues with +the program. 
+ +Usage +----- + +repmgrd reads the ``repmgr.conf`` file in current directory, or as +indicated with -f parameter. If run on a standby, it checks if that +standby is in ``repl_nodes`` and adds it if not. + +Before you can run repmgrd you need to register a master in a cluster +using the ``MASTER REGISTER`` command. If run on a master, +repmgrd will exit, as it has nothing to do on them yet. It is only +targeted at running on standby servers currently. If converting +a former master into a standby, you will need to start repmgrd +in order to make it fully operational in its new role. + +The repmgr daemon creates 2 connections: one to the master and another to the +standby. + +Lag monitoring +-------------- + +repmgrd helps monitor a set of master and standby servers. You can +see which node is the current master, as well as how far behind each +is from current. + +To look at the current lag between primary and each node listed +in ``repl_nodes``, consult the ``repl_status`` view:: + + psql -d postgres -c "SELECT * FROM repmgr_test.repl_status" + +This view shows the latest monitor info from every node. + +* replication_lag: in bytes. This is how far the latest xlog record + we have received is from master. + +* apply_lag: in bytes. This is how far the latest xlog record + we have applied is from the latest record we have received. + +* time_lag: in seconds. How many seconds behind the master is this node. + +Error codes +----------- + +When the repmgr or repmgrd program exits, it will set one of the +following exit codes: + +* SUCCESS 0: Program ran successfully. +* ERR_BAD_CONFIG 1: One of the configuration checks the program makes failed. +* ERR_BAD_RSYNC 2: An rsync call made by the program returned an error. +* ERR_STOP_BACKUP 3: A ``pg_stop_backup()`` call made by the program didn't succeed. +* ERR_NO_RESTART 4: An attempt to restart a PostgreSQL instance failed. +* ERR_NEEDS_XLOG 5: Could not create the ``pg_xlog`` directory when cloning.
+* ERR_DB_CON 6: Error when trying to connect to a database. +* ERR_DB_QUERY 7: Error executing a database query. +* ERR_PROMOTED 8: Exiting program because the node has been promoted to master. +* ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected. License and Contributions ========================= repmgr is licensed under the GPL v3. All of its code and documentation is -Copyright 2010, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for +Copyright 2010-2011, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for details. Contributions to repmgr are welcome, and listed in the file CREDITS. @@ -642,3 +1037,13 @@ assignment and a disclaimer of any work-for-hire ownership claims from the employer of the developer. This lets us make sure that all of the repmgr distribution remains free code. Please contact info@2ndQuadrant.com for a copy of the relevant Copyright Assignment Form. + +Code style +---------- + +Code in repmgr is formatted to a consistent style using the following command:: + + astyle --style=ansi --indent=tab --suffix=none *.c *.h + +Contributors should reformat their code similarly before submitting code to +the project, in order to minimize merge conflicts with other work. diff --git a/TODO b/TODO new file mode 100644 index 0000000..276fce4 --- /dev/null +++ b/TODO @@ -0,0 +1,21 @@ +Known issues in repmgr +====================== + +* The check for whether ``wal_keep_segments`` is considered large enough + does a string comparison rather than an integer one. It can give both + false positive (setting is large enough but flagged as too small) and + false negative (setting is too small but not noted as such) errors. + +* When running repmgr against a remote machine, operations that start + the database server using the ``pg_ctl`` command may accidentally + terminate after their associated ssh session ends. + +* After running repmgrd as a regular foreground application, hitting + control-C causes the program to crash. 
+ +Planned feature improvements +============================ + +* Before running ``pg_start_backup()``, a sanity check that there is a + a working ssh connection to the destination would help find + configuration errors before disturbing the database. diff --git a/check_dir.c b/check_dir.c index 26b0733..ccf94ab 100644 --- a/check_dir.c +++ b/check_dir.c @@ -1,6 +1,6 @@ /* * check_dir.c - Directories management functions - * Copyright (C) 2ndQuadrant, 2010 + * Copyright (C) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,9 +24,12 @@ #include #include +/* NB: postgres_fe must be included BEFORE check_dir */ #include "postgres_fe.h" #include "check_dir.h" +#include "strutil.h" +#include "log.h" static int mkdir_p(char *path, mode_t omode); @@ -64,7 +67,7 @@ check_dir(char *dir) } else { - result = 2; /* not empty */ + result = 2; /* not empty */ break; } } @@ -96,7 +99,7 @@ create_directory(char *dir) if (mkdir_p(dir, 0700) == 0) return true; - fprintf(stderr, _("Could not create directory \"%s\": %s\n"), + log_err(_("Could not create directory \"%s\": %s\n"), dir, strerror(errno)); return false; @@ -111,7 +114,7 @@ set_directory_permissions(char *dir) /* function from initdb.c */ -/* source stolen from FreeBSD /src/bin/mkdir/mkdir.c and adapted */ +/* source adapted from FreeBSD /src/bin/mkdir/mkdir.c */ /* * this tries to build all the elements of a path to a directory a la mkdir -p @@ -219,10 +222,11 @@ mkdir_p(char *path, mode_t omode) bool is_pg_dir(char *dir) { - char path[8192]; - struct stat sb; + const size_t buf_sz = 8192; + char path[buf_sz]; + struct stat sb; - sprintf(path, "%s/PG_VERSION", dir); + xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); return (stat(path, &sb) == 0) ? 
true : false; } diff --git a/check_dir.h b/check_dir.h index 9acac47..37a14fe 100644 --- a/check_dir.h +++ b/check_dir.h @@ -1,6 +1,6 @@ /* * check_dir.h - * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +17,12 @@ * */ +#ifndef _REPMGR_CHECK_DIR_H_ +#define _REPMGR_CHECK_DIR_H_ + int check_dir(char *dir); bool create_directory(char *dir); bool set_directory_permissions(char *dir); bool is_pg_dir(char *dir); + +#endif diff --git a/config.c b/config.c index 4499509..3754aac 100644 --- a/config.c +++ b/config.c @@ -1,6 +1,6 @@ /* * config.c - Functions to parse the config file - * Copyright (C) 2ndQuadrant, 2010 + * Copyright (C) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,12 +17,12 @@ * */ +#include "config.h" #include "repmgr.h" - -#define MAXLINELENGTH 4096 +#include "strutil.h" void -parse_config(const char *config_file, char *cluster_name, int *node, char *conninfo) +parse_config(const char* config_file, t_configuration_options* options) { char *s, buff[MAXLINELENGTH]; char name[MAXLEN]; @@ -30,8 +30,21 @@ parse_config(const char *config_file, char *cluster_name, int *node, char *conni FILE *fp = fopen (config_file, "r"); + /* Initialize */ + memset(options->cluster_name, 0, sizeof(options->cluster_name)); + options->node = -1; + memset(options->conninfo, 0, sizeof(options->conninfo)); + memset(options->rsync_options, 0, sizeof(options->rsync_options)); + + /* + * Since some commands don't require a config file at all, not + * having one isn't necessarily a problem. 
+ */ if (fp == NULL) + { + fprintf(stderr, _("Did not find the configuration file '%s', continuing\n"), config_file); return; + } /* Read next line */ while ((s = fgets (buff, sizeof buff, fp)) != NULL) @@ -45,18 +58,38 @@ parse_config(const char *config_file, char *cluster_name, int *node, char *conni /* Copy into correct entry in parameters struct */ if (strcmp(name, "cluster") == 0) - strncpy (cluster_name, value, MAXLEN); + strncpy (options->cluster_name, value, MAXLEN); else if (strcmp(name, "node") == 0) - *node = atoi(value); + options->node = atoi(value); else if (strcmp(name, "conninfo") == 0) - strncpy (conninfo, value, MAXLEN); + strncpy (options->conninfo, value, MAXLEN); + else if (strcmp(name, "rsync_options") == 0) + strncpy (options->rsync_options, value, QUERY_STR_LEN); + else if (strcmp(name, "loglevel") == 0) + strncpy (options->loglevel, value, MAXLEN); + else if (strcmp(name, "logfacility") == 0) + strncpy (options->logfacility, value, MAXLEN); else printf ("WARNING: %s/%s: Unknown name/value pair!\n", name, value); } - /* Close file */ fclose (fp); + + /* Check config settings */ + if (strnlen(options->cluster_name, MAXLEN)==0) + { + fprintf(stderr, "Cluster name is missing. " + "Check the configuration file.\n"); + exit(ERR_BAD_CONFIG); + } + + if (options->node == -1) + { + fprintf(stderr, "Node information is missing. 
" + "Check the configuration file.\n"); + exit(ERR_BAD_CONFIG); + } } char * diff --git a/config.h b/config.h index 9b2ea3f..14af2ca 100644 --- a/config.h +++ b/config.h @@ -1,6 +1,6 @@ /* * config.h - * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +17,24 @@ * */ -void parse_config(const char *config_file, char *cluster_name, int *node, char *service); +#ifndef _REPMGR_CONFIG_H_ +#define _REPMGR_CONFIG_H_ + +#include "repmgr.h" +#include "strutil.h" + +typedef struct +{ + char cluster_name[MAXLEN]; + int node; + char conninfo[MAXLEN]; + char loglevel[MAXLEN]; + char logfacility[MAXLEN]; + char rsync_options[QUERY_STR_LEN]; +} t_configuration_options; + +void parse_config(const char* config_file, t_configuration_options* options); void parse_line(char *buff, char *name, char *value); char *trim(char *s); + +#endif diff --git a/dbutils.c b/dbutils.c index 38b8c05..c3a3be2 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1,6 +1,6 @@ /* * dbutils.c - Database connection/management functions - * Copyright (C) 2ndQuadrant, 2010 + * Copyright (C) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,9 +18,8 @@ */ #include "repmgr.h" - -#define MAXQUERY 8192 -#define MAXCONNINFO 1024 +#include "strutil.h" +#include "log.h" PGconn * establishDBConnection(const char *conninfo, const bool exit_on_error) @@ -31,19 +30,39 @@ establishDBConnection(const char *conninfo, const bool exit_on_error) /* Check to see that the backend connection was successfully made */ if ((PQstatus(conn) != CONNECTION_OK)) { - fprintf(stderr, "Connection to database failed: %s", + log_err(_("Connection to database failed: %s\n"), PQerrorMessage(conn)); + if (exit_on_error) { PQfinish(conn); - exit(1); + 
exit(ERR_DB_CON); } } return conn; } +PGconn * +establishDBConnectionByParams(const char *keywords[], const char *values[],const bool exit_on_error) +{ + /* Make a connection to the database */ + PGconn *conn = PQconnectdbParams(keywords, values, true); + + /* Check to see that the backend connection was successfully made */ + if ((PQstatus(conn) != CONNECTION_OK)) + { + log_err(_("Connection to database failed: %s\n"), + PQerrorMessage(conn)); + if (exit_on_error) + { + PQfinish(conn); + exit(ERR_DB_CON); + } + } + return conn; +} bool is_standby(PGconn *conn) @@ -52,12 +71,14 @@ is_standby(PGconn *conn) bool result; res = PQexec(conn, "SELECT pg_is_in_recovery()"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn)); + log_err(_("Can't query server mode: %s"), + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - exit(1); + exit(ERR_DB_QUERY); } if (strcmp(PQgetvalue(res, 0, 0), "f") == 0) @@ -79,51 +100,61 @@ pg_version(PGconn *conn, char* major_version) { PGresult *res; - int major_version1; - char *major_version2; + int major_version1; + char *major_version2; + + res = PQexec(conn, + "WITH pg_version(ver) AS " + "(SELECT split_part(version(), ' ', 2)) " + "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) " + "FROM pg_version"); - res = PQexec(conn, "WITH pg_version(ver) AS (SELECT split_part(version(), ' ', 2)) " - "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) FROM pg_version"); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); + log_err(_("Version check PQexec failed: %s"), + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - exit(1); + exit(ERR_DB_QUERY); } + major_version1 = atoi(PQgetvalue(res, 0, 0)); major_version2 = PQgetvalue(res, 0, 1); - PQclear(res); if (major_version1 >= 9) { /* form a major version string */ - snprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, major_version2); + 
xsnprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, + major_version2); } else strcpy(major_version, ""); + PQclear(res); + return major_version; } bool -guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value) +guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value) { PGresult *res; - char sqlquery[MAXQUERY]; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, "SELECT true FROM pg_settings " - " WHERE name = '%s' AND setting %s '%s'", - parameter, op, value); + sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " + " WHERE name = '%s' AND setting %s '%s'", + parameter, op, value); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); + log_err(_("GUC setting check PQexec failed: %s"), + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - exit(1); + exit(ERR_DB_QUERY); } if (PQntuples(res) == 0) { @@ -140,19 +171,22 @@ const char * get_cluster_size(PGconn *conn) { PGresult *res; - const char *size; - char sqlquery[MAXQUERY]; + const char *size; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " - " FROM pg_database "); + sqlquery_snprintf( + sqlquery, + "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " + " FROM pg_database "); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); + log_err(_("Get cluster size PQexec failed: %s"), + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - exit(1); + exit(ERR_DB_QUERY); } size = PQgetvalue(res, 0, 0); PQclear(res); @@ -162,29 +196,63 @@ get_cluster_size(PGconn *conn) /* * get a connection to master by reading repl_nodes, creating a connection * to each node (one at a time) and finding if it is a master or a standby + * + * NB: If master_conninfo_out may be NULL. 
If it is non-null, it is assumed to + * point to allocated memory of MAXCONNINFO in length, and the master server + * connection string is placed there. */ PGconn * -getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) +getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id, char *master_conninfo_out) { - PGconn *master_conn = NULL; - PGresult *res1; - PGresult *res2; - char sqlquery[MAXQUERY]; - char master_conninfo[MAXCONNINFO]; + PGconn *master_conn = NULL; + PGresult *res1; + PGresult *res2; + char sqlquery[QUERY_STR_LEN]; + char master_conninfo_stack[MAXCONNINFO]; + char *master_conninfo = &*master_conninfo_stack; + char schema_str[MAXLEN]; + char schema_quoted[MAXLEN]; + int i; + /* + * If the caller wanted to get a copy of the connection info string, sub + * out the local stack pointer for the pointer passed by the caller. + */ + if (master_conninfo_out != NULL) + master_conninfo = master_conninfo_out; + + /* + * XXX: This is copied in at least two other procedures + * + * Assemble the unquoted schema name + */ + maxlen_snprintf(schema_str, "repmgr_%s", cluster); + { + char *identifier = PQescapeIdentifier(standby_conn, schema_str, + strlen(schema_str)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } + /* find all nodes belonging to this cluster */ - sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE cluster = '%s' and id <> %d", - cluster, cluster, id); + log_info(_("finding node list for cluster '%s'\n"), + cluster); + + sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " + " WHERE cluster = '%s' and id <> %d", + schema_quoted, cluster, id); res1 = PQexec(standby_conn, sqlquery); if (PQresultStatus(res1) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(standby_conn)); + log_err(_("Can't get nodes info: %s\n"), + PQerrorMessage(standby_conn)); PQclear(res1); PQfinish(standby_conn); - exit(1); + 
exit(ERR_DB_QUERY); } for (i = 0; i < PQntuples(res1); i++) @@ -192,19 +260,24 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) /* initialize with the values of the current node being processed */ *master_id = atoi(PQgetvalue(res1, i, 0)); strncpy(master_conninfo, PQgetvalue(res1, i, 2), MAXCONNINFO); + log_info(_("checking role of cluster node '%s'\n"), + master_conninfo); master_conn = establishDBConnection(master_conninfo, false); + if (PQstatus(master_conn) != CONNECTION_OK) continue; /* - * I can't use the is_standby() function here because on error that - * function closes the connection i pass and exit, but i still need to close - * standby_conn + * Can't use the is_standby() function here because on error that + * function closes the connection passed and exits. This still + * needs to close master_conn first. */ res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()"); + if (PQresultStatus(res2) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get recovery state from this node: %s\n", PQerrorMessage(master_conn)); + log_err(_("Can't get recovery state from this node: %s\n"), + PQerrorMessage(master_conn)); PQclear(res2); PQfinish(master_conn); continue; @@ -229,7 +302,8 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) /* If we finish this loop without finding a master then * we doesn't have the info or the master has failed (or we * reached max_connections or superuser_reserved_connections, - * anything else i'm missing?), + * anything else I'm missing?). + * * Probably we will need to check the error to know if we need * to start failover procedure or just fix some situation on the * standby. 
@@ -237,4 +311,3 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) PQclear(res1); return NULL; } - diff --git a/dbutils.h b/dbutils.h index cc5830d..7eb68ed 100644 --- a/dbutils.h +++ b/dbutils.h @@ -1,6 +1,6 @@ /* * dbutils.h - * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,9 +17,19 @@ * */ +#ifndef _REPMGR_DBUTILS_H_ +#define _REPMGR_DBUTILS_H_ + PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error); -bool is_standby(PGconn *conn); +PGconn *establishDBConnectionByParams(const char *keywords[], + const char *values[], + const bool exit_on_error); +bool is_standby(PGconn *conn); char *pg_version(PGconn *conn, char* major_version); -bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); -const char *get_cluster_size(PGconn *conn); -PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id); +bool guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value); +const char *get_cluster_size(PGconn *conn); +PGconn *getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id, char *master_conninfo_out); + +#endif diff --git a/debian/DEBIAN/control b/debian/DEBIAN/control new file mode 100644 index 0000000..6fc360d --- /dev/null +++ b/debian/DEBIAN/control @@ -0,0 +1,9 @@ +Package: repmgr +Version: 1.0-1 +Section: database +Priority: optional +Architecture: all +Depends: rsync, postgresql-9.0 +Maintainer: Greg Smith +Description: PostgreSQL replication setup, management and monitoring + has two main executables diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..c82e216 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +repmgr (1.1.0-1ppa1ubuntu1) lucid; urgency=low + + * Initial Debian version. 
+ + -- Daniel Farina Tue, 22 Mar 2011 20:38:24 -0700 diff --git a/debian/packages b/debian/packages new file mode 100644 index 0000000..3d3091e --- /dev/null +++ b/debian/packages @@ -0,0 +1,43 @@ +Source: repmgr +Section: database +Priority: extra +Maintainer: Daniel Farina +Build-Depends: git-buildpackage (>= 0.4.57), yada (>= 0.55), + postgresql-server-dev-9.0, libperl-dev, tcl8.5-dev, libedit-dev, + libssl-dev, zlib1g-dev | libz-dev, libpam0g-dev | libpam-dev, + libxml2-dev, libkrb5-dev, libldap2-dev, libxslt1-dev, + libossp-uuid-dev, python-dev +Standards-Version: 3.9.1 +Upstream-Source: https://github.com/fdr/repmgr/tarball/7b0a142075df9e4b1eb8eb054341b4fc6b2a5c43 +Homepage: http://projects.2ndquadrant.com/repmgr +Description: PostgreSQL 9.0 Replication Manager +Copyright: GPL-3 + Copyright 2010-2011 2ndQuadrant +Major-Changes: + Introduced many bugs by not editing debian/packages appropriately. +Build: sh + export PGXS90=$(/usr/lib/postgresql/9.0/bin/pg_config --pgxs) + make all USE_PGXS=1 PGXS="${PGXS90}" +Clean: sh + export PGXS90=$(/usr/lib/postgresql/9.0/bin/pg_config --pgxs) + make clean USE_PGXS=1 PGXS="${PGXS90}" + +Package: postgresql-9.0-repmgr +Architecture: any +Depends: [/usr/lib/postgresql/9.0/bin/*], postgresql-9.0 +Description: PostgreSQL 9.0 Replication Manager + PostgreSQL 9.0 allows us to have replicated Hot Standby servers which + we can query and/or use for high availability. While the main + components of the feature are included with PostgreSQL, the user is + expected to manage the high availability part of it. repmgr allows + you to monitor and manage your replicated PostgreSQL databases as a + single cluster. +Install: sh + yada install -bin -into /usr/lib/postgresql/9.0/bin repmgrd + yada install -bin -into /usr/lib/postgresql/9.0/bin repmgr + . + yada symlink -bin -as repmgr ../share/postgresql-common/pg_wrapper + yada symlink -bin -as repmgrd ../share/postgresql-common/pg_wrapper + . 
+ yada install -data -into /usr/share/postgresql/9.0/contrib repmgr.sql + yada install -data -into /usr/share/postgresql/9.0/contrib uninstall_repmgr.sql diff --git a/errcode.h b/errcode.h new file mode 100644 index 0000000..49433da --- /dev/null +++ b/errcode.h @@ -0,0 +1,37 @@ +/* + * errcode.h + * Copyright (C) 2ndQuadrant, 2011 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#ifndef _ERRCODE_H_ +#define _ERRCODE_H_ + +/* Exit return code */ + +#define SUCCESS 0 +#define ERR_BAD_CONFIG 1 +#define ERR_BAD_RSYNC 2 +#define ERR_STOP_BACKUP 3 +#define ERR_NO_RESTART 4 +#define ERR_NEEDS_XLOG 5 +#define ERR_DB_CON 6 +#define ERR_DB_QUERY 7 +#define ERR_PROMOTED 8 +#define ERR_BAD_PASSWORD 9 +#define ERR_STR_OVERFLOW 10 + +#endif /* _ERRCODE_H_ */ diff --git a/log.c b/log.c new file mode 100644 index 0000000..d9e5ed3 --- /dev/null +++ b/log.c @@ -0,0 +1,213 @@ +/* + * log.c - Logging methods + * Copyright (C) 2ndQuadrant, 2010-2011 + * + * This module is a set of methods for logging (currently only syslog) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include "repmgr.h" + +#include + +#ifdef HAVE_SYSLOG +#include +#include +#endif + +#include "log.h" + +#define DEFAULT_IDENT "repmgr" +#ifdef HAVE_SYSLOG +#define DEFAULT_SYSLOG_FACILITY LOG_LOCAL0 +#endif + +/* #define REPMGR_DEBUG */ + +static int detect_log_level(const char* level); +static int detect_log_facility(const char* facility); + +int log_type = REPMGR_STDERR; +int log_level = LOG_NOTICE; + +bool logger_init(const char* ident, const char* level, const char* facility) +{ + + int l; + int f; + +#ifdef HAVE_SYSLOG + int syslog_facility = DEFAULT_SYSLOG_FACILITY; +#endif + +#ifdef REPMGR_DEBUG + printf("Logger initialisation (Level: %s, Facility: %s)\n", level, facility); +#endif + + if (!ident) + { + ident = DEFAULT_IDENT; + } + + if (level && *level) + { + l = detect_log_level(level); +#ifdef REPMGR_DEBUG + printf("Assigned level for logger: %d\n", l); +#endif + + if (l > 0) + log_level = l; + else + stderr_log_warning(_("Cannot detect log level %s (use any of DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level); + } + + if (facility && *facility) + { + + f = detect_log_facility(facility); +#ifdef REPMGR_DEBUG + printf("Assigned facility for logger: %d\n", f); +#endif + + if (f == 0) + { + /* No syslog requested, just stderr */ +#ifdef REPMGR_DEBUG + printf(_("Use stderr for logging\n")); +#endif + } + else if (f == -1) + { + stderr_log_warning(_("Cannot detect log facility %s (use any of LOCAL0, LOCAL1, ..., LOCAL7, USER or STDERR)\n"), facility); + } +#ifdef HAVE_SYSLOG + else + { + syslog_facility = f; + log_type = REPMGR_SYSLOG; + } +#endif + } + +#ifdef 
HAVE_SYSLOG + + if (log_type == REPMGR_SYSLOG) + { + setlogmask (LOG_UPTO (log_level)); + openlog (ident, LOG_CONS | LOG_PID | LOG_NDELAY, syslog_facility); + + stderr_log_notice(_("Setup syslog (level: %s, facility: %s)\n"), level, facility); + } + +#endif + + return true; + +} + +bool logger_shutdown(void) +{ + +#ifdef HAVE_SYSLOG + if (log_type == REPMGR_SYSLOG) + closelog(); +#endif + + return true; +} + +/* + * Set a minimum logging level. Intended for command line verbosity + * options, which might increase requested logging over what's specified + * in the regular configuration file. + */ +void logger_min_verbose(int minimum) +{ + if (log_level < minimum) + log_level = minimum; +} + +int detect_log_level(const char* level) +{ + if (!strcmp(level, "DEBUG")) + return LOG_DEBUG; + if (!strcmp(level, "INFO")) + return LOG_INFO; + if (!strcmp(level, "NOTICE")) + return LOG_NOTICE; + if (!strcmp(level, "WARNING")) + return LOG_WARNING; + if (!strcmp(level, "ERR")) + return LOG_ERR; + if (!strcmp(level, "ALERT")) + return LOG_ALERT; + if (!strcmp(level, "CRIT")) + return LOG_CRIT; + if (!strcmp(level, "EMERG")) + return LOG_EMERG; + + return 0; +} + +int detect_log_facility(const char* facility) +{ + int local = 0; + if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6) + { + + local = atoi (&facility[5]); + + switch (local) + { + case 0: + return LOG_LOCAL0; + break; + case 1: + return LOG_LOCAL1; + break; + case 2: + return LOG_LOCAL2; + break; + case 3: + return LOG_LOCAL3; + break; + case 4: + return LOG_LOCAL4; + break; + case 5: + return LOG_LOCAL5; + break; + case 6: + return LOG_LOCAL6; + break; + case 7: + return LOG_LOCAL7; + break; + } + + } + else if (!strcmp(facility, "USER")) + { + return LOG_USER; + } + else if (!strcmp(facility, "STDERR")) + { + return 0; + } + + return -1; +} diff --git a/log.h b/log.h new file mode 100644 index 0000000..dc9d784 --- /dev/null +++ b/log.h @@ -0,0 +1,121 @@ +/* + * log.h + * Copyright (c) 2ndQuadrant, 
2010-2011 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#ifndef _REPMGR_LOG_H_ +#define _REPMGR_LOG_H_ + +#include "repmgr.h" + +#define REPMGR_SYSLOG 1 +#define REPMGR_STDERR 2 + +/* Standard error logging */ +#define stderr_log_debug(...) if (log_level >= LOG_DEBUG) fprintf(stderr, __VA_ARGS__) +#define stderr_log_info(...) if (log_level >= LOG_INFO) fprintf(stderr, __VA_ARGS__) +#define stderr_log_notice(...) if (log_level >= LOG_NOTICE) fprintf(stderr, __VA_ARGS__) +#define stderr_log_warning(...) if (log_level >= LOG_WARNING) fprintf(stderr, __VA_ARGS__) +#define stderr_log_err(...) if (log_level >= LOG_ERR) fprintf(stderr, __VA_ARGS__) +#define stderr_log_crit(...) if (log_level >= LOG_CRIT) fprintf(stderr, __VA_ARGS__) +#define stderr_log_alert(...) if (log_level >= LOG_ALERT) fprintf(stderr, __VA_ARGS__) +#define stderr_log_emerg(...) if (log_level >= LOG_EMERG) fprintf(stderr, __VA_ARGS__) + +#ifdef HAVE_SYSLOG + +#include + +#define log_debug(...) \ + if (log_type == REPMGR_SYSLOG) \ + syslog(LOG_DEBUG, __VA_ARGS__); \ + else \ + stderr_log_debug(__VA_ARGS__); + +#define log_info(...) \ + { \ + if (log_type == REPMGR_SYSLOG) syslog(LOG_INFO, __VA_ARGS__); \ + else stderr_log_info(__VA_ARGS__); \ + } + +#define log_notice(...) 
\ + { \ + if (log_type == REPMGR_SYSLOG) syslog(LOG_NOTICE, __VA_ARGS__); \ + else stderr_log_notice(__VA_ARGS__); \ + } + +#define log_warning(...) \ + { \ + if (log_type == REPMGR_SYSLOG) syslog(LOG_WARNING, __VA_ARGS__); \ + else stderr_log_warning(__VA_ARGS__); \ + } + +#define log_err(...) \ + { \ + if (log_type == REPMGR_SYSLOG) syslog(LOG_ERR, __VA_ARGS__); \ + else stderr_log_err(__VA_ARGS__); \ + } + +#define log_crit(...) \ + { \ + if (log_type == REPMGR_SYSLOG) syslog(LOG_CRIT, __VA_ARGS__); \ + else stderr_log_crit(__VA_ARGS__); \ + } + +#define log_alert(...) \ + { \ + if (log_type == REPMGR_SYSLOG) syslog(LOG_ALERT, __VA_ARGS__); \ + else stderr_log_alert(__VA_ARGS__); \ + } + +#define log_emerg(...) \ + { \ + if (log_type == REPMGR_SYSLOG) syslog(LOG_EMERG, __VA_ARGS__); \ + else stderr_log_emerg(__VA_ARGS__); \ + } + +#else + +#define LOG_EMERG 0 /* system is unusable */ +#define LOG_ALERT 1 /* action must be taken immediately */ +#define LOG_CRIT 2 /* critical conditions */ +#define LOG_ERR 3 /* error conditions */ +#define LOG_WARNING 4 /* warning conditions */ +#define LOG_NOTICE 5 /* normal but significant condition */ +#define LOG_INFO 6 /* informational */ +#define LOG_DEBUG 7 /* debug-level messages */ + +#define log_debug(...) stderr_log_debug(__VA_ARGS__) +#define log_info(...) stderr_log_info(__VA_ARGS__) +#define log_notice(...) stderr_log_notice(__VA_ARGS__) +#define log_warning(...) stderr_log_warning(__VA_ARGS__) +#define log_err(...) stderr_log_err(__VA_ARGS__) +#define log_crit(...) stderr_log_crit(__VA_ARGS__) +#define log_alert(...) stderr_log_alert(__VA_ARGS__) +#define log_emerg(...) 
stderr_log_emerg(__VA_ARGS__) + +#endif + + +/* Logger initialisation and shutdown */ +bool logger_shutdown(void); +bool logger_init(const char* ident, const char* level, const char* facility); +void logger_min_verbose(int minimum); + +extern int log_type; +extern int log_level; + +#endif diff --git a/repmgr.c b/repmgr.c index 8e2ce89..75295b6 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1,6 +1,6 @@ /* * repmgr.c - Command interpreter for the repmgr - * Copyright (C) 2ndQuadrant, 2010 + * Copyright (C) 2ndQuadrant, 2010-2011 * * This module is a command-line utility to easily setup a cluster of * hot standby servers for an HA environment @@ -31,23 +31,25 @@ #include #include +#include "log.h" +#include "config.h" #include "check_dir.h" +#include "strutil.h" #define RECOVERY_FILE "recovery.conf" #define RECOVERY_DONE_FILE "recovery.done" -#define NO_ACTION 0 /* Not a real action, just to initialize */ -#define MASTER_REGISTER 1 +#define NO_ACTION 0 /* Not a real action, just to initialize */ +#define MASTER_REGISTER 1 #define STANDBY_REGISTER 2 #define STANDBY_CLONE 3 #define STANDBY_PROMOTE 4 #define STANDBY_FOLLOW 5 -#define QUERY_STR_LEN 8192 - static void help(const char *progname); static bool create_recovery_file(const char *data_dir); -static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory); +static int copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory); static bool check_parameters_for_action(const int action); static void do_master_register(void); @@ -55,28 +57,22 @@ static void do_standby_register(void); static void do_standby_clone(void); static void do_standby_promote(void); static void do_standby_follow(void); +static void help(const char* progname); +static void usage(void); -const char *progname; - -const char *keywords[6]; -const char *values[6]; - -const char *dbname = NULL; -char *host = NULL; -char *username = NULL; -char *dest_dir = 
NULL; -char *config_file = NULL; -char *remote_user = NULL; -char *wal_keep_segments = NULL; -bool verbose = false; -bool force = false; - -int numport = 0; -char *masterport = NULL; +/* Global variables */ +static const char *progname; +static const char *keywords[6]; +static const char *values[6]; +char repmgr_schema[MAXLEN]; +bool need_a_node = true; -char *server_mode = NULL; -char *server_cmd = NULL; +/* Initialization of runtime options */ +t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "" }; +t_configuration_options options = { "", -1, "", "", "" }; +static char *server_mode = NULL; +static char *server_cmd = NULL; int main(int argc, char **argv) @@ -93,6 +89,7 @@ main(int argc, char **argv) {"wal-keep-segments", required_argument, NULL, 'w'}, {"force", no_argument, NULL, 'F'}, {"verbose", no_argument, NULL, 'v'}, + {"password-require", no_argument, NULL, 1}, {NULL, 0, NULL, 0} }; @@ -107,53 +104,60 @@ main(int argc, char **argv) if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); - exit(0); + exit(SUCCESS); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s (PostgreSQL) " PG_VERSION "\n", progname); - exit(0); + exit(SUCCESS); } } - while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:Fv", long_options, + &optindex)) != -1) { switch (c) { - case 'd': - dbname = optarg; - break; - case 'h': - host = optarg; - break; - case 'p': - masterport = optarg; - break; - case 'U': - username = optarg; - break; - case 'D': - dest_dir = optarg; - break; - case 'f': - config_file = optarg; - break; - case 'R': - remote_user = optarg; - break; - case 'w': - wal_keep_segments = optarg; - break; - case 'F': - force = true; - break; - case 'v': - verbose = true; - break; - default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), 
progname); - exit(1); + case 'd': + strncpy(runtime_options.dbname, optarg, MAXLEN); + break; + case 'h': + strncpy(runtime_options.host, optarg, MAXLEN); + break; + case 'p': + if (atoi(optarg) > 0) + strncpy(runtime_options.masterport, optarg, MAXLEN); + break; + case 'U': + strncpy(runtime_options.username, optarg, MAXLEN); + break; + case 'D': + strncpy(runtime_options.dest_dir, optarg, MAXFILENAME); + break; + case 'f': + strncpy(runtime_options.config_file, optarg, MAXLEN); + break; + case 'R': + strncpy(runtime_options.remote_user, optarg, MAXLEN); + break; + case 'w': + if (atoi(optarg) > 0) + strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN); + break; + case 'F': + runtime_options.force = true; + break; + case 'v': + runtime_options.verbose = true; + break; + case 1: + /* For password-require */ + runtime_options.password_required = true; + break; + default: + usage(); + exit(ERR_BAD_CONFIG); } } @@ -168,10 +172,11 @@ main(int argc, char **argv) if (optind < argc) { server_mode = argv[optind++]; - if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 0) + if (strcasecmp(server_mode, "STANDBY") != 0 && + strcasecmp(server_mode, "MASTER") != 0) { - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - exit(1); + usage(); + exit(ERR_BAD_CONFIG); } } @@ -199,8 +204,8 @@ main(int argc, char **argv) action = STANDBY_FOLLOW; else { - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - exit(1); + usage(); + exit(ERR_BAD_CONFIG); } } @@ -209,12 +214,13 @@ main(int argc, char **argv) { if (optind < argc) { - if (host != NULL) + if (runtime_options.host[0]) { - fprintf(stderr, _("Conflicting parameters you can't use -h while providing a node separately. 
Try \"%s --help\" for more information.\n"), progname); - exit(1); + log_err(_("Conflicting parameters: you can't use -h while providing a node separately.\n")); + usage(); + exit(ERR_BAD_CONFIG); } - host = argv[optind++]; + strncpy(runtime_options.host, argv[optind++], MAXLEN); } } @@ -223,46 +229,72 @@ main(int argc, char **argv) case 0: break; default: - fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"), + log_err(_("%s: too many command-line arguments (first extra is \"%s\")\n"), progname, argv[optind + 1]); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - exit(1); + usage(); + exit(ERR_BAD_CONFIG); } if (!check_parameters_for_action(action)) - exit(1); - - if (config_file == NULL) - { - config_file = malloc(5 + sizeof(CONFIG_FILE)); - sprintf(config_file, "./%s", CONFIG_FILE); - } - - if (wal_keep_segments == NULL) - { - wal_keep_segments = malloc(5); - strcpy(wal_keep_segments, "5000"); - } + exit(ERR_BAD_CONFIG); - if (dbname == NULL) + if (!runtime_options.dbname[0]) { if (getenv("PGDATABASE")) - dbname = getenv("PGDATABASE"); + strncpy(runtime_options.dbname, getenv("PGDATABASE"), MAXLEN); else if (getenv("PGUSER")) - dbname = getenv("PGUSER"); + strncpy(runtime_options.dbname, getenv("PGUSER"), MAXLEN); else - dbname = "postgres"; + strncpy(runtime_options.dbname, DEFAULT_DBNAME, MAXLEN); } + /* Read the configuration file, normally repmgr.conf */ + if (!runtime_options.config_file[0]) + strncpy(runtime_options.config_file, DEFAULT_CONFIG_FILE, MAXLEN); + + if (runtime_options.verbose) + printf(_("Opening configuration file: %s\n"), runtime_options.config_file); + + parse_config(runtime_options.config_file, &options); + keywords[2] = "user"; - values[2] = username; + values[2] = (runtime_options.username[0]) ? 
runtime_options.username : NULL; keywords[3] = "dbname"; - values[3] = dbname; + values[3] = runtime_options.dbname; keywords[4] = "application_name"; values[4] = (char *) progname; keywords[5] = NULL; values[5] = NULL; + /* + * Initialize the logger. If verbose command line parameter was + * input, make sure that the log level is at least INFO. This + * is mainly useful for STANDBY CLONE. That doesn't require a + * configuration file where a logging level might be specified + * at, but it often requires detailed logging to troubleshoot + * problems. + */ + logger_init(progname, options.loglevel, options.logfacility); + if (runtime_options.verbose) + logger_min_verbose(LOG_INFO); + + /* + * Node configuration information is not needed for all actions, + * with STANDBY CLONE being the main exception. + */ + if (need_a_node) + { + if (options.node == -1) + { + log_err("Node information is missing. " + "Check the configuration file.\n"); + exit(ERR_BAD_CONFIG); + } + } + + /* Prepare the repmgr schema variable */ + snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, options.cluster_name); + switch (action) { case MASTER_REGISTER: @@ -281,77 +313,80 @@ main(int argc, char **argv) do_standby_follow(); break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - exit(1); + usage(); + exit(ERR_BAD_CONFIG); } + logger_shutdown(); return 0; } - static void do_master_register(void) { - PGconn *conn; + PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; - bool schema_exists = false; - char master_version[MAXVERSIONSTR]; - - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is missing. 
" - "Check the configuration file.\n"); - exit(1); - } + char schema_quoted[MAXLEN]; + char master_version[MAXVERSIONSTR]; - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(options.conninfo, true); /* master should be v9 or better */ + log_info(_("%s connecting to master database\n"), progname); pg_version(conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + log_err( _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are a master */ + log_info(_("%s connected to master, checking its state\n"), progname); if (is_standby(conn)) { - fprintf(stderr, "repmgr: This node should be a master\n"); + log_err(_("repmgr: This node should be a master\n")); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } + + /* Assemble a quoted schema name + * XXX This is not currently used due to a merge conflict, but + * probably should be */ + if (false) + { + char *identifier = PQescapeIdentifier(conn, repmgr_schema, + strlen(repmgr_schema)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); + sqlquery_snprintf(sqlquery, + "SELECT 1 FROM pg_namespace " + "WHERE nspname = '%s'", repmgr_schema); + log_debug("master register: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about schemas: %s\n", PQerrorMessage(conn)); + log_err(_("Can't get info about schemas: %s\n"), PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } if (PQntuples(res) > 0) /* schema exists */ { - if (!force) /* and we are not forcing so error */ + if (!runtime_options.force) /* and we are not forcing so error */ { - fprintf(stderr, "Schema 
repmgr_%s already exists.", myClusterName); + log_notice(_("Schema %s already exists.\n"), repmgr_schema); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } schema_exists = true; } @@ -359,107 +394,118 @@ do_master_register(void) if (!schema_exists) { + log_info("master register: creating database objects inside the %s schema\n", repmgr_schema); + /* ok, create the schema */ - sprintf(sqlquery, "CREATE SCHEMA repmgr_%s", myClusterName); + sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", repmgr_schema); + log_debug("master register: %s\n", sqlquery); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the schema repmgr_%s: %s\n", - myClusterName, PQerrorMessage(conn)); + log_err(_("Cannot create the schema %s: %s\n"), + repmgr_schema, PQerrorMessage(conn)); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } /* ... the tables */ - sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " - " id integer primary key, " - " cluster text not null, " - " conninfo text not null)", myClusterName); + sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( " + " id integer primary key, " + " cluster text not null, " + " conninfo text not null)", repmgr_schema); + log_debug("master register: %s\n", sqlquery); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the table repmgr_%s.repl_nodes: %s\n", - myClusterName, PQerrorMessage(conn)); + log_err(_("Cannot create the table %s.repl_nodes: %s\n"), + repmgr_schema, PQerrorMessage(conn)); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } - sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( " - " primary_node INTEGER NOT NULL, " - " standby_node INTEGER NOT NULL, " - " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " - " last_wal_primary_location TEXT NOT NULL, " - " last_wal_standby_location TEXT NOT NULL, " - " replication_lag BIGINT NOT NULL, " - " apply_lag BIGINT NOT NULL) ", myClusterName); + sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( " + " primary_node 
INTEGER NOT NULL, " + " standby_node INTEGER NOT NULL, " + " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " + " last_wal_primary_location TEXT NOT NULL, " + " last_wal_standby_location TEXT NOT NULL, " + " replication_lag BIGINT NOT NULL, " + " apply_lag BIGINT NOT NULL) ", repmgr_schema); + log_debug("master register: %s\n", sqlquery); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the table repmgr_%s.repl_monitor: %s\n", - myClusterName, PQerrorMessage(conn)); + log_err(_("Cannot create the table %s.repl_monitor: %s\n"), + repmgr_schema, PQerrorMessage(conn)); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } /* and the view */ - sprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS " - " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " - " ORDER BY last_monitor_time desc) " - " FROM repmgr_%s.repl_monitor) " - " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " - " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " - " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " - " FROM monitor_info a " - " WHERE row_number = 1", myClusterName, myClusterName); + sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS " + " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " + " ORDER BY last_monitor_time desc) " + " FROM %s.repl_monitor) " + " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " + " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " + " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " + " FROM monitor_info a " + " WHERE row_number = 1", repmgr_schema, repmgr_schema); + log_debug("master register: %s\n", sqlquery); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the view repmgr_%s.repl_status: %s\n", - myClusterName, PQerrorMessage(conn)); + 
log_err(_("Cannot create the view %s.repl_status: %s\n"), + repmgr_schema, PQerrorMessage(conn)); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } } else { PGconn *master_conn; - int id; + int id; /* Ensure there isn't any other master already registered */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id); + master_conn = getMasterConnection(conn, options.node, + options.cluster_name, &id,NULL); if (master_conn != NULL) { PQfinish(master_conn); - fprintf(stderr, "There is a master already in this cluster"); - return; + log_warning(_("There is a master already in cluster %s\n"), options.cluster_name); + exit(ERR_BAD_CONFIG); } } /* Now register the master */ - if (force) + if (runtime_options.force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " + " WHERE id = %d", + repmgr_schema, options.node); + log_debug("master register: %s\n", sqlquery); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot delete node details, %s\n", - PQerrorMessage(conn)); + log_warning(_("Cannot delete node details, %s\n"), + PQerrorMessage(conn)); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } } - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " + "VALUES (%d, '%s', '%s')", + repmgr_schema, options.node, options.cluster_name, options.conninfo); + log_debug("master register: %s\n", sqlquery); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot insert node details, %s\n", - PQerrorMessage(conn)); + log_warning(_("Cannot insert node details, %s\n"), + PQerrorMessage(conn)); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } PQfinish(conn); + log_notice(_("Master node correctly registered for cluster %s with id %d (conninfo: %s)\n"), + options.cluster_name, options.node, options.conninfo); 
return; } @@ -467,83 +513,93 @@ do_master_register(void) static void do_standby_register(void) { - PGconn *conn; + PGconn *conn; PGconn *master_conn; int master_id; PGresult *res; char sqlquery[QUERY_STR_LEN]; - - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char schema_quoted[MAXLEN]; char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); - exit(1); - } + /* XXX: A lot of copied code from do_master_register! Refactor */ - conn = establishDBConnection(conninfo, true); + log_info(_("%s connecting to standby database\n"), progname); + conn = establishDBConnection(options.conninfo, true); /* should be v9 or better */ + log_info(_("%s connected to standby, checking its state\n"), progname); pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); - return; + log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + exit(ERR_BAD_CONFIG); } /* Check we are a standby */ if (!is_standby(conn)) { - fprintf(stderr, "repmgr: This node should be a standby\n"); + log_err(_("repmgr: This node should be a standby (%s)\n"), options.conninfo); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); + } + + /* Assemble a quoted schema name + * XXX This is not currently used due to a merge conflict, but + * probably should be */ + if (false) + { + char *identifier = PQescapeIdentifier(conn, repmgr_schema, + strlen(repmgr_schema)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); } /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); + 
sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema); + log_debug("standby register: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); + log_err("Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } - if (PQntuples(res) == 0) /* schema doesn't exists */ + if (PQntuples(res) == 0) { - fprintf(stderr, "Schema repmgr_%s doesn't exists.", myClusterName); + /* schema doesn't exist */ + log_err("Schema %s doesn't exists.\n", repmgr_schema); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } PQclear(res); /* check if there is a master in this cluster */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); + log_info(_("%s connecting to master database\n"), progname); + master_conn = getMasterConnection(conn, options.node, options.cluster_name, + &master_id, NULL); if (!master_conn) - return; + { + log_err(_("A master must be defined before configuring a slave\n")); + exit(ERR_BAD_CONFIG); + } /* master should be v9 or better */ + log_info(_("%s connected to master, checking its state\n"), progname); pg_version(master_conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); - return; + log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + exit(ERR_BAD_CONFIG); } /* master and standby version should match */ @@ -551,42 +607,45 @@ do_standby_register(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), + log_err(_("%s needs versions of both master (%s) and standby (%s) to match.\n"), progname, master_version, standby_version); - return; + exit(ERR_BAD_CONFIG); } 
- /* Now register the standby */ - if (force) + log_info(_("%s registering the standby\n"), progname); + if (runtime_options.force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " + " WHERE id = %d", + repmgr_schema, options.node); + log_debug("standby register: %s\n", sqlquery); if (!PQexec(master_conn, sqlquery)) { - fprintf(stderr, "Cannot delete node details, %s\n", + log_err("Cannot delete node details, %s\n", PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } } - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " + "VALUES (%d, '%s', '%s')", + repmgr_schema, options.node, options.cluster_name, options.conninfo); + log_debug("standby register: %s\n", sqlquery); if (!PQexec(master_conn, sqlquery)) { - fprintf(stderr, "Cannot insert node details, %s\n", + log_err("Cannot insert node details, %s\n", PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } + log_info(_("%s registering the standby complete\n"), progname); PQfinish(master_conn); PQfinish(conn); return; @@ -596,79 +655,74 @@ do_standby_register(void) static void do_standby_clone(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; int r = 0; int i; bool pg_dir = false; - char master_data_directory[MAXLEN]; - char master_config_file[MAXLEN]; - char master_hba_file[MAXLEN]; - char master_ident_file[MAXLEN]; + char master_data_directory[MAXFILENAME]; + char master_config_file[MAXFILENAME]; + char master_hba_file[MAXFILENAME]; + char master_ident_file[MAXFILENAME]; - char master_control_file[MAXLEN]; - char local_control_file[MAXLEN]; + char master_control_file[MAXFILENAME]; + char 
local_control_file[MAXFILENAME]; + char tblspc_dir[MAXFILENAME]; - const char *first_wal_segment = NULL; - const char *last_wal_segment = NULL; + char *first_wal_segment = NULL; + const char *last_wal_segment = NULL; char master_version[MAXVERSIONSTR]; /* if dest_dir hasn't been provided, initialize to current directory */ - if (dest_dir == NULL) + if (!runtime_options.dest_dir[0]) { - dest_dir = malloc(5); - strcpy(dest_dir, "."); + strncpy(runtime_options.dest_dir, DEFAULT_DEST_DIR, MAXFILENAME); } /* Check this directory could be used as a PGDATA dir */ - switch (check_dir(dest_dir)) + switch (check_dir(runtime_options.dest_dir)) { case 0: /* dest_dir not there, must create it */ - if (verbose) - printf(_("creating directory %s ... "), dest_dir); - fflush(stdout); + log_info(_("creating directory %s ...\n"), runtime_options.dest_dir); - if (!create_directory(dest_dir)) + if (!create_directory(runtime_options.dest_dir)) { - fprintf(stderr, _("%s: couldn't create directory %s ... "), - progname, dest_dir); - return; + log_err(_("%s: couldn't create directory %s ...\n"), + progname, runtime_options.dest_dir); + exit(ERR_BAD_CONFIG); } break; case 1: /* Present but empty, fix permissions and use it */ - if (verbose) - printf(_("fixing permissions on existing directory %s ... 
"), - dest_dir); - fflush(stdout); + log_info(_("checking and correcting permissions on existing directory %s ...\n"), + runtime_options.dest_dir); - if (!set_directory_permissions(dest_dir)) + if (!set_directory_permissions(runtime_options.dest_dir)) { - fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), - progname, dest_dir, strerror(errno)); - return; + log_err(_("%s: could not change permissions of directory \"%s\": %s\n"), + progname, runtime_options.dest_dir, strerror(errno)); + exit(ERR_BAD_CONFIG); } break; case 2: /* Present and not empty */ - fprintf(stderr, - _("%s: directory \"%s\" exists but is not empty\n"), - progname, dest_dir); + log_warning( _("%s: directory \"%s\" exists but is not empty\n"), + progname, runtime_options.dest_dir); - pg_dir = is_pg_dir(dest_dir); - if (pg_dir && !force) + pg_dir = is_pg_dir(runtime_options.dest_dir); + if (pg_dir && !runtime_options.force) { - fprintf(stderr, _("\nThis looks like a PostgreSQL directroy.\n" - "If you are sure you want to clone here, " - "please check there is no PostgreSQL server " - "running and use the --force option\n")); - return; + log_warning( _("\nThis looks like a PostgreSQL directory.\n" + "If you are sure you want to clone here, " + "please check there is no PostgreSQL server " + "running and use the --force option\n")); + exit(ERR_BAD_CONFIG); } - else if (pg_dir && force) + else if (pg_dir && runtime_options.force) { /* Let it continue */ break; @@ -677,161 +731,160 @@ do_standby_clone(void) return; default: /* Trouble accessing directory */ - fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), - progname, dest_dir, strerror(errno)); + log_err( _("%s: could not access directory \"%s\": %s\n"), + progname, runtime_options.dest_dir, strerror(errno)); + exit(ERR_BAD_CONFIG); } /* Connection parameters for master only */ keywords[0] = "host"; - values[0] = host; + values[0] = runtime_options.host; keywords[1] = "port"; - values[1] = masterport; + 
values[1] = runtime_options.masterport; /* We need to connect to check configuration and start a backup */ - conn = PQconnectdbParams(keywords, values, true); - if (!conn) - { - fprintf(stderr, _("%s: could not connect to master\n"), - progname); - return; - } + log_info(_("%s connecting to master database\n"), progname); + conn=establishDBConnectionByParams(keywords,values,true); /* primary should be v9 or better */ + log_info(_("%s connected to master, checking its state\n"), progname); pg_version(conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); - return; + log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + exit(ERR_BAD_CONFIG); } /* Check we are cloning a primary node */ if (is_standby(conn)) { PQfinish(conn); - fprintf(stderr, "\nThe command should clone a primary node\n"); - return; + log_err(_("\nThe command should clone a primary node\n")); + exit(ERR_BAD_CONFIG); } /* And check if it is well configured */ if (!guc_setted(conn, "wal_level", "=", "hot_standby")) { PQfinish(conn); - fprintf(stderr, _("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); - return; + log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); + exit(ERR_BAD_CONFIG); } - if (!guc_setted(conn, "wal_keep_segments", ">=", wal_keep_segments)) + if (!guc_setted(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments)) { PQfinish(conn); - fprintf(stderr, _("%s needs parameter 'wal_keep_segments' to be set to %s or greater\n"), wal_keep_segments, progname); - return; + log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option)\n"), progname, runtime_options.wal_keep_segments); + exit(ERR_BAD_CONFIG); } if (!guc_setted(conn, "archive_mode", "=", "on")) { PQfinish(conn); - fprintf(stderr, _("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname); 
- return; + log_err(_("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname); + exit(ERR_BAD_CONFIG); } - if (verbose) - printf(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); + log_info(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); + + /* + * Check if the tablespace locations exists and that we can write to + * them. + */ + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + "WHERE spcname NOT IN ('pg_default', 'pg_global')"); + log_debug("standby clone: %s\n", sqlquery); - /* Check if the tablespace locations exists and that we can write to them */ - sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); + log_err("Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } for (i = 0; i < PQntuples(res); i++) { - char *tblspc_dir = NULL; - - strcpy(tblspc_dir, PQgetvalue(res, i, 0)); + strncpy(tblspc_dir, PQgetvalue(res, i, 0), MAXFILENAME); /* Check this directory could be used as a PGDATA dir */ switch (check_dir(tblspc_dir)) { case 0: /* tblspc_dir not there, must create it */ - if (verbose) - printf(_("creating directory \"%s\"... "), tblspc_dir); - fflush(stdout); + log_info(_("creating directory \"%s\"... "), tblspc_dir); if (!create_directory(tblspc_dir)) { - fprintf(stderr, _("%s: couldn't create directory \"%s\"... "), + log_err(_("%s: couldn't create directory \"%s\"...\n"), progname, tblspc_dir); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } break; case 1: /* Present but empty, fix permissions and use it */ - if (verbose) - printf(_("fixing permissions on existing directory \"%s\"... 
"), - tblspc_dir); - fflush(stdout); + log_info(_("fixing permissions on existing directory \"%s\"... "), + tblspc_dir); if (!set_directory_permissions(tblspc_dir)) { - fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), + log_err(_("%s: could not change permissions of directory \"%s\": %s\n"), progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } break; case 2: /* Present and not empty */ - if (!force) + if (!runtime_options.force) { - fprintf(stderr, - _("%s: directory \"%s\" exists but is not empty\n"), + log_err(_("%s: directory \"%s\" exists but is not empty\n"), progname, tblspc_dir); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } default: /* Trouble accessing directory */ - fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), + log_err(_("%s: could not access directory \"%s\": %s\n"), progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } } - fprintf(stderr, "Starting backup...\n"); + log_notice("Starting backup...\n"); /* Get the data directory full path and the configuration files location */ - sprintf(sqlquery, "SELECT name, setting " - " FROM pg_settings " - " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); + sqlquery_snprintf(sqlquery, + "SELECT name, setting " + " FROM pg_settings " + " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); + log_debug("standby clone: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about data directory and configuration files: %s\n", PQerrorMessage(conn)); + log_err("Can't get info about data directory and configuration files: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } for (i = 0; i < PQntuples(res); i++) { if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0) - 
strcpy(master_data_directory, PQgetvalue(res, i, 1)); + strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXFILENAME); else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0) - strcpy(master_config_file, PQgetvalue(res, i, 1)); + strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME); else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0) - strcpy(master_hba_file, PQgetvalue(res, i, 1)); + strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME); else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0) - strcpy(master_ident_file, PQgetvalue(res, i, 1)); + strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME); else - fprintf(stderr, _("uknown parameter: %s"), PQgetvalue(res, i, 0)); + log_warning(_("unknown parameter: %s\n"), PQgetvalue(res, i, 0)); } PQclear(res); @@ -839,233 +892,296 @@ do_standby_clone(void) * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files */ - sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); + sqlquery_snprintf( + sqlquery, + "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", + time(NULL)); + log_debug("standby clone: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't start backup: %s\n", PQerrorMessage(conn)); + log_err("Can't start backup: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } - first_wal_segment = PQgetvalue(res, 0, 0); + + if (runtime_options.verbose) + { + char *first_wal_seg_pq = PQgetvalue(res, 0, 0); + size_t buf_sz = strlen(first_wal_seg_pq); + + first_wal_segment = malloc(buf_sz + 1); + xsnprintf(first_wal_segment, buf_sz + 1, "%s", first_wal_seg_pq); + } + PQclear(res); /* * 1) first move global/pg_control * - * 2) then move data_directory ommiting the files we have already moved and pg_xlog - * content + * 2) then move 
data_directory ommiting the files we have already moved and + * pg_xlog content * - * 3) finally We need to backup configuration files (that could be on other directories, debian - * like systems likes to do that), so look at config_file, hba_file and ident_file but we - * can omit external_pid_file ;) + * 3) finally We need to backup configuration files (that could be on other + * directories, debian like systems likes to do that), so look at + * config_file, hba_file and ident_file but we can omit + * external_pid_file ;) * * On error we need to return but before that execute pg_stop_backup() */ /* need to create the global sub directory */ - sprintf(master_control_file, "%s/global/pg_control", master_data_directory); - sprintf(local_control_file, "%s/global", dest_dir); + maxlen_snprintf(master_control_file, "%s/global/pg_control", + master_data_directory); + maxlen_snprintf(local_control_file, "%s/global", runtime_options.dest_dir); if (!create_directory(local_control_file)) { - fprintf(stderr, _("%s: couldn't create directory %s ... 
"), - progname, dest_dir); + log_err(_("%s: couldn't create directory %s ...\n"), + progname, runtime_options.dest_dir); goto stop_backup; } - r = copy_remote_files(host, remote_user, master_control_file, local_control_file, false); + log_info("standby clone: master control file '%s'\n", master_control_file); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_control_file, local_control_file, false); if (r != 0) + { + log_warning("standby clone: failed copying master control file '%s'\n", master_control_file); goto stop_backup; + } - r = copy_remote_files(host, remote_user, master_data_directory, dest_dir, true); + log_info("standby clone: master data directory '%s'\n", master_data_directory); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_data_directory, runtime_options.dest_dir, true); if (r != 0) + { + log_warning("standby clone: failed copying master data directory '%s'\n", master_data_directory); goto stop_backup; + } /* - * Copy tablespace locations, i'm doing this separately because i couldn't find and appropiate - * rsync option but besides we could someday make all these rsync happen concurrently + * Copy tablespace locations, i'm doing this separately because i couldn't + * find and appropiate rsync option but besides we could someday make all + * these rsync happen concurrently */ - sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + " WHERE spcname NOT IN ('pg_default', 'pg_global')"); + log_debug("standby clone: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); + log_err("Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); goto stop_backup; } for (i = 0; i < PQntuples(res); i++) { - r = 
copy_remote_files(host, remote_user, PQgetvalue(res, i, 0), PQgetvalue(res, i, 0), true); + strncpy(tblspc_dir, PQgetvalue(res, i, 0), MAXFILENAME); + log_info("standby clone: master tablespace '%s'\n", tblspc_dir); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + tblspc_dir, tblspc_dir, true); if (r != 0) + { + log_warning("standby clone: failed copying tablespace directory '%s'\n", tblspc_dir); goto stop_backup; + } } - r = copy_remote_files(host, remote_user, master_config_file, dest_dir, false); + log_info("standby clone: master config file '%s'\n", master_config_file); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_config_file, runtime_options.dest_dir, false); if (r != 0) + { + log_warning("standby clone: failed copying master config file '%s'\n", master_config_file); goto stop_backup; + } - r = copy_remote_files(host, remote_user, master_hba_file, dest_dir, false); + log_info("standby clone: master hba file '%s'\n", master_hba_file); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_hba_file, runtime_options.dest_dir, false); if (r != 0) + { + log_warning("standby clone: failed copying master hba file '%s'\n", master_hba_file); goto stop_backup; + } - r = copy_remote_files(host, remote_user, master_ident_file, dest_dir, false); + log_info("standby clone: master ident file '%s'\n", master_ident_file); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_ident_file, runtime_options.dest_dir, false); if (r != 0) + { + log_warning("standby clone: failed copying master ident file '%s'\n", master_ident_file); goto stop_backup; + } stop_backup: - /* inform the master that we have finished the backup */ - conn = PQconnectdbParams(keywords, values, true); - if (!conn) - { - fprintf(stderr, _("%s: could not connect to master\n"), - progname); - return; - } - fprintf(stderr, "Finishing backup...\n"); + /* + * Inform the master that we have 
finished the backup. + * + * Don't have this one exit if it fails, so that a more informative + * error message will also appear about the backup not being stopped. + */ + log_info(_("%s connecting to master database to stop backup\n"), progname); + conn=establishDBConnectionByParams(keywords,values,false); + + log_notice("Finishing backup...\n"); + sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); + log_debug("standby clone: %s\n", sqlquery); - sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn)); + log_err("Can't stop backup: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_STOP_BACKUP); } last_wal_segment = PQgetvalue(res, 0, 0); + + log_info(_("%s requires primary to keep WAL files %s until at least %s\n"), + progname, first_wal_segment, last_wal_segment); + + /* Finished with the database connection now */ PQclear(res); PQfinish(conn); - /* Now, if the rsync failed then exit */ - if (r != 0) - return; + /* + * Only free the first_wal_segment since it was copied out of the + * pqresult. + */ + free(first_wal_segment); + first_wal_segment = NULL; - if (verbose) - printf(_("%s requires primary to keep WAL files %s until at least %s\n"), - progname, first_wal_segment, last_wal_segment); + /* If the rsync failed then exit */ + if (r != 0) + exit(ERR_BAD_RSYNC); - /* we need to create the pg_xlog sub directory too, i'm reusing a variable here */ - sprintf(local_control_file, "%s/pg_xlog", dest_dir); + /* + * We need to create the pg_xlog sub directory too, I'm reusing a variable + * here. 
+ */ + maxlen_snprintf(local_control_file, "%s/pg_xlog", runtime_options.dest_dir); if (!create_directory(local_control_file)) { - fprintf(stderr, _("%s: couldn't create directory %s, you will need to do it manually...\n"), - progname, dest_dir); + log_err(_("%s: couldn't create directory %s, you will need to do it manually...\n"), + progname, runtime_options.dest_dir); + r = ERR_NEEDS_XLOG; /* continue, but eventually exit returning error */ } /* Finally, write the recovery.conf file */ - create_recovery_file(dest_dir); + create_recovery_file(runtime_options.dest_dir); - /* We don't start the service because we still may want to move the directory */ - return; + /* + * We don't start the service yet because we still may want to + * move the directory + */ + log_info(_("%s standby clone complete\n"), progname); + exit(r); } static void do_standby_promote(void) { - PGconn *conn; + PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; - - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char script[MAXLEN]; PGconn *old_master_conn; int old_master_id; int r; char data_dir[MAXLEN]; - char recovery_file_path[MAXLEN]; - char recovery_done_path[MAXLEN]; + char recovery_file_path[MAXFILENAME]; + char recovery_done_path[MAXFILENAME]; char standby_version[MAXVERSIONSTR]; - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is missing. 
" - "Check the configuration file.\n"); - exit(1); - } - /* We need to connect to check configuration */ - conn = establishDBConnection(conninfo, true); + log_info(_("%s connecting to master database\n"), progname); + conn = establishDBConnection(options.conninfo, true); /* we need v9 or better */ + log_info(_("%s connected to master, checking its state\n"), progname); pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); - return; + log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + exit(ERR_BAD_CONFIG); } /* Check we are in a standby node */ if (!is_standby(conn)) { - fprintf(stderr, "repmgr: The command should be executed in a standby node\n"); - return; + log_err("repmgr: The command should be executed on a standby node\n"); + exit(ERR_BAD_CONFIG); } /* we also need to check if there isn't any master already */ - old_master_conn = getMasterConnection(conn, myLocalId, myClusterName, &old_master_id); + old_master_conn = getMasterConnection(conn, options.node, options.cluster_name, + &old_master_id, NULL); if (old_master_conn != NULL) { PQfinish(old_master_conn); - fprintf(stderr, "There is a master already in this cluster"); - return; + log_err("There is a master already in this cluster\n"); + exit(ERR_BAD_CONFIG); } - if (verbose) - printf(_("\n%s: Promoting standby...\n"), progname); + log_notice(_("%s: Promoting standby\n"), progname); /* Get the data directory full path and the last subdirectory */ - sprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); + sqlquery_snprintf(sqlquery, "SELECT setting " + " FROM pg_settings WHERE name = 'data_directory'"); + log_debug("standby promote: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); + log_err("Can't 
get info about data directory: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } strcpy(data_dir, PQgetvalue(res, 0, 0)); PQclear(res); PQfinish(conn); - sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); - sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); + log_info(_("%s: Marking recovery done\n"), progname); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); + maxlen_snprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); rename(recovery_file_path, recovery_done_path); - /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); + /* + * We assume the pg_ctl script is in the PATH. Restart and wait for + * the server to finish starting, so that the check below will + * find an active server rather than one starting up. This may + * hang for up the default timeout (60 seconds). + */ + log_notice(_("%s: restarting server using pg_ctl\n"), progname); + maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir); r = system(script); if (r != 0) { - fprintf(stderr, "Can't restart service\n"); - return; + log_err("Can't restart PostgreSQL server\n"); + exit(ERR_NO_RESTART); } /* reconnect to check we got promoted */ - /* - * XXX i'm removing this because it gives an annoying message saying couldn't connect - * but is just the server starting up - * conn = establishDBConnection(conninfo, true); - * if (is_standby(conn)) - * fprintf(stderr, "\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); - * else - * fprintf(stderr, "\n%s: you should REINDEX any hash indexes you have.\n", progname); - * PQfinish(conn); - */ - + log_info(_("%s connecting to now restarted database\n"), progname); + conn = establishDBConnection(options.conninfo, true); + if (is_standby(conn)) + { + log_err("\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); + } + else + { + log_err("\n%s: 
STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n", progname); + } + PQfinish(conn); return; } @@ -1073,15 +1189,11 @@ do_standby_promote(void) static void do_standby_follow(void) { - PGconn *conn; + PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; - - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; - + char script[MAXLEN]; + char master_conninfo[MAXLEN]; PGconn *master_conn; int master_id; @@ -1091,25 +1203,17 @@ do_standby_follow(void) char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); - exit(1); - } - /* We need to connect to check configuration */ - conn = establishDBConnection(conninfo, true); + log_info(_("%s connecting to standby database\n"), progname); + conn = establishDBConnection(options.conninfo, true); /* Check we are in a standby node */ + log_info(_("%s connected to standby, checking its state\n"), progname); if (!is_standby(conn)) { - fprintf(stderr, "\n%s: The command should be executed in a standby node\n", progname); + log_err("\n%s: The command should be executed in a standby node\n", progname); return; + exit(ERR_BAD_CONFIG); } /* should be v9 or better */ @@ -1117,35 +1221,38 @@ do_standby_follow(void) if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname); - return; + log_err(_("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + exit(ERR_BAD_CONFIG); } /* we also need to check if there is any master in the cluster */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); + log_info(_("%s connecting to master database\n"), progname); + master_conn = 
getMasterConnection(conn, options.node, + options.cluster_name, &master_id,(char *) &master_conninfo); if (master_conn == NULL) { PQfinish(conn); - fprintf(stderr, "There isn't a master to follow in this cluster"); - return; + log_err("There isn't a master to follow in this cluster\n"); + exit(ERR_BAD_CONFIG); } /* Check we are going to point to a master */ if (is_standby(master_conn)) { PQfinish(conn); - fprintf(stderr, "%s: The node to follow should be a master\n", progname); - return; + log_err("%s: The node to follow should be a master\n", progname); + exit(ERR_BAD_CONFIG); } /* should be v9 or better */ + log_info(_("%s connected to master, checking its state\n"), progname); pg_version(master_conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); - return; + log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + exit(ERR_BAD_CONFIG); } /* master and standby version should match */ @@ -1153,9 +1260,9 @@ do_standby_follow(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), + log_err(_("%s needs versions of both master (%s) and standby (%s) to match.\n"), progname, master_version, standby_version); - return; + exit(ERR_BAD_CONFIG); } /* @@ -1163,25 +1270,23 @@ do_standby_follow(void) * before closing the connection because we will need them to * recreate the recovery.conf file */ - host = malloc(20); - masterport = malloc(10); - strcpy(host, PQhost(master_conn)); - strcpy(masterport, PQport(master_conn)); + strncpy(runtime_options.host, PQhost(master_conn), MAXLEN); + strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN); PQfinish(master_conn); - if (verbose) - printf(_("\n%s: Changing standby's master...\n"), progname); + log_info(_("%s Changing standby's master"),progname); /* Get the data directory full path */ - 
sprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); + sqlquery_snprintf(sqlquery, "SELECT setting " + " FROM pg_settings WHERE name = 'data_directory'"); + log_debug("standby follow: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); + log_err("Can't get info about data directory: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - return; + exit(ERR_BAD_CONFIG); } strcpy(data_dir, PQgetvalue(res, 0, 0)); PQclear(res); @@ -1189,85 +1294,121 @@ do_standby_follow(void) /* write the recovery.conf file */ if (!create_recovery_file(data_dir)) - return; + exit(ERR_BAD_CONFIG); /* Finally, restart the service */ /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); + maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { - fprintf(stderr, "Can't restart service\n"); + log_err("Can't restart service\n"); return; + exit(ERR_NO_RESTART); } return; } -static void -help(const char *progname) +void usage(void) +{ + log_err(_("\n\n%s: Replicator manager \n"), progname); + log_err(_("Try \"%s --help\" for more information.\n"), progname); +} + +void help(const char *progname) { printf(_("\n%s: Replicator manager \n"), progname); printf(_("Usage:\n")); - printf(_(" %s [OPTIONS] master {register}\n"), progname); - printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), progname); + printf(_(" %s [OPTIONS] master {register}\n"), progname); + printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), + progname); printf(_("\nGeneral options:\n")); - printf(_(" --help show this help, then exit\n")); - printf(_(" --version output version information, then exit\n")); - printf(_(" --verbose output verbose activity information\n")); + printf(_(" --help show this help, then exit\n")); + 
printf(_(" --version output version information, then exit\n")); + printf(_(" --verbose output verbose activity information\n")); printf(_("\nConnection options:\n")); - printf(_(" -d, --dbname=DBNAME database to connect to\n")); - printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); - printf(_(" -p, --port=PORT database server port\n")); - printf(_(" -U, --username=USERNAME database user name to connect as\n")); + printf(_(" -d, --dbname=DBNAME database to connect to\n")); + printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); + printf(_(" -p, --port=PORT database server port\n")); + printf(_(" -U, --username=USERNAME database user name to connect as\n")); printf(_("\nConfiguration options:\n")); - printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); - printf(_(" -f, --config_file=PATH path to the configuration file\n")); - printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); - printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); - printf(_(" -F, --force force potentially dangerous operations to happen\n")); + printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); + printf(_(" -f, --config_file=PATH path to the configuration file\n")); + printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); + printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); + printf(_(" -F, --force force potentially dangerous operations to happen\n")); + printf(_(" --password-required read the PGPASSWORD environment variable and add it to connection information\n")); printf(_("\n%s performs some tasks like clone a node, promote it "), progname); printf(_("or making follow another node and then exits.\n")); printf(_("COMMANDS:\n")); - printf(_(" master register - registers the master in a cluster\n")); - printf(_(" standby 
register - registers a standby in a cluster\n")); - printf(_(" standby clone [node] - allows creation of a new standby\n")); - printf(_(" standby promote - allows manual promotion of a specific standby into a ")); + printf(_(" master register - registers the master in a cluster\n")); + printf(_(" standby register - registers a standby in a cluster\n")); + printf(_(" standby clone [node] - allows creation of a new standby\n")); + printf(_(" standby promote - allows manual promotion of a specific standby into a ")); printf(_("new master in the event of a failover\n")); - printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); + printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); } +/* + * Creates a recovery file for a standby. + * + * Writes master_conninfo to recovery.conf if is non-NULL + */ static bool create_recovery_file(const char *data_dir) { FILE *recovery_file; - char recovery_file_path[MAXLEN]; - char line[MAXLEN]; + char recovery_file_path[MAXLEN]; + char line[MAXLEN]; + char *port = NULL; - sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); recovery_file = fopen(recovery_file_path, "w"); if (recovery_file == NULL) { - fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n"); + log_err("could not create recovery.conf file, it could be necessary to create it manually\n"); return false; } - sprintf(line, "standby_mode = 'on'\n"); + maxlen_snprintf(line, "standby_mode = 'on'\n"); if (fputs(line, recovery_file) == EOF) { - fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); + log_err("recovery file could not be written, it could be necessary to create it manually\n"); fclose(recovery_file); return false; } - sprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, ((masterport==NULL) ? 
"5432" : masterport)); + if (strlen(runtime_options.masterport) == 0) + /* Use a default PostgreSQL port if one was not specified */ + port = "5432"; + else + port = runtime_options.masterport; + + /* + * Template the primary_conninfo component of recovery.conf, depending on + * whether a password needs to be included. + */ + if (runtime_options.password_required) + { + char *password = getenv("PGPASSWORD"); + + maxlen_snprintf(line, + "primary_conninfo = 'host=%s port=%s password=%s'\n", + runtime_options.host, port, password); + } + else + maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", + runtime_options.host, port); + + if (fputs(line, recovery_file) == EOF) { - fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); + log_err("recovery file could not be written, it could be necessary to create it manually\n"); fclose(recovery_file); return false; } @@ -1280,45 +1421,51 @@ create_recovery_file(const char *data_dir) static int -copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory) +copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory) { - char script[QUERY_STR_LEN]; - char options[QUERY_STR_LEN]; - char host_string[QUERY_STR_LEN]; - int r; + char script[MAXLEN]; + char rsync_flags[MAXLEN]; + char host_string[MAXLEN]; + int r; + + if (strnlen(options.rsync_options, MAXLEN) == 0) + maxlen_snprintf( + rsync_flags, "%s", + "--archive --checksum --compress --progress --rsh=ssh"); + else + maxlen_snprintf(rsync_flags, "%s", options.rsync_options); - sprintf(options, "--archive --checksum --compress --progress --rsh=ssh"); - if (force) - strcat(options, " --delete"); + if (runtime_options.force) + strcat(rsync_flags, " --delete"); - if (remote_user == NULL) + if (!remote_user[0]) { - sprintf(host_string,"%s",host); + maxlen_snprintf(host_string, "%s", host); } else { - 
sprintf(host_string,"%s@%s",remote_user,host); + maxlen_snprintf(host_string,"%s@%s",remote_user,host); } if (is_directory) { - strcat(options, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); - sprintf(script, "rsync %s %s:%s/* %s", - options, host_string, remote_path, local_path); + strcat(rsync_flags, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); + maxlen_snprintf(script, "rsync %s %s:%s/* %s", + rsync_flags, host_string, remote_path, local_path); } else { - sprintf(script, "rsync %s %s:%s %s/.", - options, host_string, remote_path, local_path); + maxlen_snprintf(script, "rsync %s %s:%s %s/.", + rsync_flags, host_string, remote_path, local_path); } - if (verbose) - printf("rsync command line: '%s'\n",script); + log_info("rsync command line: '%s'\n", script); r = system(script); if (r != 0) - fprintf(stderr, _("Can't rsync from remote file or directory (%s:%s)\n"), + log_err(_("Can't rsync from remote file or directory (%s:%s)\n"), host_string, remote_path); return r; @@ -1341,17 +1488,17 @@ check_parameters_for_action(const int action) * all other parameters are at least useless and could be * confusing so reject them */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if (runtime_options.host[0] || runtime_options.masterport[0] || runtime_options.username[0] || + runtime_options.dbname[0]) { - fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a MASTER REGISTER command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You can't use connection parameters to the master when issuing a MASTER REGISTER command.\n"); + usage(); ok = false; } - if (dest_dir != NULL) + if (runtime_options.dest_dir[0]) { - fprintf(stderr, "\nYou don't need a destination directory for MASTER REGISTER command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You don't need a destination directory for MASTER 
REGISTER command\n"); + usage(); ok = false; } break; @@ -1361,17 +1508,17 @@ check_parameters_for_action(const int action) * we don't need connection parameters to the master * because we can detect the master in repl_nodes */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if (runtime_options.host[0] || runtime_options.masterport[0] || runtime_options.username[0] || + runtime_options.dbname[0]) { - fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY REGISTER command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You can't use connection parameters to the master when issuing a STANDBY REGISTER command.\n"); + usage(); ok = false; } - if (dest_dir != NULL) + if (runtime_options.dest_dir[0]) { - fprintf(stderr, "\nYou don't need a destination directory for STANDBY REGISTER command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You don't need a destination directory for STANDBY REGISTER command\n"); + usage(); ok = false; } break; @@ -1382,17 +1529,17 @@ check_parameters_for_action(const int action) * because we will try to detect the master in repl_nodes * if we can't find it then the promote action will be cancelled */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if (runtime_options.host[0] || runtime_options.masterport[0] || runtime_options.username[0] || + runtime_options.dbname[0]) { - fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY PROMOTE command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You can't use connection parameters to the master when issuing a STANDBY PROMOTE command.\n"); + usage(); ok = false; } - if (dest_dir != NULL) + if (runtime_options.dest_dir[0]) { - fprintf(stderr, "\nYou don't need a destination directory for STANDBY PROMOTE 
command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You don't need a destination directory for STANDBY PROMOTE command\n"); + usage(); ok = false; } break; @@ -1403,32 +1550,45 @@ check_parameters_for_action(const int action) * because we will try to detect the master in repl_nodes * if we can't find it then the follow action will be cancelled */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if (runtime_options.host[0] || runtime_options.masterport[0] || runtime_options.username[0] || + runtime_options.dbname[0]) { - fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY FOLLOW command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You can't use connection parameters to the master when issuing a STANDBY FOLLOW command.\n"); + usage(); ok = false; } - if (dest_dir != NULL) + if (runtime_options.dest_dir[0]) { - fprintf(stderr, "\nYou don't need a destination directory for STANDBY FOLLOW command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_err("You don't need a destination directory for STANDBY FOLLOW command\n"); + usage(); ok = false; } break; case STANDBY_CLONE: + /* + * Issue a friendly notice that the configuration file is not + * necessary nor read at all in when performing a STANDBY CLONE + * action. + */ + if (runtime_options.config_file[0]) + { + log_notice("Only command line parameters for the connection " + "to the master are used when issuing a STANDBY CLONE command. 
" + "The passed configuration file is neither required nor used for " + "its node configuration portions\n\n"); + } /* * To clone a master into a standby we need connection parameters - * repmgr.conf is useless because we don't have a server running - * in the standby + * repmgr.conf is useless because we don't have a server running in + * the standby; warn the user, but keep going. */ - if (config_file != NULL) + if (runtime_options.host == NULL) { - fprintf(stderr, "\nYou need to use connection parameters to the master when issuing a STANDBY CLONE command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + log_notice("You need to use connection parameters to " + "the master when issuing a STANDBY CLONE command."); ok = false; } + need_a_node = false; break; } diff --git a/repmgr.conf b/repmgr.conf index f728750..cf9402f 100644 --- a/repmgr.conf +++ b/repmgr.conf @@ -1,3 +1,21 @@ +################################################### +# Replication Manager configuration file +################################################### + +# Cluster name cluster=test + +# Node ID node=2 + +# Connection information conninfo='host=192.168.204.104' +rsync_options=--archive --checksum --compress --progress --rsh=ssh + +# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG +# Default: NOTICE +loglevel=NOTICE + +# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER +# Default: STDERR +logfacility=STDERR diff --git a/repmgr.h b/repmgr.h index ec32b99..561265f 100644 --- a/repmgr.h +++ b/repmgr.h @@ -1,6 +1,6 @@ /* * repmgr.h - * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,15 +24,41 @@ #include "getopt_long.h" #include "libpq-fe.h" +#include "strutil.h" #include "dbutils.h" 
-#include "config.h" - +#include "errcode.h" #define PRIMARY_MODE 0 #define STANDBY_MODE 1 -#define MAXLEN 80 -#define CONFIG_FILE "repmgr.conf" -#define MAXVERSIONSTR 16 +#include "config.h" +#define MAXFILENAME 1024 +#define ERRBUFF_SIZE 512 + +#define DEFAULT_CONFIG_FILE "./repmgr.conf" +#define DEFAULT_WAL_KEEP_SEGMENTS "5000" +#define DEFAULT_DEST_DIR "." +#define DEFAULT_MASTER_PORT "5432" +#define DEFAULT_DBNAME "postgres" +#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_" + +/* Run time options type */ +typedef struct +{ + + char dbname[MAXLEN]; + char host[MAXLEN]; + char username[MAXLEN]; + char dest_dir[MAXFILENAME]; + char config_file[MAXFILENAME]; + char remote_user[MAXLEN]; + char wal_keep_segments[MAXLEN]; + bool verbose; + bool force; + bool password_required; + + char masterport[MAXLEN]; + +} t_runtime_options; #endif diff --git a/repmgr.sql b/repmgr.sql index f28e6d8..75ef448 100644 --- a/repmgr.sql +++ b/repmgr.sql @@ -1,3 +1,10 @@ +/* + * repmgr.sql + * + * Copyright (C) 2ndQuadrant, 2011 + * + */ + CREATE USER repmgr; CREATE SCHEMA repmgr; @@ -5,27 +12,25 @@ CREATE SCHEMA repmgr; * The table repl_nodes keeps information about all machines in * a cluster */ -drop table if exists repl_nodes cascade; CREATE TABLE repl_nodes ( - id integer primary key, - cluster text not null, -- Name to identify the cluster - conninfo text not null + id integer primary key, + cluster text not null, -- Name to identify the cluster + conninfo text not null ); ALTER TABLE repl_nodes OWNER TO repmgr; /* - * Keeps monitor info about every node and their relative "position" + * Keeps monitor info about every node and their relative "position" * to primary */ -drop table if exists repl_monitor cascade; CREATE TABLE repl_monitor ( primary_node INTEGER NOT NULL, standby_node INTEGER NOT NULL, - last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, - last_wal_primary_location TEXT NOT NULL, + last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, + last_wal_primary_location TEXT 
NOT NULL, last_wal_standby_location TEXT NOT NULL, - replication_lag BIGINT NOT NULL, - apply_lag BIGINT NOT NULL + replication_lag BIGINT NOT NULL, + apply_lag BIGINT NOT NULL ); ALTER TABLE repl_monitor OWNER TO repmgr; @@ -33,21 +38,20 @@ ALTER TABLE repl_monitor OWNER TO repmgr; /* * This view shows the latest monitor info about every node. * Interesting thing to see: - * replication_lag: in bytes (this is how far the latest xlog record + * replication_lag: in bytes (this is how far the latest xlog record * we have received is from master) * apply_lag: in bytes (this is how far the latest xlog record - * we have applied is from the latest record we + * we have applied is from the latest record we * have received) * time_lag: how many seconds are we from being up-to-date with master */ -drop view if exists repl_status; CREATE VIEW repl_status AS WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node ORDER BY last_monitor_time desc) FROM repl_monitor) -SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, - last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, - pg_size_pretty(apply_lag) apply_lag, +SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, + last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, + pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag FROM monitor_info a WHERE row_number = 1; diff --git a/repmgrd.c b/repmgrd.c index d44c8a7..c7005a9 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1,6 +1,6 @@ /* * repmgrd.c - Replication manager daemon - * Copyright (C) 2ndQuadrant, 2010 + * Copyright (C) 2ndQuadrant, 2010-2011 * * This module connects to the nodes of a replication cluster and monitors * how far are they from master @@ -27,31 +27,40 @@ #include #include "repmgr.h" +#include "config.h" +#include "log.h" +#include "strutil.h" #include "libpq/pqsignal.h" -char myClusterName[MAXLEN]; /* 
Local info */ +t_configuration_options local_options; int myLocalMode = STANDBY_MODE; -int myLocalId = -1; -PGconn *myLocalConn; +PGconn *myLocalConn = NULL; /* Primary info */ -int primaryId; -char primaryConninfo[MAXLEN]; -PGconn *primaryConn; +t_configuration_options primary_options; -char sqlquery[8192]; +PGconn *primaryConn = NULL; + +char sqlquery[QUERY_STR_LEN]; const char *progname; -char *config_file = NULL; +char *config_file = DEFAULT_CONFIG_FILE; bool verbose = false; +char repmgr_schema[MAXLEN]; +/* + * should initialize with {0} to be ANSI compliant ? but this raises + * error with gcc -Wall + */ +t_configuration_options config = {}; -static void help(const char *progname); -static void checkClusterConfiguration(void); +static void help(const char* progname); +static void usage(void); +static void checkClusterConfiguration(PGconn *conn,PGconn *primary); static void checkNodeConfiguration(char *conninfo); static void CancelQuery(void); @@ -63,22 +72,22 @@ static void handle_sigint(SIGNAL_ARGS); static void setup_cancel_handler(void); #define CloseConnections() \ - if (PQisBusy(primaryConn) == 1) \ - CancelQuery(); \ - if (myLocalConn != NULL) \ - PQfinish(myLocalConn); \ - if (primaryConn != NULL) \ - PQfinish(primaryConn); + if (PQisBusy(primaryConn) == 1) \ + CancelQuery(); \ + if (myLocalConn != NULL) \ + PQfinish(myLocalConn); \ + if (primaryConn != NULL && primaryConn != myLocalConn) \ + PQfinish(primaryConn); /* * Every 3 seconds, insert monitor info */ -#define MonitorCheck() \ - for (;;) \ - { \ - MonitorExecute(); \ - sleep(3); \ - } +#define MonitorCheck() \ + for (;;) \ + { \ + MonitorExecute(); \ + sleep(3); \ + } int @@ -94,7 +103,6 @@ main(int argc, char **argv) int optindex; int c; - char conninfo[MAXLEN]; char standby_version[MAXVERSIONSTR]; progname = get_progname(argv[0]); @@ -104,16 +112,15 @@ main(int argc, char **argv) if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); - exit(0); + exit(SUCCESS); }
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s (PostgreSQL) " PG_VERSION "\n", progname); - exit(0); + exit(SUCCESS); } } - while ((c = getopt_long(argc, argv, "f:v", long_options, &optindex)) != -1) { switch (c) @@ -125,39 +132,41 @@ main(int argc, char **argv) verbose = true; break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - exit(1); + usage(); + exit(ERR_BAD_CONFIG); } } setup_cancel_handler(); - if (config_file == NULL) - { - config_file = malloc(5 + sizeof(CONFIG_FILE)); - sprintf(config_file, "./%s", CONFIG_FILE); - } - /* * Read the configuration file: repmgr.conf */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) + parse_config(config_file, &local_options); + if (local_options.node == -1) { - fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); - exit(1); + log_err("Node information is missing. " + "Check the configuration file, or provide one if you have not done so.\n"); + exit(ERR_BAD_CONFIG); } - myLocalConn = establishDBConnection(conninfo, true); + logger_init(progname, local_options.loglevel, local_options.logfacility); + if (verbose) + logger_min_verbose(LOG_INFO); + + snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); + + log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo); + myLocalConn = establishDBConnection(local_options.conninfo, true); /* should be v9 or better */ + log_info(_("%s Connected to database, checking its state\n"), progname); pg_version(myLocalConn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(myLocalConn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); - exit(1); + log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + exit(ERR_BAD_CONFIG); } /* @@ -167,28 +176,47 @@ main(int argc, char **argv) myLocalMode = 
is_standby(myLocalConn) ? STANDBY_MODE : PRIMARY_MODE; if (myLocalMode == PRIMARY_MODE) { - primaryId = myLocalId; - strcpy(primaryConninfo, conninfo); + primary_options.node = local_options.node; + strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); primaryConn = myLocalConn; } else { /* I need the id of the primary as well as a connection to it */ - primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId); + log_info(_("%s Connecting to primary for cluster '%s'\n"), + progname, local_options.cluster_name); + primaryConn = getMasterConnection(myLocalConn, local_options.node, + local_options.cluster_name, + &primary_options.node,NULL); if (primaryConn == NULL) - exit(1); + { + CloseConnections(); + exit(ERR_BAD_CONFIG); + } } - checkClusterConfiguration(); - checkNodeConfiguration(conninfo); + checkClusterConfiguration(myLocalConn,primaryConn); + checkNodeConfiguration(local_options.conninfo); if (myLocalMode == STANDBY_MODE) { + log_info(_("%s Starting continuous standby node monitoring\n"), progname); MonitorCheck(); } + else + { + log_info(_("%s This is a primary node, program not needed here; exiting'\n"), progname); + } + + /* Prevent a double-free */ + if (primaryConn == myLocalConn) + myLocalConn = NULL; /* close the connection to the database and cleanup */ CloseConnections(); + /* Shuts down logging system */ + logger_shutdown(); + return 0; } @@ -221,7 +249,7 @@ MonitorExecute(void) { if (PQstatus(primaryConn) != CONNECTION_OK) { - fprintf(stderr, "\n%s: Connection to master has been lost, trying to recover...\n", progname); + log_warning(_("Connection to master has been lost, trying to recover...\n")); /* wait 20 seconds between retries */ sleep(20); @@ -229,26 +257,29 @@ MonitorExecute(void) } else { - fprintf(stderr, "\n%s: Connection to master has been restored, continue monitoring.\n", progname); + if (connection_retries > 0) + { + log_notice(_("Connection to master has been restored, continue 
monitoring.\n")); + } break; } } if (PQstatus(primaryConn) != CONNECTION_OK) { - fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if " - "another node has been promoted.\n", progname); + log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n")); for (connection_retries = 0; connection_retries < 6; connection_retries++) { - primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId); + primaryConn = getMasterConnection(myLocalConn, local_options.node, + local_options.cluster_name, &primary_options.node,NULL); if (PQstatus(primaryConn) == CONNECTION_OK) { /* Connected, we can continue the process so break the loop */ - fprintf(stderr, "\n%s: Connected to node %d, continue monitoring.\n", progname, primaryId); + log_err(_("Connected to node %d, continue monitoring.\n"), primary_options.node); break; } else { - fprintf(stderr, "\n%s: We haven't found a new master, waiting before retry...\n", progname); + log_err(_("We haven't found a new master, waiting before retry...\n")); /* wait 5 minutes before retries, after 6 failures (30 minutes) we stop trying */ sleep(300); } @@ -256,17 +287,16 @@ MonitorExecute(void) } if (PQstatus(primaryConn) != CONNECTION_OK) { - fprintf(stderr, "\n%s: We couldn't reconnect for long enough, exiting...\n", progname); - exit(1); + log_err(_("We couldn't reconnect for long enough, exiting...\n")); + exit(ERR_DB_CON); } /* Check if we still are a standby, we could have been promoted */ if (!is_standby(myLocalConn)) { - fprintf(stderr, "\n%s: seems like we have been promoted, so exit from monitoring...\n", - progname); + log_err(_("It seems like we have been promoted, so exit from monitoring...\n")); CloseConnections(); - exit(1); + exit(ERR_PROMOTED); } /* @@ -278,36 +308,37 @@ MonitorExecute(void) CancelQuery(); /* Get local xlog info */ - sprintf(sqlquery, - "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " - "pg_last_xlog_replay_location()"); + 
sqlquery_snprintf( + sqlquery, + "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " + "pg_last_xlog_replay_location()"); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); + log_err("PQexec failed: %s\n", PQerrorMessage(myLocalConn)); PQclear(res); /* if there is any error just let it be and retry in next loop */ return; } - strcpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0)); - strcpy(last_wal_standby_received , PQgetvalue(res, 0, 1)); - strcpy(last_wal_standby_applied , PQgetvalue(res, 0, 2)); + strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN); + strncpy(last_wal_standby_received , PQgetvalue(res, 0, 1), MAXLEN); + strncpy(last_wal_standby_applied , PQgetvalue(res, 0, 2), MAXLEN); PQclear(res); /* Get primary xlog info */ - sprintf(sqlquery, "SELECT pg_current_xlog_location() "); + sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() "); res = PQexec(primaryConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(primaryConn)); + log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn)); PQclear(res); return; } - strcpy(last_wal_primary_location, PQgetvalue(res, 0, 0)); + strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN); PQclear(res); /* Calculate the lag */ @@ -318,57 +349,59 @@ MonitorExecute(void) /* * Build the SQL to execute on primary */ - sprintf(sqlquery, - "INSERT INTO repmgr_%s.repl_monitor " - "VALUES(%d, %d, '%s'::timestamp with time zone, " - " '%s', '%s', " - " %lld, %lld)", myClusterName, - primaryId, myLocalId, monitor_standby_timestamp, - last_wal_primary_location, - last_wal_standby_received, - (lsn_primary - lsn_standby_received), - (lsn_standby_received - lsn_standby_applied)); + sqlquery_snprintf(sqlquery, + "INSERT INTO %s.repl_monitor " + "VALUES(%d, %d, '%s'::timestamp with time zone, " + " '%s', '%s', " + " %lld, 
%lld)", repmgr_schema, + primary_options.node, local_options.node, monitor_standby_timestamp, + last_wal_primary_location, + last_wal_standby_received, + (lsn_primary - lsn_standby_received), + (lsn_standby_received - lsn_standby_applied)); /* * Execute the query asynchronously, but don't check for a result. We * will check the result next time we pause for a monitor step. */ if (PQsendQuery(primaryConn, sqlquery) == 0) - fprintf(stderr, "Query could not be sent to primary. %s\n", - PQerrorMessage(primaryConn)); + log_warning("Query could not be sent to primary. %s\n", + PQerrorMessage(primaryConn)); } static void -checkClusterConfiguration(void) +checkClusterConfiguration(PGconn *conn, PGconn *primary) { PGresult *res; - sprintf(sqlquery, "SELECT oid FROM pg_class " - " WHERE oid = 'repmgr_%s.repl_nodes'::regclass", - myClusterName); - res = PQexec(myLocalConn, sqlquery); + log_info(_("%s Checking cluster configuration with schema '%s'\n"), + progname, repmgr_schema); + sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class " + " WHERE oid = '%s.repl_nodes'::regclass", + repmgr_schema); + res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); + log_err("PQexec failed: %s\n", PQerrorMessage(conn)); PQclear(res); - PQfinish(myLocalConn); - PQfinish(primaryConn); - exit(1); + CloseConnections(); + exit(ERR_DB_QUERY); } /* - * If there isn't any results then we have not configured a primary node yet - * in repmgr or the connection string is pointing to the wrong database. + * If there isn't any results then we have not configured a primary node + * yet in repmgr or the connection string is pointing to the wrong + * database. + * * XXX if we are the primary, should we try to create the tables needed? 
*/ if (PQntuples(res) == 0) { - fprintf(stderr, "The replication cluster is not configured\n"); + log_err("The replication cluster is not configured\n"); PQclear(res); - PQfinish(myLocalConn); - PQfinish(primaryConn); - exit(1); + CloseConnections(); + exit(ERR_BAD_CONFIG); } PQclear(res); } @@ -382,18 +415,20 @@ checkNodeConfiguration(char *conninfo) /* * Check if we have my node information in repl_nodes */ - sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE id = %d AND cluster = '%s' ", - myClusterName, myLocalId, myClusterName); + log_info(_("%s Checking node %d in cluster '%s'\n"), + progname, local_options.node, local_options.cluster_name); + sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " + " WHERE id = %d AND cluster = '%s' ", + repmgr_schema, local_options.node, + local_options.cluster_name); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); + log_err("PQexec failed: %s\n", PQerrorMessage(myLocalConn)); PQclear(res); - PQfinish(myLocalConn); - PQfinish(primaryConn); - exit(1); + CloseConnections(); + exit(ERR_BAD_CONFIG); } /* @@ -403,18 +438,22 @@ checkNodeConfiguration(char *conninfo) if (PQntuples(res) == 0) { PQclear(res); + /* Adding the node */ - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + log_info(_("%s Adding node %d to cluster '%s'\n"), + progname, local_options.node, local_options.cluster_name); + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " + "VALUES (%d, '%s', '%s')", + repmgr_schema, local_options.node, + local_options.cluster_name, + local_options.conninfo); if (!PQexec(primaryConn, sqlquery)) { - fprintf(stderr, "Cannot insert node details, %s\n", + log_err("Cannot insert node details, %s\n", PQerrorMessage(primaryConn)); - PQfinish(myLocalConn); - PQfinish(primaryConn); - exit(1); + CloseConnections(); + 
exit(ERR_BAD_CONFIG); } } PQclear(res); @@ -429,29 +468,33 @@ walLocationToBytes(char *wal_location) if (sscanf(wal_location, "%X/%X", &xlogid, &xrecoff) != 2) { - fprintf(stderr, "wrong log location format: %s\n", wal_location); + log_err("wrong log location format: %s\n", wal_location); return 0; } - return ((xlogid * 16 * 1024 * 1024 * 255) + xrecoff); + return (( (long long) xlogid * 16 * 1024 * 1024 * 255) + xrecoff); } -static void -help(const char *progname) +void usage(void) { - printf(_("\n%s: Replicator manager daemon \n"), progname); - printf(_("Usage:\n")); - printf(_(" %s [OPTIONS]\n"), progname); + log_err(_("%s: Replicator manager daemon \n"), progname); + log_err(_("Try \"%s --help\" for more information.\n"), progname); +} + + +void help(const char *progname) +{ + printf(_("Usage: %s [OPTIONS]\n"), progname); + printf(_("Replicator manager daemon for PostgreSQL.\n")); printf(_("\nOptions:\n")); printf(_(" --help show this help, then exit\n")); printf(_(" --version output version information, then exit\n")); printf(_(" --verbose output verbose activity information\n")); - printf(_(" -f, --config_file=PATH database to connect to\n")); + printf(_(" -f, --config_file=PATH configuration file\n")); printf(_("\n%s monitors a cluster of servers.\n"), progname); } - #ifndef WIN32 static void handle_sigint(SIGNAL_ARGS) @@ -459,6 +502,7 @@ handle_sigint(SIGNAL_ARGS) CloseConnections(); } + static void setup_cancel_handler(void) { @@ -470,13 +514,13 @@ setup_cancel_handler(void) static void CancelQuery(void) { - char errbuf[256]; + char errbuf[ERRBUFF_SIZE]; PGcancel *pgcancel; pgcancel = PQgetCancel(primaryConn); - if (!pgcancel || PQcancel(pgcancel, errbuf, 256) == 0) - fprintf(stderr, "Can't stop current query: %s", errbuf); + if (!pgcancel || PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0) + log_warning("Can't stop current query: %s\n", errbuf); PQfreeCancel(pgcancel); } diff --git a/strutil.c b/strutil.c new file mode 100644 index 0000000..6af395e --- 
/dev/null +++ b/strutil.c @@ -0,0 +1,87 @@ +/* + * strutil.c + * + * Copyright (C) 2ndQuadrant, 2011 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "log.h" +#include "strutil.h" + +static int xvsnprintf(char *str, size_t size, const char *format, va_list ap); + + +static int +xvsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int retval; + + retval = vsnprintf(str, size, format, ap); + + if (retval >= size) + { + log_err(_("Buffer of size not large enough to format entire string '%s'\n"), + str); + exit(ERR_STR_OVERFLOW); + } + + return retval; +} + + +int +xsnprintf(char *str, size_t size, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, size, format, arglist); + va_end(arglist); + + return retval; +} + + +int +sqlquery_snprintf(char *str, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, QUERY_STR_LEN, format, arglist); + va_end(arglist); + + return retval; +} + + +int maxlen_snprintf(char *str, const char *format, ...) 
+{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, MAXLEN, format, arglist); + va_end(arglist); + + return retval; +} diff --git a/strutil.h b/strutil.h new file mode 100644 index 0000000..ec39cbf --- /dev/null +++ b/strutil.h @@ -0,0 +1,38 @@ +/* + * strutil.h + * Copyright (C) 2ndQuadrant, 2010-2011 + * + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef _STRUTIL_H_ +#define _STRUTIL_H_ + +#include +#include + +#define QUERY_STR_LEN 8192 +#define MAXLEN 1024 +#define MAXLINELENGTH 4096 +#define MAXVERSIONSTR 16 +#define MAXCONNINFO 1024 + + +extern int xsnprintf(char *str, size_t size, const char *format, ...); +extern int sqlquery_snprintf(char *str, const char *format, ...); +extern int maxlen_snprintf(char *str, const char *format, ...); + +#endif /* _STRUTIL_H_ */ diff --git a/uninstall_repmgr.sql b/uninstall_repmgr.sql new file mode 100644 index 0000000..498a958 --- /dev/null +++ b/uninstall_repmgr.sql @@ -0,0 +1,13 @@ +/* + * uninstall_repmgr.sql + * + * Copyright (C) 2ndQuadrant, 2010-2011 + * + */ + +DROP TABLE IF EXISTS repl_nodes; +DROP TABLE IF EXISTS repl_monitor; +DROP VIEW IF EXISTS repl_status; + +DROP SCHEMA repmgr; +DROP USER repmgr;