diff --git a/.gitignore b/.gitignore index 349c501..71670b0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.o repmgr repmgrd +README.html diff --git a/Makefile b/Makefile index 655467a..dccbf2c 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,13 @@ # # Makefile +# # Copyright (c) 2ndQuadrant, 2010 +# Copyright (c) Heroku, 2010 + +repmgrd_OBJS = dbutils.o config.o repmgrd.o strutil.o +repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o strutil.o -repmgrd_OBJS = dbutils.o config.o repmgrd.o -repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o +DATA = repmgr.sql uninstall_repmgr.sql PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) @@ -26,10 +30,19 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now +# is overriding pgxs install. install: $(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)' $(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)' +ifneq (,$(DATA)$(DATA_built)) + @for file in $(addprefix $(srcdir)/, $(DATA)) $(DATA_built); do \ + echo "$(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'"; \ + $(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'; \ + done +endif + clean: rm -f *.o rm -f repmgrd diff --git a/README.rst b/README.rst index 18192c9..9dd5b2d 100644 --- a/README.rst +++ b/README.rst @@ -126,6 +126,35 @@ path either. The following recipe should work:: sudo PATH="/usr/pgsql-9.0/bin:$PATH" make USE_PGXS=1 install +Notes on Ubuntu, Debian or other Debian-based Builds +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Debian packages of PostgreSQL put ``pg_config`` into the development package +called ``postgresql-server-dev-$version``. + +When building repmgr against a Debian packages build, you may discover that some +development packages are needed as well. 
You will need the following development +packages installed:: + + sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev + +If you're using Debian packages for PostgreSQL and are building repmgr with the +USE_PGXS option you also need to install the corresponding development package:: + + sudo apt-get install postgresql-server-dev-9.0 + +If you build and install repmgr manually it will not be on the system path. The +binaries will be installed in /usr/lib/postgresql/$version/bin/ which is not on +the default path. The reason behind this is that Ubuntu/Debian systems manage +multiple installed versions of PostgreSQL on the same system through a wrapper +called pg_wrapper and repmgr is not (yet) known to this wrapper. + +You can solve this in many different ways, the most Debian-like is to make an +alternate for repmgr and repmgrd:: + + sudo update-alternatives --install /usr/bin/repmgr repmgr /usr/lib/postgresql/9.0/bin/repmgr 10 + sudo update-alternatives --install /usr/bin/repmgrd repmgrd /usr/lib/postgresql/9.0/bin/repmgrd 10 + Confirm software was built correctly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -136,8 +165,7 @@ is available by checking its version:: repmgr --version repmgrd --version -You may need to include -the full path of the binary instead, such as this RHEL example:: +You may need to include the full path of the binary instead, such as this RHEL example:: /usr/pgsql-9.0/bin/repmgr --version /usr/pgsql-9.0/bin/repmgrd --version diff --git a/check_dir.c b/check_dir.c index 26b0733..cdd7a7a 100644 --- a/check_dir.c +++ b/check_dir.c @@ -1,5 +1,6 @@ /* * check_dir.c - Directories management functions + * * Copyright (C) 2ndQuadrant, 2010 * * This program is free software: you can redistribute it and/or modify @@ -24,9 +25,12 @@ #include #include +/* NB: postgres_fe must be included BEFORE check_dir */ #include "postgres_fe.h" #include "check_dir.h" +#include "strutil.h" + static int mkdir_p(char *path, mode_t omode); @@ -64,7 +68,7 @@ 
check_dir(char *dir) } else { - result = 2; /* not empty */ + result = 2; /* not empty */ break; } } @@ -219,10 +223,11 @@ mkdir_p(char *path, mode_t omode) bool is_pg_dir(char *dir) { - char path[8192]; - struct stat sb; + const size_t buf_sz = 8192; + char path[buf_sz]; + struct stat sb; - sprintf(path, "%s/PG_VERSION", dir); + xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); return (stat(path, &sb) == 0) ? true : false; } diff --git a/config.c b/config.c index 4499509..868ec72 100644 --- a/config.c +++ b/config.c @@ -17,12 +17,12 @@ * */ +#include "config.h" #include "repmgr.h" - -#define MAXLINELENGTH 4096 +#include "strutil.h" void -parse_config(const char *config_file, char *cluster_name, int *node, char *conninfo) +parse_config(const char *config_file, repmgr_config *config) { char *s, buff[MAXLINELENGTH]; char name[MAXLEN]; @@ -30,9 +30,17 @@ parse_config(const char *config_file, char *cluster_name, int *node, char *conni FILE *fp = fopen (config_file, "r"); - if (fp == NULL) - return; - + if (fp == NULL) { + fprintf(stderr, _("Could not find configuration file '%s'\n"), config_file); + exit(1); + } + + /* Initialize */ + memset(config->cluster_name, 0, sizeof(config->cluster_name)); + config->node = -1; + memset(config->conninfo, 0, sizeof(config->conninfo)); + memset(config->rsync_options, 0, sizeof(config->rsync_options)); + /* Read next line */ while ((s = fgets (buff, sizeof buff, fp)) != NULL) { @@ -45,18 +53,34 @@ parse_config(const char *config_file, char *cluster_name, int *node, char *conni /* Copy into correct entry in parameters struct */ if (strcmp(name, "cluster") == 0) - strncpy (cluster_name, value, MAXLEN); + strncpy (config->cluster_name, value, MAXLEN); else if (strcmp(name, "node") == 0) - *node = atoi(value); + config->node = atoi(value); else if (strcmp(name, "conninfo") == 0) - strncpy (conninfo, value, MAXLEN); + strncpy (config->conninfo, value, MAXLEN); + else if (strcmp(name, "rsync_options") == 0) + strncpy (config->rsync_options, 
value, QUERY_STR_LEN); else printf ("WARNING: %s/%s: Unknown name/value pair!\n", name, value); } - /* Close file */ fclose (fp); + + /* Check config settings */ + if (strnlen(config->cluster_name, MAXLEN)==0) + { + fprintf(stderr, "Cluster name is missing. " + "Check the configuration file.\n"); + exit(1); + } + + if (config->node == -1) + { + fprintf(stderr, "Node information is missing. " + "Check the configuration file.\n"); + exit(1); + } } char * diff --git a/config.h b/config.h index 9b2ea3f..9241d1d 100644 --- a/config.h +++ b/config.h @@ -16,7 +16,22 @@ * along with this program. If not, see . * */ +#ifndef _CONFIG_H_ +#define _CONFIG_H_ -void parse_config(const char *config_file, char *cluster_name, int *node, char *service); +#include "strutil.h" + + +typedef struct +{ + char cluster_name[MAXLEN]; + int node; + char conninfo[MAXLEN]; + char rsync_options[QUERY_STR_LEN]; +} repmgr_config; + +void parse_config(const char *config_file, repmgr_config *config); void parse_line(char *buff, char *name, char *value); char *trim(char *s); + +#endif diff --git a/dbutils.c b/dbutils.c index 38b8c05..9756aa2 100644 --- a/dbutils.c +++ b/dbutils.c @@ -19,8 +19,7 @@ #include "repmgr.h" -#define MAXQUERY 8192 -#define MAXCONNINFO 1024 +#include "strutil.h" PGconn * establishDBConnection(const char *conninfo, const bool exit_on_error) @@ -32,7 +31,8 @@ establishDBConnection(const char *conninfo, const bool exit_on_error) if ((PQstatus(conn) != CONNECTION_OK)) { fprintf(stderr, "Connection to database failed: %s", - PQerrorMessage(conn)); + PQerrorMessage(conn)); + if (exit_on_error) { PQfinish(conn); @@ -44,7 +44,6 @@ establishDBConnection(const char *conninfo, const bool exit_on_error) } - bool is_standby(PGconn *conn) { @@ -52,6 +51,7 @@ is_standby(PGconn *conn) bool result; res = PQexec(conn, "SELECT pg_is_in_recovery()"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn)); @@ -79,11 +79,15 @@ 
pg_version(PGconn *conn, char* major_version) { PGresult *res; - int major_version1; - char *major_version2; + int major_version1; + char *major_version2; + + res = PQexec(conn, + "WITH pg_version(ver) AS " + "(SELECT split_part(version(), ' ', 2)) " + "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) " + "FROM pg_version"); - res = PQexec(conn, "WITH pg_version(ver) AS (SELECT split_part(version(), ' ', 2)) " - "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) FROM pg_version"); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); @@ -91,29 +95,33 @@ pg_version(PGconn *conn, char* major_version) PQfinish(conn); exit(1); } + major_version1 = atoi(PQgetvalue(res, 0, 0)); major_version2 = PQgetvalue(res, 0, 1); - PQclear(res); if (major_version1 >= 9) { /* form a major version string */ - snprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, major_version2); + xsnprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, + major_version2); } else strcpy(major_version, ""); + PQclear(res); + return major_version; } bool -guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value) +guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value) { PGresult *res; - char sqlquery[MAXQUERY]; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, "SELECT true FROM pg_settings " + sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " " WHERE name = '%s' AND setting %s '%s'", parameter, op, value); @@ -140,11 +148,13 @@ const char * get_cluster_size(PGconn *conn) { PGresult *res; - const char *size; - char sqlquery[MAXQUERY]; + const char *size; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " - " FROM pg_database "); + sqlquery_snprintf( + sqlquery, + "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " + " FROM pg_database "); res = PQexec(conn, sqlquery); if 
(PQresultStatus(res) != PGRES_TUPLES_OK) @@ -162,26 +172,57 @@ get_cluster_size(PGconn *conn) /* * get a connection to master by reading repl_nodes, creating a connection * to each node (one at a time) and finding if it is a master or a standby + * + * NB: master_conninfo_out may be NULL. If it is non-null, it is assumed to + * point to allocated memory of MAXCONNINFO in length, and the master server + * connection string is placed there. */ PGconn * -getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) +getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id, char *master_conninfo_out) { - PGconn *master_conn = NULL; - PGresult *res1; - PGresult *res2; - char sqlquery[MAXQUERY]; - char master_conninfo[MAXCONNINFO]; + PGconn *master_conn = NULL; + PGresult *res1; + PGresult *res2; + char sqlquery[QUERY_STR_LEN]; + char master_conninfo_stack[MAXCONNINFO]; + char *master_conninfo = &*master_conninfo_stack; + char schema_str[MAXLEN]; + char schema_quoted[MAXLEN]; + int i; + /* + * If the caller wanted to get a copy of the connection info string, sub + * out the local stack pointer for the pointer passed by the caller. 
+ */ + if (master_conninfo_out != NULL) + master_conninfo = master_conninfo_out; + + /* + * XXX: This is copied in at least two other procedures + * + * Assemble the unquoted schema name + */ + maxlen_snprintf(schema_str, "repmgr_%s", cluster); + { + char *identifier = PQescapeIdentifier(standby_conn, schema_str, + strlen(schema_str)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } + /* find all nodes belonging to this cluster */ - sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE cluster = '%s' and id <> %d", - cluster, cluster, id); + sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " + " WHERE cluster = '%s' and id <> %d", + schema_quoted, cluster, id); res1 = PQexec(standby_conn, sqlquery); if (PQresultStatus(res1) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(standby_conn)); + fprintf(stderr, "Can't get nodes info: %s\n", + PQerrorMessage(standby_conn)); PQclear(res1); PQfinish(standby_conn); exit(1); @@ -193,18 +234,21 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) *master_id = atoi(PQgetvalue(res1, i, 0)); strncpy(master_conninfo, PQgetvalue(res1, i, 2), MAXCONNINFO); master_conn = establishDBConnection(master_conninfo, false); + if (PQstatus(master_conn) != CONNECTION_OK) continue; /* * I can't use the is_standby() function here because on error that - * function closes the connection i pass and exit, but i still need to close - * standby_conn + * function closes the connection i pass and exit, but i still need to + * close standby_conn */ res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()"); + if (PQresultStatus(res2) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get recovery state from this node: %s\n", PQerrorMessage(master_conn)); + fprintf(stderr, "Can't get recovery state from this node: %s\n", + PQerrorMessage(master_conn)); PQclear(res2); PQfinish(master_conn); continue; @@ -229,7 +273,8 @@ 
getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) /* If we finish this loop without finding a master then * we doesn't have the info or the master has failed (or we * reached max_connections or superuser_reserved_connections, - * anything else i'm missing?), + * anything else I'm missing?). + * * Probably we will need to check the error to know if we need * to start failover procedure or just fix some situation on the * standby. @@ -237,4 +282,3 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) PQclear(res1); return NULL; } - diff --git a/dbutils.h b/dbutils.h index cc5830d..4aa28c9 100644 --- a/dbutils.h +++ b/dbutils.h @@ -18,8 +18,10 @@ */ PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error); -bool is_standby(PGconn *conn); +bool is_standby(PGconn *conn); char *pg_version(PGconn *conn, char* major_version); -bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); -const char *get_cluster_size(PGconn *conn); -PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id); +bool guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value); +const char *get_cluster_size(PGconn *conn); +PGconn *getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id, char *master_conninfo_out); diff --git a/repmgr.c b/repmgr.c index 8e2ce89..cfee11a 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1,6 +1,6 @@ /* * repmgr.c - Command interpreter for the repmgr - * Copyright (C) 2ndQuadrant, 2010 + * * * This module is a command-line utility to easily setup a cluster of * hot standby servers for an HA environment @@ -32,22 +32,22 @@ #include #include "check_dir.h" +#include "strutil.h" #define RECOVERY_FILE "recovery.conf" #define RECOVERY_DONE_FILE "recovery.done" -#define NO_ACTION 0 /* Not a real action, just to initialize */ -#define MASTER_REGISTER 1 +#define NO_ACTION 0 /* Not a real 
action, just to initialize */ +#define MASTER_REGISTER 1 #define STANDBY_REGISTER 2 -#define STANDBY_CLONE 3 -#define STANDBY_PROMOTE 4 -#define STANDBY_FOLLOW 5 - -#define QUERY_STR_LEN 8192 +#define STANDBY_CLONE 3 +#define STANDBY_PROMOTE 4 +#define STANDBY_FOLLOW 5 static void help(const char *progname); -static bool create_recovery_file(const char *data_dir); -static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory); +static bool create_recovery_file(const char *data_dir, char *master_conninfo); +static int copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory); static bool check_parameters_for_action(const int action); static void do_master_register(void); @@ -77,6 +77,7 @@ char *masterport = NULL; char *server_mode = NULL; char *server_cmd = NULL; +repmgr_config config = {}; int main(int argc, char **argv) @@ -117,7 +118,8 @@ main(int argc, char **argv) } - while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, + &optindex)) != -1) { switch (c) { @@ -128,20 +130,20 @@ main(int argc, char **argv) host = optarg; break; case 'p': - masterport = optarg; - break; + masterport = optarg; + break; case 'U': username = optarg; break; case 'D': dest_dir = optarg; - break; + break; case 'f': config_file = optarg; - break; + break; case 'R': remote_user = optarg; - break; + break; case 'w': wal_keep_segments = optarg; break; @@ -152,7 +154,8 @@ main(int argc, char **argv) verbose = true; break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } } @@ -168,9 +171,11 @@ main(int argc, char **argv) if (optind < argc) { server_mode = argv[optind++]; - if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 
0) + if (strcasecmp(server_mode, "STANDBY") != 0 && + strcasecmp(server_mode, "MASTER") != 0) { - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } } @@ -199,7 +204,8 @@ main(int argc, char **argv) action = STANDBY_FOLLOW; else { - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } } @@ -225,7 +231,8 @@ main(int argc, char **argv) default: fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"), progname, argv[optind + 1]); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } @@ -234,8 +241,10 @@ main(int argc, char **argv) if (config_file == NULL) { - config_file = malloc(5 + sizeof(CONFIG_FILE)); - sprintf(config_file, "./%s", CONFIG_FILE); + const int buf_sz = 3 + sizeof(CONFIG_FILE); + + config_file = malloc(buf_sz); + xsnprintf(config_file, buf_sz, "./%s", CONFIG_FILE); } if (wal_keep_segments == NULL) @@ -254,6 +263,22 @@ main(int argc, char **argv) dbname = "postgres"; } + /* + * Read the configuration file: repmgr.conf, but only if we're not doing a + * STANDBY CLONE action: it is not necessary to have the configuration file + * in that case. + */ + if (action != STANDBY_CLONE) + { + parse_config(config_file, &config); + if (config.node == -1) + { + fprintf(stderr, "Node information is missing. 
" + "Check the configuration file.\n"); + exit(1); + } + } + keywords[2] = "user"; values[2] = username; keywords[3] = "dbname"; @@ -281,7 +306,8 @@ main(int argc, char **argv) do_standby_follow(); break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } @@ -292,36 +318,24 @@ main(int argc, char **argv) static void do_master_register(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; - - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char sqlquery[QUERY_STR_LEN]; + char schema_str[MAXLEN]; + char schema_quoted[MAXLEN]; bool schema_exists = false; - char master_version[MAXVERSIONSTR]; - - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); - exit(1); - } + char master_version[MAXVERSIONSTR]; - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(config.conninfo, true); /* master should be v9 or better */ pg_version(conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -333,12 +347,25 @@ do_master_register(void) return; } + /* Assemble the unquoted schema name */ + maxlen_snprintf(schema_str, "repmgr_%s", config.cluster_name); + { + char *identifier = PQescapeIdentifier(conn, schema_str, + strlen(schema_str)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } + /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); + sqlquery_snprintf(sqlquery, + "SELECT 
1 FROM pg_namespace " + "WHERE nspname = '%s'", schema_str); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about schemas: %s\n", PQerrorMessage(conn)); + fprintf(stderr, "Can't get info about schemas: %s\n", + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; @@ -348,7 +375,7 @@ do_master_register(void) { if (!force) /* and we are not forcing so error */ { - fprintf(stderr, "Schema repmgr_%s already exists.", myClusterName); + fprintf(stderr, "Schema %s already exists.", schema_quoted); PQclear(res); PQfinish(conn); return; @@ -360,69 +387,75 @@ do_master_register(void) if (!schema_exists) { /* ok, create the schema */ - sprintf(sqlquery, "CREATE SCHEMA repmgr_%s", myClusterName); + sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", schema_quoted); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the schema repmgr_%s: %s\n", - myClusterName, PQerrorMessage(conn)); + fprintf(stderr, "Cannot create the schema %s: %s\n", schema_quoted, + PQerrorMessage(conn)); PQfinish(conn); return; } /* ... 
the tables */ - sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " - " id integer primary key, " - " cluster text not null, " - " conninfo text not null)", myClusterName); + sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( " + " id integer primary key, " + " cluster text not null, " + " conninfo text not null)", schema_quoted); if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the table repmgr_%s.repl_nodes: %s\n", - myClusterName, PQerrorMessage(conn)); + fprintf(stderr, "Cannot create the table repmgr_%s.repl_nodes: %s\n", + config.cluster_name, PQerrorMessage(conn)); PQfinish(conn); return; } - sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( " - " primary_node INTEGER NOT NULL, " - " standby_node INTEGER NOT NULL, " - " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " - " last_wal_primary_location TEXT NOT NULL, " - " last_wal_standby_location TEXT NOT NULL, " - " replication_lag BIGINT NOT NULL, " - " apply_lag BIGINT NOT NULL) ", myClusterName); - if (!PQexec(conn, sqlquery)) - { - fprintf(stderr, "Cannot create the table repmgr_%s.repl_monitor: %s\n", - myClusterName, PQerrorMessage(conn)); - PQfinish(conn); - return; - } + sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( " + " primary_node INTEGER NOT NULL, " + " standby_node INTEGER NOT NULL, " + " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " + " last_wal_primary_location TEXT NOT NULL, " + " last_wal_standby_location TEXT NOT NULL, " + " replication_lag BIGINT NOT NULL, " + " apply_lag BIGINT NOT NULL) ", + schema_quoted); - /* and the view */ - sprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS " - " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " - " ORDER BY last_monitor_time desc) " - " FROM repmgr_%s.repl_monitor) " - " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " - " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " - " pg_size_pretty(apply_lag) apply_lag, age(now(), 
last_monitor_time) AS time_lag " - " FROM monitor_info a " - " WHERE row_number = 1", myClusterName, myClusterName); - if (!PQexec(conn, sqlquery)) - { - fprintf(stderr, "Cannot create the view repmgr_%s.repl_status: %s\n", - myClusterName, PQerrorMessage(conn)); - PQfinish(conn); - return; - } + if (!PQexec(conn, sqlquery)) + { + fprintf(stderr, "Cannot create the table repmgr_%s.repl_monitor: %s\n", + config.cluster_name, PQerrorMessage(conn)); + PQfinish(conn); + return; + } + + /* and the view */ + sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS " + " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " + " ORDER BY last_monitor_time desc) " + " FROM %s.repl_monitor) " + " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " + " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " + " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " + " FROM monitor_info a " + " WHERE row_number = 1", + schema_quoted, schema_quoted); + + if (!PQexec(conn, sqlquery)) + { + fprintf(stderr, "Cannot create the view repmgr_%s.repl_status: %s\n", + config.cluster_name, PQerrorMessage(conn)); + PQfinish(conn); + return; + } } else { PGconn *master_conn; - int id; + int id; /* Ensure there isn't any other master already registered */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id); + master_conn = getMasterConnection(conn, config.node, + config.cluster_name, &id, NULL); + if (master_conn != NULL) { PQfinish(master_conn); @@ -434,27 +467,28 @@ do_master_register(void) /* Now register the master */ if (force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + sqlquery_snprintf(sqlquery, + "DELETE FROM %s.repl_nodes WHERE id = %d", + schema_quoted, config.node); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot delete node details, %s\n", - PQerrorMessage(conn)); + PQerrorMessage(conn)); PQfinish(conn); return; } } - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - 
"VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " + "VALUES (%d, '%s', '%s')", + schema_quoted, config.node, config.cluster_name, + config.conninfo); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", - PQerrorMessage(conn)); + PQerrorMessage(conn)); PQfinish(conn); return; } @@ -467,39 +501,29 @@ do_master_register(void) static void do_standby_register(void) { - PGconn *conn; + PGconn *conn; PGconn *master_conn; int master_id; PGresult *res; - char sqlquery[QUERY_STR_LEN]; - - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char sqlquery[QUERY_STR_LEN]; + char schema_str[MAXLEN]; + char schema_quoted[MAXLEN]; char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); - exit(1); - } + conn = establishDBConnection(config.conninfo, true); - conn = establishDBConnection(conninfo, true); + /* XXX: A lot of copied code from do_master_register! 
Refactor */ /* should be v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -511,20 +535,35 @@ do_standby_register(void) return; } + /* Assemble the unquoted schema name */ + maxlen_snprintf(schema_str, "repmgr_%s", config.cluster_name); + { + char *identifier = PQescapeIdentifier(conn, schema_str, + strlen(schema_str)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } + /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); + sqlquery_snprintf(sqlquery, + "SELECT 1 FROM pg_namespace " + " WHERE nspname = '%s'", schema_str); + res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); + fprintf(stderr, "Can't get info about tablespaces: %s\n", + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } - if (PQntuples(res) == 0) /* schema doesn't exists */ + /* schema doesn't exist */ + if (PQntuples(res) == 0) { - fprintf(stderr, "Schema repmgr_%s doesn't exists.", myClusterName); + fprintf(stderr, "Schema %s doesn't exist.\n", schema_quoted); PQclear(res); PQfinish(conn); return; @@ -532,7 +571,9 @@ do_standby_register(void) PQclear(res); /* check if there is a master in this cluster */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); + master_conn = getMasterConnection(conn, config.node, config.cluster_name, + &master_id, NULL); + if (!master_conn) return; @@ -542,7 +583,8 @@ do_standby_register(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be 
PostgreSQL 9.0 or better\n"), + progname); return; } @@ -552,7 +594,7 @@ do_standby_register(void) PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), - progname, master_version, standby_version); + progname, master_version, standby_version); return; } @@ -560,28 +602,30 @@ do_standby_register(void) /* Now register the standby */ if (force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + sqlquery_snprintf(sqlquery, + "DELETE FROM %s.repl_nodes " + " WHERE id = %d", + schema_quoted, config.node); if (!PQexec(master_conn, sqlquery)) { fprintf(stderr, "Cannot delete node details, %s\n", - PQerrorMessage(master_conn)); + PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); return; } } - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " + "VALUES (%d, '%s', '%s')", + schema_quoted, config.node, config.cluster_name, + config.conninfo); if (!PQexec(master_conn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", - PQerrorMessage(master_conn)); + PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); return; @@ -596,9 +640,9 @@ do_standby_register(void) static void do_standby_clone(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; int r = 0; int i; @@ -611,8 +655,8 @@ do_standby_clone(void) char master_control_file[MAXLEN]; char local_control_file[MAXLEN]; - const char *first_wal_segment = NULL; - const char *last_wal_segment = NULL; + char *first_wal_segment = NULL; + const char *last_wal_segment = NULL; char master_version[MAXVERSIONSTR]; @@ -626,46 +670,46 @@ do_standby_clone(void) /* Check this directory could be used as a PGDATA dir */ switch (check_dir(dest_dir)) { - case 0: - /* 
dest_dir not there, must create it */ + case 0: + /* dest_dir not there, must create it */ if (verbose) - printf(_("creating directory %s ... "), dest_dir); - fflush(stdout); + printf(_("creating directory %s ... "), dest_dir); + fflush(stdout); - if (!create_directory(dest_dir)) + if (!create_directory(dest_dir)) { - fprintf(stderr, _("%s: couldn't create directory %s ... "), + fprintf(stderr, _("%s: couldn't create directory %s ... "), progname, dest_dir); return; } - break; - case 1: - /* Present but empty, fix permissions and use it */ + break; + case 1: + /* Present but empty, fix permissions and use it */ if (verbose) - printf(_("fixing permissions on existing directory %s ... "), - dest_dir); - fflush(stdout); + printf(_("fixing permissions on existing directory %s ... "), + dest_dir); + fflush(stdout); if (!set_directory_permissions(dest_dir)) - { - fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), - progname, dest_dir, strerror(errno)); + { + fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), + progname, dest_dir, strerror(errno)); return; - } - break; - case 2: - /* Present and not empty */ - fprintf(stderr, - _("%s: directory \"%s\" exists but is not empty\n"), - progname, dest_dir); + } + break; + case 2: + /* Present and not empty */ + fprintf(stderr, + _("%s: directory \"%s\" exists but is not empty\n"), + progname, dest_dir); - pg_dir = is_pg_dir(dest_dir); - if (pg_dir && !force) + pg_dir = is_pg_dir(dest_dir); + if (pg_dir && !force) { fprintf(stderr, _("\nThis looks like a PostgreSQL directroy.\n" - "If you are sure you want to clone here, " - "please check there is no PostgreSQL server " - "running and use the --force option\n")); + "If you are sure you want to clone here, " + "please check there is no PostgreSQL server " + "running and use the --force option\n")); return; } else if (pg_dir && force) @@ -673,12 +717,12 @@ do_standby_clone(void) /* Let it continue */ break; } - else + 
else return; - default: - /* Trouble accessing directory */ - fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), - progname, dest_dir, strerror(errno)); + default: + /* Trouble accessing directory */ + fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), + progname, dest_dir, strerror(errno)); } /* Connection parameters for master only */ @@ -692,7 +736,7 @@ do_standby_clone(void) if (!conn) { fprintf(stderr, _("%s: could not connect to master\n"), - progname); + progname); return; } @@ -701,7 +745,8 @@ do_standby_clone(void) if (strcmp(master_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -723,7 +768,7 @@ do_standby_clone(void) if (!guc_setted(conn, "wal_keep_segments", ">=", wal_keep_segments)) { PQfinish(conn); - fprintf(stderr, _("%s needs parameter 'wal_keep_segments' to be set to %s or greater\n"), wal_keep_segments, progname); + fprintf(stderr, _("%s needs parameter 'wal_keep_segments' to be set to %s or greater\n"), progname, wal_keep_segments); return; } if (!guc_setted(conn, "archive_mode", "=", "on")) @@ -736,8 +781,13 @@ do_standby_clone(void) if (verbose) printf(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); - /* Check if the tablespace locations exists and that we can write to them */ - sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + /* + * Check if the tablespace locations exist and that we can write to them.
+ */ + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + "WHERE spcname NOT IN ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -754,64 +804,70 @@ do_standby_clone(void) /* Check this directory could be used as a PGDATA dir */ switch (check_dir(tblspc_dir)) { - case 0: - /* tblspc_dir not there, must create it */ + case 0: + /* tblspc_dir not there, must create it */ if (verbose) - printf(_("creating directory \"%s\"... "), tblspc_dir); - fflush(stdout); + printf(_("creating directory \"%s\"... "), tblspc_dir); + fflush(stdout); - if (!create_directory(tblspc_dir)) + if (!create_directory(tblspc_dir)) { - fprintf(stderr, _("%s: couldn't create directory \"%s\"... "), + fprintf(stderr, + _("%s: couldn't create directory \"%s\"... "), progname, tblspc_dir); PQclear(res); PQfinish(conn); return; } - break; - case 1: - /* Present but empty, fix permissions and use it */ + break; + case 1: + /* Present but empty, fix permissions and use it */ if (verbose) - printf(_("fixing permissions on existing directory \"%s\"... "), - tblspc_dir); - fflush(stdout); - - if (!set_directory_permissions(tblspc_dir)) - { - fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), - progname, tblspc_dir, strerror(errno)); + printf(_("fixing permissions on existing directory \"%s\"... 
"), + tblspc_dir); + fflush(stdout); + + if (!set_directory_permissions(tblspc_dir)) + { + fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), + progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); return; - } - break; - case 2: - /* Present and not empty */ - if (!force) - { + } + break; + case 2: + /* Present and not empty */ + if (!force) + { + fprintf( + stderr, + _("%s: directory \"%s\" exists but is not empty\n"), + progname, tblspc_dir); + PQclear(res); + PQfinish(conn); + return; + } + default: + /* Trouble accessing directory */ fprintf(stderr, - _("%s: directory \"%s\" exists but is not empty\n"), - progname, tblspc_dir); + _("%s: could not access directory \"%s\": %s\n"), + progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); return; - } - default: - /* Trouble accessing directory */ - fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), - progname, tblspc_dir, strerror(errno)); - PQclear(res); - PQfinish(conn); - return; } } fprintf(stderr, "Starting backup...\n"); /* Get the data directory full path and the configuration files location */ - sprintf(sqlquery, "SELECT name, setting " - " FROM pg_settings " - " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); + sqlquery_snprintf( + sqlquery, + "SELECT name, setting " + " FROM pg_settings " + " WHERE name IN ('data_directory', 'config_file', 'hba_file', " + " 'ident_file')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -839,7 +895,10 @@ do_standby_clone(void) * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files */ - sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); + sqlquery_snprintf( + sqlquery, + "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", + time(NULL)); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != 
PGRES_TUPLES_OK) { @@ -848,60 +907,80 @@ do_standby_clone(void) PQfinish(conn); return; } - first_wal_segment = PQgetvalue(res, 0, 0); + + if (verbose) + { + char *first_wal_seg_pq = PQgetvalue(res, 0, 0); + size_t buf_sz = strlen(first_wal_seg_pq); + + first_wal_segment = malloc(buf_sz + 1); + xsnprintf(first_wal_segment, buf_sz + 1, "%s", first_wal_seg_pq); + } + PQclear(res); /* * 1) first move global/pg_control * - * 2) then move data_directory ommiting the files we have already moved and pg_xlog - * content + * 2) then move data_directory omitting the files we have already moved and + * pg_xlog content * - * 3) finally We need to backup configuration files (that could be on other directories, debian - * like systems likes to do that), so look at config_file, hba_file and ident_file but we - * can omit external_pid_file ;) + * 3) finally we need to back up configuration files (that could be on other + * directories, Debian-like systems like to do that), so look at + * config_file, hba_file and ident_file but we can omit + * external_pid_file ;) * * On error we need to return but before that execute pg_stop_backup() */ /* need to create the global sub directory */ - sprintf(master_control_file, "%s/global/pg_control", master_data_directory); - sprintf(local_control_file, "%s/global", dest_dir); + maxlen_snprintf(master_control_file, "%s/global/pg_control", + master_data_directory); + maxlen_snprintf(local_control_file, "%s/global", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s ...
"), - progname, dest_dir); + progname, dest_dir); goto stop_backup; } - r = copy_remote_files(host, remote_user, master_control_file, local_control_file, false); + r = copy_remote_files(host, remote_user, master_control_file, + local_control_file, false); if (r != 0) goto stop_backup; - r = copy_remote_files(host, remote_user, master_data_directory, dest_dir, true); + r = copy_remote_files(host, remote_user, master_data_directory, dest_dir, + true); if (r != 0) goto stop_backup; /* - * Copy tablespace locations, i'm doing this separately because i couldn't find and appropiate - * rsync option but besides we could someday make all these rsync happen concurrently + * Copy tablespace locations, i'm doing this separately because i couldn't + * find and appropiate rsync option but besides we could someday make all + * these rsync happen concurrently */ - sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + " WHERE spcname NOT IN ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); + fprintf(stderr, "Can't get info about tablespaces: %s\n", + PQerrorMessage(conn)); PQclear(res); goto stop_backup; } for (i = 0; i < PQntuples(res); i++) { - r = copy_remote_files(host, remote_user, PQgetvalue(res, i, 0), PQgetvalue(res, i, 0), true); + r = copy_remote_files(host, remote_user, PQgetvalue(res, i, 0), + PQgetvalue(res, i, 0), true); if (r != 0) goto stop_backup; } - r = copy_remote_files(host, remote_user, master_config_file, dest_dir, false); + r = copy_remote_files(host, remote_user, master_config_file, dest_dir, + false); if (r != 0) goto stop_backup; @@ -909,7 +988,8 @@ do_standby_clone(void) if (r != 0) goto stop_backup; - r = copy_remote_files(host, remote_user, master_ident_file, dest_dir, false); + r 
= copy_remote_files(host, remote_user, master_ident_file, dest_dir, + false); if (r != 0) goto stop_backup; @@ -919,13 +999,13 @@ do_standby_clone(void) if (!conn) { fprintf(stderr, _("%s: could not connect to master\n"), - progname); + progname); return; } fprintf(stderr, "Finishing backup...\n"); - sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); + sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -935,6 +1015,21 @@ do_standby_clone(void) return; } last_wal_segment = PQgetvalue(res, 0, 0); + + if (verbose) + { + printf( + _("%s requires primary to keep WAL files %s until at least %s\n"), + progname, first_wal_segment, last_wal_segment); + + /* + * Only free the first_wal_segment since it was copied out of the + * pqresult. + */ + free(first_wal_segment); + first_wal_segment = NULL; + } + PQclear(res); PQfinish(conn); @@ -942,22 +1037,24 @@ do_standby_clone(void) if (r != 0) return; - if (verbose) - printf(_("%s requires primary to keep WAL files %s until at least %s\n"), - progname, first_wal_segment, last_wal_segment); - - /* we need to create the pg_xlog sub directory too, i'm reusing a variable here */ - sprintf(local_control_file, "%s/pg_xlog", dest_dir); + /* + * We need to create the pg_xlog sub directory too, I'm reusing a variable + * here. 
+ */ + maxlen_snprintf(local_control_file, "%s/pg_xlog", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s, you will need to do it manually...\n"), - progname, dest_dir); + progname, dest_dir); } /* Finally, write the recovery.conf file */ - create_recovery_file(dest_dir); + create_recovery_file(dest_dir, NULL); - /* We don't start the service because we still may want to move the directory */ + /* + * We don't start the service because we still may want to move the + * directory + */ return; } @@ -965,14 +1062,10 @@ do_standby_clone(void) static void do_standby_promote(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; - - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char sqlquery[QUERY_STR_LEN]; + char script[MAXLEN]; PGconn *old_master_conn; int old_master_id; @@ -984,38 +1077,31 @@ do_standby_promote(void) char standby_version[MAXVERSIONSTR]; - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is missing. 
" - "Check the configuration file.\n"); - exit(1); - } - /* We need to connect to check configuration */ - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(config.conninfo, true); /* we need v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), + progname); return; } /* Check we are in a standby node */ if (!is_standby(conn)) { - fprintf(stderr, "repmgr: The command should be executed in a standby node\n"); + fprintf(stderr, + "repmgr: The command should be executed in a standby node\n"); return; } /* we also need to check if there isn't any master already */ - old_master_conn = getMasterConnection(conn, myLocalId, myClusterName, &old_master_id); + old_master_conn = getMasterConnection(conn, config.node, config.cluster_name, + &old_master_id, NULL); + if (old_master_conn != NULL) { PQfinish(old_master_conn); @@ -1027,12 +1113,13 @@ do_standby_promote(void) printf(_("\n%s: Promoting standby...\n"), progname); /* Get the data directory full path and the last subdirectory */ - sprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); + sqlquery_snprintf(sqlquery, "SELECT setting " + " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); + fprintf(stderr, "Can't get info about data directory: %s\n", + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; @@ -1041,12 +1128,12 @@ do_standby_promote(void) PQclear(res); PQfinish(conn); - sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); - sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); + 
maxlen_snprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); rename(recovery_file_path, recovery_done_path); /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); + maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { @@ -1055,16 +1142,17 @@ do_standby_promote(void) } /* reconnect to check we got promoted */ + /* - * XXX i'm removing this because it gives an annoying message saying couldn't connect - * but is just the server starting up - * conn = establishDBConnection(conninfo, true); - * if (is_standby(conn)) - * fprintf(stderr, "\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); - * else - * fprintf(stderr, "\n%s: you should REINDEX any hash indexes you have.\n", progname); - * PQfinish(conn); - */ + * XXX i'm removing this because it gives an annoying message saying + * couldn't connect but is just the server starting up + * conn = establishDBConnection(config.conninfo, true); + * if (is_standby(conn)) + * fprintf(stderr, "\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); + * else + * fprintf(stderr, "\n%s: you should REINDEX any hash indexes you have.\n", progname); + * PQfinish(conn); + */ return; } @@ -1073,15 +1161,12 @@ do_standby_promote(void) static void do_standby_follow(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; - - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char sqlquery[QUERY_STR_LEN]; + char script[MAXLEN]; + char master_conninfo[MAXLEN]; PGconn *master_conn; int master_id; @@ -1091,19 +1176,8 @@ do_standby_follow(void) char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; - /* - * Read the configuration file: repmgr.conf - */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) - { - fprintf(stderr, "Node information is 
missing. " - "Check the configuration file.\n"); - exit(1); - } - /* We need to connect to check configuration */ - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(config.conninfo, true); /* Check we are in a standby node */ if (!is_standby(conn)) @@ -1122,7 +1196,9 @@ do_standby_follow(void) } /* we also need to check if there is any master in the cluster */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); + master_conn = getMasterConnection(conn, config.node, config.cluster_name, + &master_id, (char *) &master_conninfo); + if (master_conn == NULL) { PQfinish(conn); @@ -1134,7 +1210,8 @@ do_standby_follow(void) if (is_standby(master_conn)) { PQfinish(conn); - fprintf(stderr, "%s: The node to follow should be a master\n", progname); + fprintf(stderr, "%s: The node to follow should be a master\n", + progname); return; } @@ -1144,7 +1221,8 @@ do_standby_follow(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -1154,7 +1232,7 @@ do_standby_follow(void) PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), - progname, master_version, standby_version); + progname, master_version, standby_version); return; } @@ -1163,9 +1241,20 @@ do_standby_follow(void) * before closing the connection because we will need them to * recreate the recovery.conf file */ - host = malloc(20); + + /* + * Copy the hostname to the 'host' global variable from the master + * connection. 
+ */ + { + char *pqhost = PQhost(master_conn); + const int host_buf_sz = strlen(pqhost); + + host = malloc(host_buf_sz + 1); + xsnprintf(host, host_buf_sz, "%s", pqhost); + } + masterport = malloc(10); - strcpy(host, PQhost(master_conn)); strcpy(masterport, PQport(master_conn)); PQfinish(master_conn); @@ -1173,12 +1262,13 @@ do_standby_follow(void) printf(_("\n%s: Changing standby's master...\n"), progname); /* Get the data directory full path */ - sprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); + sqlquery_snprintf(sqlquery, "SELECT setting " + " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); + fprintf(stderr, "Can't get info about data directory: %s\n", + PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; @@ -1188,12 +1278,12 @@ do_standby_follow(void) PQfinish(conn); /* write the recovery.conf file */ - if (!create_recovery_file(data_dir)) + if (!create_recovery_file(data_dir, master_conninfo)) return; /* Finally, restart the service */ /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); + maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { @@ -1210,44 +1300,50 @@ help(const char *progname) { printf(_("\n%s: Replicator manager \n"), progname); printf(_("Usage:\n")); - printf(_(" %s [OPTIONS] master {register}\n"), progname); - printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), progname); + printf(_(" %s [OPTIONS] master {register}\n"), progname); + printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), + progname); printf(_("\nGeneral options:\n")); - printf(_(" --help show this help, then exit\n")); - printf(_(" --version output version information, then exit\n")); - printf(_(" --verbose output verbose activity 
information\n")); + printf(_(" --help show this help, then exit\n")); + printf(_(" --version output version information, then exit\n")); + printf(_(" --verbose output verbose activity information\n")); printf(_("\nConnection options:\n")); - printf(_(" -d, --dbname=DBNAME database to connect to\n")); - printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); - printf(_(" -p, --port=PORT database server port\n")); - printf(_(" -U, --username=USERNAME database user name to connect as\n")); + printf(_(" -d, --dbname=DBNAME database to connect to\n")); + printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); + printf(_(" -p, --port=PORT database server port\n")); + printf(_(" -U, --username=USERNAME database user name to connect as\n")); printf(_("\nConfiguration options:\n")); - printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); - printf(_(" -f, --config_file=PATH path to the configuration file\n")); - printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); - printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); - printf(_(" -F, --force force potentially dangerous operations to happen\n")); + printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); + printf(_(" -f, --config_file=PATH path to the configuration file\n")); + printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); + printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); + printf(_(" -F, --force force potentially dangerous operations to happen\n")); printf(_("\n%s performs some tasks like clone a node, promote it "), progname); printf(_("or making follow another node and then exits.\n")); printf(_("COMMANDS:\n")); - printf(_(" master register - registers the master in a cluster\n")); - printf(_(" standby register - registers a standby in a 
cluster\n")); - printf(_(" standby clone [node] - allows creation of a new standby\n")); - printf(_(" standby promote - allows manual promotion of a specific standby into a ")); + printf(_(" master register - registers the master in a cluster\n")); + printf(_(" standby register - registers a standby in a cluster\n")); + printf(_(" standby clone [node] - allows creation of a new standby\n")); + printf(_(" standby promote - allows manual promotion of a specific standby into a ")); printf(_("new master in the event of a failover\n")); - printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); + printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); } +/* + * Creates a recovery file for a standby. + * + * Writes master_conninfo to recovery.conf if it is non-NULL + */ static bool -create_recovery_file(const char *data_dir) +create_recovery_file(const char *data_dir, char *master_conninfo) { FILE *recovery_file; char recovery_file_path[MAXLEN]; char line[MAXLEN]; - sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); recovery_file = fopen(recovery_file_path, "w"); if (recovery_file == NULL) @@ -1256,7 +1352,7 @@ create_recovery_file(const char *data_dir) return false; } - sprintf(line, "standby_mode = 'on'\n"); + maxlen_snprintf(line, "standby_mode = 'on'\n"); if (fputs(line, recovery_file) == EOF) { fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); @@ -1264,7 +1360,34 @@ create_recovery_file(const char *data_dir) return false; } - sprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, ((masterport==NULL) ? "5432" : masterport)); + /* + * Template a password into the connection string in recovery.conf.
+ * Sometimes this is passed by the user explicitly, and otherwise we try to + * get it from the environment + * + * XXX: This is pretty dirty, at least push this up to the caller rather + * than hitting environment variables at this level. + */ + if (master_conninfo == NULL) + { + char *password = getenv("PGPASSWORD"); + + if (password == NULL) + { + fprintf(stderr, + _("%s: Panic! PGPASSWORD not set, how can we get here?\n"), + progname); + exit(255); + } + + maxlen_snprintf(line, + "primary_conninfo = 'host=%s port=%s password=%s'\n", + host, ((masterport==NULL) ? "5432" : masterport), + password); + } + else + maxlen_snprintf(line, "primary_conninfo = '%s'\n", master_conninfo); + if (fputs(line, recovery_file) == EOF) { fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); @@ -1280,46 +1403,55 @@ create_recovery_file(const char *data_dir) static int -copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory) +copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory) { - char script[QUERY_STR_LEN]; - char options[QUERY_STR_LEN]; - char host_string[QUERY_STR_LEN]; - int r; + char script[MAXLEN]; + char options[MAXLEN]; + char host_string[MAXLEN]; + int r; + + if (strnlen(config.rsync_options, MAXLEN) == 0) + maxlen_snprintf( + options, "%s", + "--archive --checksum --compress --progress --rsh=ssh"); + else + maxlen_snprintf(options, "%s", config.rsync_options); - sprintf(options, "--archive --checksum --compress --progress --rsh=ssh"); if (force) strcat(options, " --delete"); if (remote_user == NULL) { - sprintf(host_string,"%s",host); + maxlen_snprintf(host_string, "%s", host); } else { - sprintf(host_string,"%s@%s",remote_user,host); + maxlen_snprintf(host_string,"%s@%s",remote_user,host); } if (is_directory) { - strcat(options, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); - sprintf(script, "rsync %s
%s:%s/* %s", + strcat(options, + " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); + maxlen_snprintf(script, "rsync %s %s:%s/* %s", options, host_string, remote_path, local_path); } else { - sprintf(script, "rsync %s %s:%s %s/.", + maxlen_snprintf(script, "rsync %s %s:%s %s/.", options, host_string, remote_path, local_path); } if (verbose) - printf("rsync command line: '%s'\n",script); + printf("rsync command line: '%s'\n",script); r = system(script); if (r != 0) - fprintf(stderr, _("Can't rsync from remote file or directory (%s:%s)\n"), - host_string, remote_path); + fprintf(stderr, + _("Can't rsync from remote file or directory (%s:%s)\n"), + host_string, remote_path); return r; } @@ -1341,17 +1473,19 @@ check_parameters_for_action(const int action) * all other parameters are at least useless and could be * confusing so reject them */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a MASTER REGISTER command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for MASTER REGISTER command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; @@ -1361,17 +1495,19 @@ check_parameters_for_action(const int action) * we don't need connection parameters to the master * because we can detect the master in repl_nodes */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou 
can't use connection parameters to the master when issuing a STANDBY REGISTER command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY REGISTER command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; @@ -1382,17 +1518,19 @@ check_parameters_for_action(const int action) * because we will try to detect the master in repl_nodes * if we can't find it then the promote action will be cancelled */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY PROMOTE command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY PROMOTE command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; @@ -1403,32 +1541,46 @@ check_parameters_for_action(const int action) * because we will try to detect the master in repl_nodes * if we can't find it then the follow action will be cancelled */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY FOLLOW command."); - 
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY FOLLOW command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; - case STANDBY_CLONE: - /* - * To clone a master into a standby we need connection parameters - * repmgr.conf is useless because we don't have a server running - * in the standby - */ - if (config_file != NULL) - { - fprintf(stderr, "\nYou need to use connection parameters to the master when issuing a STANDBY CLONE command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - ok = false; - } + case STANDBY_CLONE: + /* + * Issue a friendly notice that the configuration file is not + * necessary nor read at all in when performing a STANDBY CLONE + * action. + */ + if (config_file != NULL) + fprintf(stderr, "NOTICE: The passed configuration file is not " + "required nor used when performing the STANDBY CLONE " + "command.\n"); + + /* + * To clone a master into a standby we need connection parameters + * repmgr.conf is useless because we don't have a server running in + * the standby; warn the user, but keep going. 
+ */ + if (host == NULL) + { + fprintf(stderr, "\nYou need to use connection parameters to " + "the master when issuing a STANDBY CLONE command."); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + ok = false; + } break; } diff --git a/repmgr.conf b/repmgr.conf index f728750..a707dc4 100644 --- a/repmgr.conf +++ b/repmgr.conf @@ -1,3 +1,4 @@ cluster=test node=2 conninfo='host=192.168.204.104' +rsync_options=--archive --checksum --compress --progress --rsh=ssh diff --git a/repmgr.h b/repmgr.h index ec32b99..770063b 100644 --- a/repmgr.h +++ b/repmgr.h @@ -1,5 +1,6 @@ /* * repmgr.h + * * Copyright (c) 2ndQuadrant, 2010 * * This program is free software: you can redistribute it and/or modify @@ -16,23 +17,22 @@ * along with this program. If not, see . * */ - -#ifndef _REPMGR_H_ -#define _REPMGR_H_ - #include "postgres_fe.h" #include "getopt_long.h" #include "libpq-fe.h" + +#ifndef _REPMGR_H_ +#define _REPMGR_H_ + #include "dbutils.h" -#include "config.h" #define PRIMARY_MODE 0 #define STANDBY_MODE 1 -#define MAXLEN 80 #define CONFIG_FILE "repmgr.conf" -#define MAXVERSIONSTR 16 + +#include "config.h" #endif diff --git a/repmgr.sql b/repmgr.sql index f28e6d8..4a30081 100644 --- a/repmgr.sql +++ b/repmgr.sql @@ -1,3 +1,10 @@ +/* + * repmgr.sql + * + * Copyright (c) Heroku, 2010 + * + */ + CREATE USER repmgr; CREATE SCHEMA repmgr; @@ -5,27 +12,25 @@ CREATE SCHEMA repmgr; * The table repl_nodes keeps information about all machines in * a cluster */ -drop table if exists repl_nodes cascade; CREATE TABLE repl_nodes ( - id integer primary key, - cluster text not null, -- Name to identify the cluster - conninfo text not null + id integer primary key, + cluster text not null, -- Name to identify the cluster + conninfo text not null ); ALTER TABLE repl_nodes OWNER TO repmgr; /* - * Keeps monitor info about every node and their relative "position" + * Keeps monitor info about every node and their relative "position" * to primary */ -drop table if 
exists repl_monitor cascade; CREATE TABLE repl_monitor ( primary_node INTEGER NOT NULL, standby_node INTEGER NOT NULL, - last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, - last_wal_primary_location TEXT NOT NULL, + last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, + last_wal_primary_location TEXT NOT NULL, last_wal_standby_location TEXT NOT NULL, - replication_lag BIGINT NOT NULL, - apply_lag BIGINT NOT NULL + replication_lag BIGINT NOT NULL, + apply_lag BIGINT NOT NULL ); ALTER TABLE repl_monitor OWNER TO repmgr; @@ -33,21 +38,20 @@ ALTER TABLE repl_monitor OWNER TO repmgr; /* * This view shows the latest monitor info about every node. * Interesting thing to see: - * replication_lag: in bytes (this is how far the latest xlog record + * replication_lag: in bytes (this is how far the latest xlog record * we have received is from master) * apply_lag: in bytes (this is how far the latest xlog record - * we have applied is from the latest record we + * we have applied is from the latest record we * have received) * time_lag: how many seconds are we from being up-to-date with master */ -drop view if exists repl_status; CREATE VIEW repl_status AS WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node ORDER BY last_monitor_time desc) FROM repl_monitor) -SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, - last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, - pg_size_pretty(apply_lag) apply_lag, +SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, + last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, + pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag FROM monitor_info a WHERE row_number = 1; diff --git a/repmgrd.c b/repmgrd.c index d44c8a7..f31c452 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1,5 +1,6 @@ /* * repmgrd.c - Replication manager daemon + * * Copyright (C) 2ndQuadrant, 2010 * * This 
module connects to the nodes of a replication cluster and monitors @@ -27,28 +28,28 @@ #include #include "repmgr.h" +#include "strutil.h" #include "libpq/pqsignal.h" -char myClusterName[MAXLEN]; - /* Local info */ -int myLocalMode = STANDBY_MODE; -int myLocalId = -1; -PGconn *myLocalConn; +int myLocalMode = STANDBY_MODE; +PGconn *myLocalConn = NULL; /* Primary info */ int primaryId; char primaryConninfo[MAXLEN]; -PGconn *primaryConn; +PGconn *primaryConn = NULL; -char sqlquery[8192]; +char sqlquery[QUERY_STR_LEN]; const char *progname; char *config_file = NULL; bool verbose = false; +// should initialize with {0} to be ANSI compliant? but this raises error with gcc -Wall +repmgr_config config = {}; static void help(const char *progname); static void checkClusterConfiguration(void); @@ -62,23 +63,23 @@ static unsigned long long int walLocationToBytes(char *wal_location); static void handle_sigint(SIGNAL_ARGS); static void setup_cancel_handler(void); -#define CloseConnections() \ - if (PQisBusy(primaryConn) == 1) \ - CancelQuery(); \ - if (myLocalConn != NULL) \ - PQfinish(myLocalConn); \ - if (primaryConn != NULL) \ - PQfinish(primaryConn); +#define CloseConnections() \ + if (PQisBusy(primaryConn) == 1) \ + CancelQuery(); \ + if (myLocalConn != NULL) \ + PQfinish(myLocalConn); \ + if (primaryConn != NULL) \ + PQfinish(primaryConn); /* * Every 3 seconds, insert monitor info */ -#define MonitorCheck() \ - for (;;) \ - { \ - MonitorExecute(); \ - sleep(3); \ - } +#define MonitorCheck() \ + for (;;) \ + { \ + MonitorExecute(); \ + sleep(3); \ + } int @@ -94,7 +95,6 @@ main(int argc, char **argv) int optindex; int c; - char conninfo[MAXLEN]; char standby_version[MAXVERSIONSTR]; progname = get_progname(argv[0]); @@ -125,7 +125,8 @@ main(int argc, char **argv) verbose = true; break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } } @@ -134,29 
+135,32 @@ main(int argc, char **argv) if (config_file == NULL) { - config_file = malloc(5 + sizeof(CONFIG_FILE)); - sprintf(config_file, "./%s", CONFIG_FILE); + const size_t buf_sz = 3 + sizeof(CONFIG_FILE); + + config_file = malloc(buf_sz); + xsnprintf(config_file, buf_sz, "./%s", CONFIG_FILE); } /* * Read the configuration file: repmgr.conf */ - parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) + parse_config(config_file, &config); + if (config.node == -1) { fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); + "Check the configuration file.\n"); exit(1); } - myLocalConn = establishDBConnection(conninfo, true); + myLocalConn = establishDBConnection(config.conninfo, true); /* should be v9 or better */ pg_version(myLocalConn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(myLocalConn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), + progname); exit(1); } @@ -167,25 +171,32 @@ main(int argc, char **argv) myLocalMode = is_standby(myLocalConn) ? 
STANDBY_MODE : PRIMARY_MODE; if (myLocalMode == PRIMARY_MODE) { - primaryId = myLocalId; - strcpy(primaryConninfo, conninfo); + primaryId = config.node; + strcpy(primaryConninfo, config.conninfo); primaryConn = myLocalConn; } else { /* I need the id of the primary as well as a connection to it */ - primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId); + primaryConn = getMasterConnection(myLocalConn, config.node, + config.cluster_name, &primaryId, + NULL); + if (primaryConn == NULL) exit(1); } checkClusterConfiguration(); - checkNodeConfiguration(conninfo); + checkNodeConfiguration(config.conninfo); if (myLocalMode == STANDBY_MODE) { MonitorCheck(); } + /* Prevent a double-free */ + if (primaryConn == myLocalConn) + myLocalConn = NULL; + /* close the connection to the database and cleanup */ CloseConnections(); @@ -235,11 +246,15 @@ MonitorExecute(void) } if (PQstatus(primaryConn) != CONNECTION_OK) { - fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if " - "another node has been promoted.\n", progname); - for (connection_retries = 0; connection_retries < 6; connection_retries++) + fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if ", + progname); + for (connection_retries = 0; connection_retries < 6; + connection_retries++) { - primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId); + primaryConn = getMasterConnection(myLocalConn, config.node, + config.cluster_name, &primaryId, + NULL); + if (PQstatus(primaryConn) == CONNECTION_OK) { /* Connected, we can continue the process so break the loop */ @@ -278,9 +293,10 @@ MonitorExecute(void) CancelQuery(); /* Get local xlog info */ - sprintf(sqlquery, - "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " - "pg_last_xlog_replay_location()"); + sqlquery_snprintf( + sqlquery, + "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " + "pg_last_xlog_replay_location()"); res = PQexec(myLocalConn, sqlquery); if 
(PQresultStatus(res) != PGRES_TUPLES_OK) @@ -297,7 +313,7 @@ MonitorExecute(void) PQclear(res); /* Get primary xlog info */ - sprintf(sqlquery, "SELECT pg_current_xlog_location() "); + sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() "); res = PQexec(primaryConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -318,16 +334,16 @@ MonitorExecute(void) /* * Build the SQL to execute on primary */ - sprintf(sqlquery, - "INSERT INTO repmgr_%s.repl_monitor " - "VALUES(%d, %d, '%s'::timestamp with time zone, " - " '%s', '%s', " - " %lld, %lld)", myClusterName, - primaryId, myLocalId, monitor_standby_timestamp, - last_wal_primary_location, - last_wal_standby_received, - (lsn_primary - lsn_standby_received), - (lsn_standby_received - lsn_standby_applied)); + sqlquery_snprintf(sqlquery, + "INSERT INTO repmgr_%s.repl_monitor " + "VALUES(%d, %d, '%s'::timestamp with time zone, " + " '%s', '%s', " + " %lld, %lld)", config.cluster_name, + primaryId, config.node, monitor_standby_timestamp, + last_wal_primary_location, + last_wal_standby_received, + (lsn_primary - lsn_standby_received), + (lsn_standby_received - lsn_standby_applied)); /* * Execute the query asynchronously, but don't check for a result. We @@ -335,7 +351,7 @@ MonitorExecute(void) */ if (PQsendQuery(primaryConn, sqlquery) == 0) fprintf(stderr, "Query could not be sent to primary. 
%s\n", - PQerrorMessage(primaryConn)); + PQerrorMessage(primaryConn)); } @@ -344,9 +360,9 @@ checkClusterConfiguration(void) { PGresult *res; - sprintf(sqlquery, "SELECT oid FROM pg_class " + sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class " " WHERE oid = 'repmgr_%s.repl_nodes'::regclass", - myClusterName); + config.cluster_name); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -358,8 +374,10 @@ checkClusterConfiguration(void) } /* - * If there isn't any results then we have not configured a primary node yet - * in repmgr or the connection string is pointing to the wrong database. + * If there aren't any results then we have not configured a primary node + * yet in repmgr or the connection string is pointing to the wrong + * database. + * * XXX if we are the primary, should we try to create the tables needed? */ if (PQntuples(res) == 0) @@ -379,12 +397,10 @@ checkNodeConfiguration(char *conninfo) { PGresult *res; - /* - * Check if we have my node information in repl_nodes - */ - sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE id = %d AND cluster = '%s' ", - myClusterName, myLocalId, myClusterName); + /* Check if we have my node information in repl_nodes */ + sqlquery_snprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " + " WHERE id = %d AND cluster = '%s' ", + config.cluster_name, config.node, config.cluster_name); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -403,15 +419,16 @@ checkNodeConfiguration(char *conninfo) if (PQntuples(res) == 0) { PQclear(res); + /* Adding the node */ - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + config.cluster_name, config.node, config.cluster_name, conninfo); if (!PQexec(primaryConn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", - 
PQerrorMessage(primaryConn)); + PQerrorMessage(primaryConn)); PQfinish(myLocalConn); PQfinish(primaryConn); exit(1); @@ -443,10 +460,10 @@ help(const char *progname) printf(_("Usage:\n")); printf(_(" %s [OPTIONS]\n"), progname); printf(_("\nOptions:\n")); - printf(_(" --help show this help, then exit\n")); - printf(_(" --version output version information, then exit\n")); - printf(_(" --verbose output verbose activity information\n")); - printf(_(" -f, --config_file=PATH database to connect to\n")); + printf(_(" --help show this help, then exit\n")); + printf(_(" --version output version information, then exit\n")); + printf(_(" --verbose output verbose activity information\n")); + printf(_(" -f, --config_file=PATH database to connect to\n")); printf(_("\n%s monitors a cluster of servers.\n"), progname); } diff --git a/strutil.c b/strutil.c new file mode 100644 index 0000000..6286c31 --- /dev/null +++ b/strutil.c @@ -0,0 +1,72 @@ +/* + * strutil.c + * + * Copyright (c) Heroku, 2010 + * + */ + +#include +#include +#include + +#include "strutil.h" + +static int xvsnprintf(char *str, size_t size, const char *format, va_list ap); + + +static int +xvsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int retval; + + retval = vsnprintf(str, size, format, ap); + + if (retval >= size) + { + fprintf(stderr, "Buffer not large enough to format entire string\n"); + exit(255); + } + + return retval; +} + + +int +xsnprintf(char *str, size_t size, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, size, format, arglist); + va_end(arglist); + + return retval; +} + + +int +sqlquery_snprintf(char *str, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, QUERY_STR_LEN, format, arglist); + va_end(arglist); + + return retval; +} + + +int maxlen_snprintf(char *str, const char *format, ...) 
+{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, MAXLEN, format, arglist); + va_end(arglist); + + return retval; +} diff --git a/strutil.h b/strutil.h new file mode 100644 index 0000000..41d9626 --- /dev/null +++ b/strutil.h @@ -0,0 +1,25 @@ +/* + * strutil.h + * + * Copyright (c) Heroku, 2010 + * + */ + +#ifndef _STRUTIL_H_ +#define _STRUTIL_H_ + +#include + + +#define QUERY_STR_LEN 8192 +#define MAXLEN 1024 +#define MAXLINELENGTH 4096 +#define MAXVERSIONSTR 16 +#define MAXCONNINFO 1024 + + +extern int xsnprintf(char *str, size_t size, const char *format, ...); +extern int sqlquery_snprintf(char *str, const char *format, ...); +extern int maxlen_snprintf(char *str, const char *format, ...); + +#endif /* _STRUTIL_H_ */ diff --git a/uninstall_repmgr.sql b/uninstall_repmgr.sql new file mode 100644 index 0000000..8cb1b4d --- /dev/null +++ b/uninstall_repmgr.sql @@ -0,0 +1,13 @@ +/* + * uninstall_repmgr.sql + * + * Copyright (c) Heroku, 2010 + * + */ + +DROP TABLE IF EXISTS repl_nodes; +DROP TABLE IF EXISTS repl_monitor; +DROP VIEW IF EXISTS repl_status; + +DROP SCHEMA repmgr; +DROP USER repmgr;