From af2edf10a0fe8e75c9378bdaf55c9b86760069b2 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Sun, 5 Dec 2010 23:31:22 -0800 Subject: [PATCH 01/14] Attack of whitespace pedantry pgsql conventions (tabs, four-spaces-wide, etc) applied all around. Also tried to fix some very tiny capitalization errors, auto-fill problems, and some inter-block vertical whitespacing issues. Long strings in repmgr.c were left intact, though. They are rather numerous and are less of a problem than tiny bits of function calls and comments wrapping over a line; the latter kind of problem has been mostly fixed. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- check_dir.c | 88 ++-- config.c | 66 +-- config.h | 3 +- dbutils.c | 174 ++++---- dbutils.h | 10 +- repmgr.c | 1136 +++++++++++++++++++++++++++------------------------ repmgr.sql | 26 +- repmgrd.c | 301 +++++++------- 8 files changed, 947 insertions(+), 857 deletions(-) diff --git a/check_dir.c b/check_dir.c index d63d7c2..7296261 100644 --- a/check_dir.c +++ b/check_dir.c @@ -31,63 +31,63 @@ static int mkdir_p(char *path, mode_t omode); int check_dir(char *dir) { - DIR *chkdir; - struct dirent *file; - int result = 1; - - errno = 0; - - chkdir = opendir(dir); - - if (!chkdir) - return (errno == ENOENT) ? 0 : -1; - - while ((file = readdir(chkdir)) != NULL) - { - if (strcmp(".", file->d_name) == 0 || - strcmp("..", file->d_name) == 0) - { - /* skip this and parent directory */ - continue; - } - else - { - result = 2; /* not empty */ - break; - } - } + DIR *chkdir; + struct dirent *file; + int result = 1; + + errno = 0; + + chkdir = opendir(dir); + + if (!chkdir) + return (errno == ENOENT) ? 0 : -1; + + while ((file = readdir(chkdir)) != NULL) + { + if (strcmp(".", file->d_name) == 0 || + strcmp("..", file->d_name) == 0) + { + /* skip this and parent directory */ + continue; + } + else + { + result = 2; /* not empty */ + break; + } + } #ifdef WIN32 - /* - * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in - * released version - */ - if (GetLastError() == ERROR_NO_MORE_FILES) - errno = 0; + /* + * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in + * released version + */ + if (GetLastError() == ERROR_NO_MORE_FILES) + errno = 0; #endif - closedir(chkdir); + closedir(chkdir); - if (errno != 0) - return -1; /* some kind of I/O error? */ + if (errno != 0) + return -1; /* some kind of I/O error? */ return result; } /* - * Create directory + * Create directory */ bool create_directory(char *dir) { - if (mkdir_p(dir, 0700) == 0) - return true; + if (mkdir_p(dir, 0700) == 0) + return true; - fprintf(stderr, _("Could not create directory \"%s\": %s\n"), - dir, strerror(errno)); + fprintf(stderr, _("Could not create directory \"%s\": %s\n"), + dir, strerror(errno)); - return false; + return false; } bool @@ -114,10 +114,10 @@ mkdir_p(char *path, mode_t omode) { struct stat sb; mode_t numask, - oumask; + oumask; int first, - last, - retval; + last, + retval; char *p; p = path; @@ -212,5 +212,5 @@ is_pg_dir(char *dir) sprintf(path, "%s/PG_VERSION", dir); - return (stat(path, &sb) == 0) ? true : false; + return (stat(path, &sb) == 0) ? true : false; } diff --git a/config.c b/config.c index 4d15f0b..4c4ecdb 100644 --- a/config.c +++ b/config.c @@ -8,13 +8,14 @@ #include "repmgr.h" void -parse_config(const char *config_file, char *cluster_name, int *node, char *conninfo) +parse_config(const char *config_file, char *cluster_name, int *node, + char *conninfo) { char *s, buff[256]; FILE *fp = fopen (config_file, "r"); if (fp == NULL) - return; + return; /* Read next line */ while ((s = fgets (buff, sizeof buff, fp)) != NULL) @@ -22,46 +23,47 @@ parse_config(const char *config_file, char *cluster_name, int *node, char *conni char name[MAXLEN]; char value[MAXLEN]; - /* Skip blank lines and comments */ - if (buff[0] == '\n' || buff[0] == '#') - continue; + /* Skip blank lines and comments */ + if (buff[0] == '\n' || buff[0] == '#') + continue; - /* Parse name/value pair from line */ + /* Parse name/value pair from line */ parse_line(buff, name, value); - /* Copy into correct entry in parameters struct */ - if (strcmp(name, "cluster") == 0) - strncpy (cluster_name, value, MAXLEN); - else if (strcmp(name, "node") == 0) - *node = atoi(value); - else if (strcmp(name, "conninfo") == 0) - strncpy (conninfo, value, MAXLEN); - else - printf ("WARNING: %s/%s: Unknown name/value pair!\n", name, value); - } + /* Copy into correct entry in parameters struct */ + if (strcmp(name, "cluster") == 0) + strncpy (cluster_name, value, MAXLEN); + else if (strcmp(name, "node") == 0) + *node = atoi(value); + else if (strcmp(name, "conninfo") == 0) + strncpy (conninfo, value, MAXLEN); + else + printf("WARNING: %s/%s: Unknown name/value pair!\n", + name, value); + } - /* Close file */ - fclose (fp); + /* Close file */ + fclose (fp); } char * trim (char *s) { - /* Initialize start, end pointers */ - char *s1 = s, *s2 = &s[strlen (s) - 1]; + /* Initialize start, end pointers */ + char *s1 = s, *s2 = &s[strlen (s) - 1]; - /* Trim and delimit right side */ - while ( (isspace (*s2)) && (s2 >= s1) ) - s2--; - *(s2+1) = '\0'; + /* Trim and delimit right side */ + while ( (isspace (*s2)) && (s2 >= s1) ) + s2--; + *(s2+1) = '\0'; - /* Trim left side */ - while ( (isspace (*s1)) && (s1 < s2) ) - s1++; + /* Trim left side */ + while ( (isspace (*s1)) && (s1 < s2) ) + s1++; - /* Copy finished string */ - strcpy (s, s1); - return s; + /* Copy finished string */ + strcpy (s, s1); + return s; } void @@ -86,7 +88,7 @@ parse_line(char *buff, char *name, char *value) i++; /* * Now the value - */ + */ j = 0; for ( ; i < MAXLEN; i++) if (buff[i] == '\'') @@ -96,5 +98,5 @@ parse_line(char *buff, char *name, char *value) else break; value[j] = '\0'; - trim(value); + trim(value); } diff --git a/config.h b/config.h index 139bfd3..ec23fff 100644 --- a/config.h +++ b/config.h @@ -4,6 +4,7 @@ * */ -void parse_config(const char *config_file, char *cluster_name, int *node, char *service); +void parse_config(const char *config_file, char *cluster_name, int *node, + char *service); void parse_line(char *buff, char *name, char *value); char *trim(char *s); diff --git a/dbutils.c b/dbutils.c index 8287e1b..bec62d6 100644 --- a/dbutils.c +++ b/dbutils.c @@ -12,19 +12,21 @@ PGconn * establishDBConnection(const char *conninfo, const bool exit_on_error) { PGconn *conn; - /* Make a connection to the database */ - conn = PQconnectdb(conninfo); - /* Check to see that the backend connection was successfully made */ - if ((PQstatus(conn) != CONNECTION_OK)) - { - fprintf(stderr, "Connection to database failed: %s", - PQerrorMessage(conn)); + + /* Make a connection to the database */ + conn = PQconnectdb(conninfo); + + /* Check to see that the backend connection was successfully made */ + if ((PQstatus(conn) != CONNECTION_OK)) + { + fprintf(stderr, "Connection to database failed: %s", + PQerrorMessage(conn)); if (exit_on_error) { - PQfinish(conn); + PQfinish(conn); exit(1); } - } + } return conn; } @@ -34,29 +36,30 @@ establishDBConnection(const char *conninfo, const bool exit_on_error) bool is_standby(PGconn *conn) { - PGresult *res; + PGresult *res; bool result; - res = PQexec(conn, "SELECT pg_is_in_recovery()"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); + res = PQexec(conn, "SELECT pg_is_in_recovery()"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); exit(1); - } + } if (strcmp(PQgetvalue(res, 0, 0), "f") == 0) result = false; else - result = true; + result = true; PQclear(res); return result; } -/* +/* * If postgreSQL version is 9 or superior returns the major version * if 8 or inferior returns an empty string */ @@ -69,20 +72,26 @@ pg_version(PGconn *conn) int major_version1; char *major_version2; - res = PQexec(conn, "WITH pg_version(ver) AS (SELECT split_part(version(), ' ', 2)) " - "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) FROM pg_version"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { + res = PQexec(conn, + "WITH pg_version(ver) AS " + "(SELECT split_part(version(), ' ', 2)) " + "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) " + "FROM pg_version"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); - PQclear(res); + PQclear(res); PQfinish(conn); exit(1); - } - major_version1 = atoi(PQgetvalue(res, 0, 0)); - major_version2 = PQgetvalue(res, 0, 1); - PQclear(res); + } + + major_version1 = atoi(PQgetvalue(res, 0, 0)); + major_version2 = PQgetvalue(res, 0, 1); + PQclear(res); major_version = malloc(10); + if (major_version1 >= 9) { /* form a major version string */ @@ -96,27 +105,28 @@ pg_version(PGconn *conn) bool -guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value) +guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value) { PGresult *res; char sqlquery[8192]; sprintf(sqlquery, "SELECT true FROM pg_settings " - " WHERE name = '%s' AND setting %s '%s'", - parameter, op, value); + " WHERE name = '%s' AND setting %s '%s'", + parameter, op, value); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); - PQclear(res); + PQclear(res); PQfinish(conn); exit(1); - } + } if (PQntuples(res) == 0) { PQclear(res); - return false; + return false; } PQclear(res); @@ -131,72 +141,78 @@ get_cluster_size(PGconn *conn) const char *size; char sqlquery[8192]; - sprintf(sqlquery, "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " - " FROM pg_database "); + sprintf(sqlquery, + "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " + " FROM pg_database "); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); - PQclear(res); + PQclear(res); PQfinish(conn); exit(1); - } - size = PQgetvalue(res, 0, 0); + } + size = PQgetvalue(res, 0, 0); PQclear(res); return size; } /* - * get a connection to master by reading repl_nodes, creating a connection + * get a connection to master by reading repl_nodes, creating a connection * to each node (one at a time) and finding if it is a master or a standby */ PGconn * -getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) +getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id) { PGconn *master_conn = NULL; - PGresult *res1; - PGresult *res2; - char sqlquery[8192]; + PGresult *res1; + PGresult *res2; + char sqlquery[8192]; char master_conninfo[8192]; int i; /* find all nodes belonging to this cluster */ sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE cluster = '%s' and id <> %d", - cluster, cluster, id); - - res1 = PQexec(standby_conn, sqlquery); - if (PQresultStatus(res1) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(standby_conn)); - PQclear(res1); + " WHERE cluster = '%s' and id <> %d", + cluster, cluster, id); + + res1 = PQexec(standby_conn, sqlquery); + if (PQresultStatus(res1) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't get nodes info: %s\n", + PQerrorMessage(standby_conn)); + PQclear(res1); PQfinish(standby_conn); exit(1); - } + } for (i = 0; i < PQntuples(res1); i++) - { + { /* initialize with the values of the current node being processed */ *master_id = atoi(PQgetvalue(res1, i, 0)); strcpy(master_conninfo, PQgetvalue(res1, i, 2)); master_conn = establishDBConnection(master_conninfo, false); + if (PQstatus(master_conn) != CONNECTION_OK) continue; - /* - * I can't use the is_standby() function here because on error that - * function closes the connection i pass and exit, but i still need to close - * standby_conn + /* + * I can't use the is_standby() function here because on error that + * function closes the connection i pass and exit, but i still need to + * close standby_conn */ - res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()"); - if (PQresultStatus(res2) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't get recovery state from this node: %s\n", PQerrorMessage(master_conn)); - PQclear(res2); + res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()"); + + if (PQresultStatus(res2) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't get recovery state from this node: %s\n", + PQerrorMessage(master_conn)); + PQclear(res2); PQfinish(master_conn); continue; - } + } /* if false, this is the master */ if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0) @@ -212,17 +228,17 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) PQfinish(master_conn); *master_id = -1; } - } + } /* If we finish this loop without finding a master then - * we doesn't have the info or the master has failed (or we - * reached max_connections or superuser_reserved_connections, - * anything else i'm missing?), - * Probably we will need to check the error to know if we need - * to start failover procedure or just fix some situation on the - * standby. - */ + * we doesn't have the info or the master has failed (or we + * reached max_connections or superuser_reserved_connections, + * anything else I'm missing?). + * + * Probably we will need to check the error to know if we need + * to start failover procedure or just fix some situation on the + * standby. + */ PQclear(res1); return NULL; } - diff --git a/dbutils.h b/dbutils.h index a1f96a3..e8383e5 100644 --- a/dbutils.h +++ b/dbutils.h @@ -5,8 +5,10 @@ */ PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error); -bool is_standby(PGconn *conn); +bool is_standby(PGconn *conn); char *pg_version(PGconn *conn); -bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); -const char *get_cluster_size(PGconn *conn); -PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id); +bool guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value); +const char *get_cluster_size(PGconn *conn); +PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id); diff --git a/repmgr.c b/repmgr.c index 18874c3..950c676 100644 --- a/repmgr.c +++ b/repmgr.c @@ -5,9 +5,9 @@ * Command interpreter for the repmgr * This module is a command-line utility to easily setup a cluster of * hot standby servers for an HA environment - * + * * Commands implemented are. - * MASTER REGISTER, STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, + * MASTER REGISTER, STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, * STANDBY PROMOTE */ @@ -23,18 +23,19 @@ #define RECOVERY_FILE "recovery.conf" #define RECOVERY_DONE_FILE "recovery.done" -#define NO_ACTION 0 /* Not a real action, just to initialize */ -#define MASTER_REGISTER 1 +#define NO_ACTION 0 /* Not a real action, just to initialize */ +#define MASTER_REGISTER 1 #define STANDBY_REGISTER 2 -#define STANDBY_CLONE 3 -#define STANDBY_PROMOTE 4 -#define STANDBY_FOLLOW 5 +#define STANDBY_CLONE 3 +#define STANDBY_PROMOTE 4 +#define STANDBY_FOLLOW 5 -#define QUERY_STR_LEN 8192 +#define QUERY_STR_LEN 8192 static void help(const char *progname); static bool create_recovery_file(const char *data_dir); -static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory); +static int copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory); static bool check_parameters_for_action(const int action); static void do_master_register(void); @@ -54,7 +55,7 @@ char *username = NULL; char *dest_dir = NULL; char *config_file = NULL; char *remote_user = NULL; -char *wal_keep_segments = NULL; +char *wal_keep_segments = NULL; bool verbose = false; bool force = false; @@ -103,7 +104,8 @@ main(int argc, char **argv) } - while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, + &optindex)) != -1) { switch (c) { @@ -114,20 +116,20 @@ main(int argc, char **argv) host = optarg; break; case 'p': - masterport = optarg; - break; + masterport = optarg; + break; case 'U': username = optarg; break; case 'D': dest_dir = optarg; - break; + break; case 'f': config_file = optarg; - break; + break; case 'R': remote_user = optarg; - break; + break; case 'w': wal_keep_segments = optarg; break; @@ -138,69 +140,73 @@ main(int argc, char **argv) verbose = true; break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } } - /* + /* * Now we need to obtain the action, this comes in one of these forms: - * MASTER REGISTER | - * STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} - * - * the node part is optional, if we receive it then we shouldn't - * have received a -h option - */ - if (optind < argc) - { - server_mode = argv[optind++]; - if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 0) + * MASTER REGISTER | + * STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} + * + * the node part is optional, if we receive it then we shouldn't + * have received a -h option + */ + if (optind < argc) + { + server_mode = argv[optind++]; + if (strcasecmp(server_mode, "STANDBY") != 0 && + strcasecmp(server_mode, "MASTER") != 0) { - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); - } + } } - if (optind < argc) + if (optind < argc) { - server_cmd = argv[optind++]; - if (strcasecmp(server_cmd, "REGISTER") == 0) + server_cmd = argv[optind++]; + if (strcasecmp(server_cmd, "REGISTER") == 0) { - /* - * we don't use this info in any other place so i will - * just execute the compare again instead of having an - * additional variable to hold a value that we will use - * no more - */ + /* + * we don't use this info in any other place so i will + * just execute the compare again instead of having an + * additional variable to hold a value that we will use + * no more + */ if (strcasecmp(server_mode, "MASTER") == 0) action = MASTER_REGISTER; - else if (strcasecmp(server_mode, "STANDBY") == 0) + else if (strcasecmp(server_mode, "STANDBY") == 0) action = STANDBY_REGISTER; } - else if (strcasecmp(server_cmd, "CLONE") == 0) + else if (strcasecmp(server_cmd, "CLONE") == 0) action = STANDBY_CLONE; - else if (strcasecmp(server_cmd, "PROMOTE") == 0) + else if (strcasecmp(server_cmd, "PROMOTE") == 0) action = STANDBY_PROMOTE; - else if (strcasecmp(server_cmd, "FOLLOW") == 0) + else if (strcasecmp(server_cmd, "FOLLOW") == 0) action = STANDBY_FOLLOW; else { - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); - } + } } /* For some actions we still can receive a last argument */ if (action == STANDBY_CLONE) { - if (optind < argc) + if (optind < argc) { if (host != NULL) { fprintf(stderr, _("Conflicting parameters you can't use -h while providing a node separately. Try \"%s --help\" for more information.\n"), progname); exit(1); - } - host = argv[optind++]; + } + host = argv[optind++]; } } @@ -211,7 +217,8 @@ main(int argc, char **argv) default: fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"), progname, argv[optind + 1]); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } @@ -230,24 +237,24 @@ main(int argc, char **argv) strcpy(wal_keep_segments, "5000"); } - if (dbname == NULL) - { - if (getenv("PGDATABASE")) - dbname = getenv("PGDATABASE"); - else if (getenv("PGUSER")) - dbname = getenv("PGUSER"); - else - dbname = "postgres"; - } - - keywords[2] = "user"; - values[2] = username; - keywords[3] = "dbname"; - values[3] = dbname; - keywords[4] = "application_name"; - values[4] = (char *) progname; - keywords[5] = NULL; - values[5] = NULL; + if (dbname == NULL) + { + if (getenv("PGDATABASE")) + dbname = getenv("PGDATABASE"); + else if (getenv("PGUSER")) + dbname = getenv("PGUSER"); + else + dbname = "postgres"; + } + + keywords[2] = "user"; + values[2] = username; + keywords[3] = "dbname"; + values[3] = dbname; + keywords[4] = "application_name"; + values[4] = (char *) progname; + keywords[5] = NULL; + values[5] = NULL; switch (action) { @@ -267,47 +274,49 @@ main(int argc, char **argv) do_standby_follow(); break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } - - return 0; + + return 0; } -static void +static void do_master_register(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char myClusterName[MAXLEN]; + int myLocalId = -1; + char conninfo[MAXLEN]; bool schema_exists = false; const char *master_version = NULL; /* * Read the configuration file: repmgr.conf - */ + */ parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) + if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); + "Check the configuration file.\n"); exit(1); } - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(conninfo, true); /* master should be v9 or better */ master_version = pg_version(conn); if (strcmp(master_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -320,95 +329,101 @@ do_master_register(void) } /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); + sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", + myClusterName); res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about schemas: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; + fprintf(stderr, "Can't get info about schemas: %s\n", + PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; } - + if (PQntuples(res) > 0) /* schema exists */ { if (!force) /* and we are not forcing so error */ { - fprintf(stderr, "Schema repmgr_%s already exists.", myClusterName); - PQclear(res); - PQfinish(conn); + fprintf(stderr, "Schema repmgr_%s already exists.", myClusterName); + PQclear(res); + PQfinish(conn); return; } schema_exists = true; } PQclear(res); - + if (!schema_exists) { /* ok, create the schema */ sprintf(sqlquery, "CREATE SCHEMA repmgr_%s", myClusterName); - if (!PQexec(conn, sqlquery)) + if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the schema repmgr_%s: %s\n", - myClusterName, PQerrorMessage(conn)); - PQfinish(conn); + myClusterName, PQerrorMessage(conn)); + PQfinish(conn); return; } - + /* ... the tables */ - sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " - " id integer primary key, " - " cluster text not null, " - " conninfo text not null)", myClusterName); - if (!PQexec(conn, sqlquery)) + sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " + " id integer primary key, " + " cluster text not null, " + " conninfo text not null)", myClusterName); + if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the table repmgr_%s.repl_nodes: %s\n", - myClusterName, PQerrorMessage(conn)); - PQfinish(conn); + fprintf(stderr, + "Cannot create the table repmgr_%s.repl_nodes: %s\n", + myClusterName, PQerrorMessage(conn)); + PQfinish(conn); return; } - + sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( " - " primary_node INTEGER NOT NULL, " - " standby_node INTEGER NOT NULL, " - " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " - " last_wal_primary_location TEXT NOT NULL, " - " last_wal_standby_location TEXT NOT NULL, " - " replication_lag BIGINT NOT NULL, " - " apply_lag BIGINT NOT NULL) ", myClusterName); - if (!PQexec(conn, sqlquery)) + " primary_node INTEGER NOT NULL, " + " standby_node INTEGER NOT NULL, " + " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " + " last_wal_primary_location TEXT NOT NULL, " + " last_wal_standby_location TEXT NOT NULL, " + " replication_lag BIGINT NOT NULL, " + " apply_lag BIGINT NOT NULL) ", + myClusterName); + if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the table repmgr_%s.repl_monitor: %s\n", - myClusterName, PQerrorMessage(conn)); - PQfinish(conn); + fprintf(stderr, + "Cannot create the table repmgr_%s.repl_monitor: %s\n", + myClusterName, PQerrorMessage(conn)); + PQfinish(conn); return; } - /* and the view */ + /* and the view */ sprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS " - " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " - " ORDER BY last_monitor_time desc) " - " FROM repmgr_%s.repl_monitor) " - " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " - " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " - " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " - " FROM monitor_info a " - " WHERE row_number = 1", myClusterName, myClusterName); - if (!PQexec(conn, sqlquery)) + " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " + " ORDER BY last_monitor_time desc) " + " FROM repmgr_%s.repl_monitor) " + " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " + " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " + " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " + " FROM monitor_info a " + " WHERE row_number = 1", myClusterName, myClusterName); + if (!PQexec(conn, sqlquery)) { - fprintf(stderr, "Cannot create the view repmgr_%s.repl_status: %s\n", - myClusterName, PQerrorMessage(conn)); - PQfinish(conn); + fprintf(stderr, + "Cannot create the view repmgr_%s.repl_status: %s\n", + myClusterName, PQerrorMessage(conn)); + PQfinish(conn); return; } } else { PGconn *master_conn; - int id; + int id; /* Ensure there isn't any other master already registered */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id); + master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id); if (master_conn != NULL) { PQfinish(master_conn); @@ -420,72 +435,73 @@ do_master_register(void) /* Now register the master */ if (force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " + " WHERE id = %d", + myClusterName, myLocalId); - if (!PQexec(conn, sqlquery)) - { - fprintf(stderr, "Cannot delete node details, %s\n", - PQerrorMessage(conn)); - PQfinish(conn); + if (!PQexec(conn, sqlquery)) + { + fprintf(stderr, "Cannot delete node details, %s\n", + PQerrorMessage(conn)); + PQfinish(conn); return; - } - } - - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); - - if (!PQexec(conn, sqlquery)) - { - fprintf(stderr, "Cannot insert node details, %s\n", - PQerrorMessage(conn)); - PQfinish(conn); + } + } + + sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " + "VALUES (%d, '%s', '%s')", + myClusterName, myLocalId, myClusterName, conninfo); + + if (!PQexec(conn, sqlquery)) + { + fprintf(stderr, "Cannot insert node details, %s\n", + PQerrorMessage(conn)); + PQfinish(conn); return; - } + } PQfinish(conn); return; } -static void +static void do_standby_register(void) { - PGconn *conn; + PGconn *conn; PGconn *master_conn; int master_id; PGresult *res; - char sqlquery[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char myClusterName[MAXLEN]; + int myLocalId = -1; + char conninfo[MAXLEN]; const char *master_version = NULL; const char *standby_version = NULL; /* * Read the configuration file: repmgr.conf - */ + */ parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) + if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); + "Check the configuration file.\n"); exit(1); } - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(conninfo, true); /* should be v9 or better */ standby_version = pg_version(conn); if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -498,27 +514,30 @@ do_standby_register(void) } /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); + sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", + myClusterName); res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; + fprintf(stderr, "Can't get info about tablespaces: %s\n", + PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; } - + if (PQntuples(res) == 0) /* schema doesn't exists */ { - fprintf(stderr, "Schema repmgr_%s doesn't exists.", myClusterName); - PQclear(res); - PQfinish(conn); + fprintf(stderr, "Schema repmgr_%s doesn't exists.", myClusterName); + PQclear(res); + PQfinish(conn); return; } PQclear(res); - + /* check if there is a master in this cluster */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); + master_conn = getMasterConnection(conn, myLocalId, myClusterName, + &master_id); if (!master_conn) return; @@ -528,7 +547,8 @@ do_standby_register(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -537,8 +557,8 @@ do_standby_register(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), - progname, master_version, standby_version); + fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), + progname, master_version, standby_version); return; } @@ -546,32 +566,32 @@ do_standby_register(void) /* Now register the standby */ if (force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " + " WHERE id = %d", + myClusterName, myLocalId); - if (!PQexec(master_conn, sqlquery)) - { - fprintf(stderr, "Cannot delete node details, %s\n", - PQerrorMessage(master_conn)); - PQfinish(master_conn); - PQfinish(conn); + if (!PQexec(master_conn, sqlquery)) + { + fprintf(stderr, "Cannot delete node details, %s\n", + PQerrorMessage(master_conn)); + PQfinish(master_conn); + PQfinish(conn); return; - } - } - - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); - - if (!PQexec(master_conn, sqlquery)) - { - fprintf(stderr, "Cannot insert node details, %s\n", - PQerrorMessage(master_conn)); - PQfinish(master_conn); - PQfinish(conn); + } + } + + sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " + "VALUES (%d, '%s', '%s')", + myClusterName, myLocalId, myClusterName, conninfo); + + if (!PQexec(master_conn, sqlquery)) + { + fprintf(stderr, "Cannot insert node details, %s\n", + PQerrorMessage(master_conn)); + PQfinish(master_conn); + PQfinish(conn); return; - } + } PQfinish(master_conn); PQfinish(conn); @@ -579,12 +599,12 @@ do_standby_register(void) } -static void +static void do_standby_clone(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; int r = 0; int i; @@ -597,8 +617,8 @@ do_standby_clone(void) char master_control_file[MAXLEN]; char local_control_file[MAXLEN]; - const char *first_wal_segment = NULL; - const char *last_wal_segment = NULL; + const char *first_wal_segment = NULL; + const char *last_wal_segment = NULL; const char *master_version = NULL; @@ -610,48 +630,48 @@ do_standby_clone(void) } /* Check this directory could be used as a PGDATA dir */ - switch (check_dir(dest_dir)) - { - case 0: - /* dest_dir not there, must create it */ + switch (check_dir(dest_dir)) + { + case 0: + /* dest_dir not there, must create it */ if (verbose) - printf(_("creating directory %s ... "), dest_dir); - fflush(stdout); + printf(_("creating directory %s ... "), dest_dir); + fflush(stdout); - if (!create_directory(dest_dir)) + if (!create_directory(dest_dir)) { - fprintf(stderr, _("%s: couldn't create directory %s ... "), + fprintf(stderr, _("%s: couldn't create directory %s ... "), progname, dest_dir); return; } - break; - case 1: - /* Present but empty, fix permissions and use it */ + break; + case 1: + /* Present but empty, fix permissions and use it */ if (verbose) - printf(_("fixing permissions on existing directory %s ... "), - dest_dir); - fflush(stdout); + printf(_("fixing permissions on existing directory %s ... "), + dest_dir); + fflush(stdout); if (!set_directory_permissions(dest_dir)) - { - fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), - progname, dest_dir, strerror(errno)); + { + fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), + progname, dest_dir, strerror(errno)); return; - } - break; - case 2: - /* Present and not empty */ - fprintf(stderr, - _("%s: directory \"%s\" exists but is not empty\n"), - progname, dest_dir); - - pg_dir = is_pg_dir(dest_dir); - if (pg_dir && !force) + } + break; + case 2: + /* Present and not empty */ + fprintf(stderr, + _("%s: directory \"%s\" exists but is not empty\n"), + progname, dest_dir); + + pg_dir = is_pg_dir(dest_dir); + if (pg_dir && !force) { fprintf(stderr, _("\nThis looks like a PostgreSQL directroy.\n" - "If you are sure you want to clone here, " - "please check there is no PostgreSQL server " - "running and use the --force option\n")); + "If you are sure you want to clone here, " + "please check there is no PostgreSQL server " + "running and use the --force option\n")); return; } else if (pg_dir && force) @@ -659,35 +679,36 @@ do_standby_clone(void) /* Let it continue */ break; } - else + else return; - default: - /* Trouble accessing directory */ - fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), - progname, dest_dir, strerror(errno)); - } - - /* Connection parameters for master only */ - keywords[0] = "host"; - values[0] = host; - keywords[1] = "port"; - values[1] = masterport; + default: + /* Trouble accessing directory */ + fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), + progname, dest_dir, strerror(errno)); + } + + /* Connection parameters for master only */ + keywords[0] = "host"; + values[0] = host; + keywords[1] = "port"; + values[1] = masterport; /* We need to connect to check configuration and start a backup */ - conn = PQconnectdbParams(keywords, values, true); - if (!conn) - { - fprintf(stderr, _("%s: could not connect to master\n"), - progname); - return; - } + conn = PQconnectdbParams(keywords, values, true); + if (!conn) + { + fprintf(stderr, _("%s: could not connect to master\n"), + progname); + return; + } /* primary should be v9 or better */ master_version = pg_version(conn); if (strcmp(master_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -703,19 +724,19 @@ do_standby_clone(void) if (!guc_setted(conn, "wal_level", "=", "hot_standby")) { PQfinish(conn); - fprintf(stderr, _("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); + fprintf(stderr, _("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); return; } if (!guc_setted(conn, "wal_keep_segments", ">=", wal_keep_segments)) { PQfinish(conn); - fprintf(stderr, _("%s needs parameter 'wal_keep_segments' to be set to %s or greater\n"), wal_keep_segments, progname); + fprintf(stderr, _("%s needs parameter 'wal_keep_segments' to be set to %s or greater\n"), wal_keep_segments, progname); return; } if (!guc_setted(conn, "archive_mode", "=", "on")) { PQfinish(conn); - fprintf(stderr, _("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname); + fprintf(stderr, _("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname); return; } @@ -724,13 +745,13 @@ do_standby_clone(void) /* Check if the tablespace locations exists and that we can write to them */ sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; + fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; } for (i = 0; i < PQntuples(res); i++) { @@ -738,74 +759,77 @@ do_standby_clone(void) strcpy(tblspc_dir, PQgetvalue(res, i, 0)); /* Check this directory could be used as a PGDATA dir */ - switch (check_dir(tblspc_dir)) - { - case 0: - /* tblspc_dir not there, must create it */ + switch (check_dir(tblspc_dir)) + { + case 0: + /* tblspc_dir not there, must create it */ if (verbose) - printf(_("creating directory \"%s\"... "), tblspc_dir); - fflush(stdout); - - if (!create_directory(tblspc_dir)) + printf(_("creating directory \"%s\"... "), tblspc_dir); + fflush(stdout); + + if (!create_directory(tblspc_dir)) { - fprintf(stderr, _("%s: couldn't create directory \"%s\"... "), + fprintf(stderr, + _("%s: couldn't create directory \"%s\"... "), progname, tblspc_dir); PQclear(res); PQfinish(conn); return; } - break; - case 1: - /* Present but empty, fix permissions and use it */ + break; + case 1: + /* Present but empty, fix permissions and use it */ if (verbose) - printf(_("fixing permissions on existing directory \"%s\"... "), - tblspc_dir); - fflush(stdout); - - if (!set_directory_permissions(tblspc_dir)) - { - fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), - progname, tblspc_dir, strerror(errno)); + printf(_("fixing permissions on existing directory \"%s\"... "), + tblspc_dir); + fflush(stdout); + + if (!set_directory_permissions(tblspc_dir)) + { + fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), + progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); return; - } - break; - case 2: - /* Present and not empty */ - if (!force) - { + } + break; + case 2: + /* Present and not empty */ + if (!force) + { + fprintf( + stderr, + _("%s: directory \"%s\" exists but is not empty\n"), + progname, tblspc_dir); + PQclear(res); + PQfinish(conn); + return; + } + default: + /* Trouble accessing directory */ fprintf(stderr, - _("%s: directory \"%s\" exists but is not empty\n"), - progname, tblspc_dir); + _("%s: could not access directory \"%s\": %s\n"), + progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); - return; - } - default: - /* Trouble accessing directory */ - fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), - progname, tblspc_dir, strerror(errno)); - PQclear(res); - PQfinish(conn); - return; + return; } } - + fprintf(stderr, "Starting backup...\n"); - + /* Get the data directory full path and the configuration files location */ sprintf(sqlquery, "SELECT name, setting " - " FROM pg_settings " - " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't get info about data directory and configuration files: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; - } + " FROM pg_settings " + " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't get info about data directory and configuration files: %s\n", PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; + } for (i = 0; i < PQntuples(res); i++) { if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0) @@ -819,146 +843,162 @@ do_standby_clone(void) else fprintf(stderr, _("uknown parameter: %s"), PQgetvalue(res, i, 0)); } - PQclear(res); + PQclear(res); - /* - * inform the master we will start a backup and get the first XLog filename + /* + * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files - */ + */ sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't start backup: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; - } + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't start backup: %s\n", PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; + } first_wal_segment = PQgetvalue(res, 0, 0); PQclear(res); /* - * 1) first move global/pg_control + * 1) first move global/pg_control * - * 2) then move data_directory ommiting the files we have already moved and pg_xlog - * content + * 2) then move data_directory ommiting the files we have already moved and + * pg_xlog content * - * 3) finally We need to backup configuration files (that could be on other directories, debian - * like systems likes to do that), so look at config_file, hba_file and ident_file but we - * can omit external_pid_file ;) + * 3) finally We need to backup configuration files (that could be on other + * directories, debian like systems likes to do that), so look at + * config_file, hba_file and ident_file but we can omit + * external_pid_file ;) * * On error we need to return but before that execute pg_stop_backup() */ /* need to create the global sub directory */ - sprintf(master_control_file, "%s/global/pg_control", master_data_directory); + sprintf(master_control_file, "%s/global/pg_control", + master_data_directory); sprintf(local_control_file, "%s/global", dest_dir); if (!create_directory(local_control_file)) - { - fprintf(stderr, _("%s: couldn't create directory %s ... "), - progname, dest_dir); + { + fprintf(stderr, _("%s: couldn't create directory %s ... "), + progname, dest_dir); goto stop_backup; } - r = copy_remote_files(host, remote_user, master_control_file, local_control_file, false); + r = copy_remote_files(host, remote_user, master_control_file, + local_control_file, false); if (r != 0) goto stop_backup; - r = copy_remote_files(host, remote_user, master_data_directory, dest_dir, true); + r = copy_remote_files(host, remote_user, master_data_directory, dest_dir, + true); if (r != 0) goto stop_backup; - /* - * Copy tablespace locations, i'm doing this separately because i couldn't find and appropiate - * rsync option but besides we could someday make all these rsync happen concurrently - */ - sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); - PQclear(res); + /* + * Copy tablespace locations, i'm doing this separately because i couldn't + * find and appropiate rsync option but besides we could someday make all + * these rsync happen concurrently + */ + sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't get info about tablespaces: %s\n", + PQerrorMessage(conn)); + PQclear(res); goto stop_backup; - } - for (i = 0; i < PQntuples(res); i++) + } + for (i = 0; i < PQntuples(res); i++) { - r = copy_remote_files(host, remote_user, PQgetvalue(res, i, 0), PQgetvalue(res, i, 0), true); + r = copy_remote_files(host, remote_user, PQgetvalue(res, i, 0), + PQgetvalue(res, i, 0), true); if (r != 0) goto stop_backup; } - r = copy_remote_files(host, remote_user, master_config_file, dest_dir, false); + r = copy_remote_files(host, remote_user, master_config_file, dest_dir, + false); if (r != 0) goto stop_backup; - r = copy_remote_files(host, remote_user, master_hba_file, dest_dir, false); + r = copy_remote_files(host, remote_user, master_hba_file, dest_dir, false); if (r != 0) goto stop_backup; - r = copy_remote_files(host, remote_user, master_ident_file, dest_dir, false); + r = copy_remote_files(host, remote_user, master_ident_file, dest_dir, + false); if (r != 0) goto stop_backup; stop_backup: - /* inform the master that we have finished the backup */ - conn = PQconnectdbParams(keywords, values, true); - if (!conn) - { - fprintf(stderr, _("%s: could not connect to master\n"), - progname); - return; - } + /* inform the master that we have finished the backup */ + conn = PQconnectdbParams(keywords, values, true); + if (!conn) + { + fprintf(stderr, _("%s: could not connect to master\n"), + progname); + return; + } - fprintf(stderr, "Finishing backup...\n"); + fprintf(stderr, "Finishing backup...\n"); sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; - } + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; + } last_wal_segment = PQgetvalue(res, 0, 0); - PQclear(res); - PQfinish(conn); + PQclear(res); + PQfinish(conn); /* Now, if the rsync failed then exit */ if (r != 0) return; if (verbose) - printf(_("%s requires primary to keep WAL files %s until at least %s\n"), - progname, first_wal_segment, last_wal_segment); + printf( + _("%s requires primary to keep WAL files %s until at least %s\n"), + progname, first_wal_segment, last_wal_segment); - /* we need to create the pg_xlog sub directory too, i'm reusing a variable here */ + /* + * We need to create the pg_xlog sub directory too, I'm reusing a variable + * here. + */ sprintf(local_control_file, "%s/pg_xlog", dest_dir); if (!create_directory(local_control_file)) - { - fprintf(stderr, _("%s: couldn't create directory %s, you will need to do it manually...\n"), - progname, dest_dir); + { + fprintf(stderr, _("%s: couldn't create directory %s, you will need to do it manually...\n"), + progname, dest_dir); } /* Finally, write the recovery.conf file */ create_recovery_file(dest_dir); - /* We don't start the service because we still may want to move the directory */ + /* + * We don't start the service because we still may want to move the + * directory + */ return; } -static void +static void do_standby_promote(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; + char script[QUERY_STR_LEN]; - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char myClusterName[MAXLEN]; + int myLocalId = -1; + char conninfo[MAXLEN]; PGconn *old_master_conn; int old_master_id; @@ -972,36 +1012,39 @@ do_standby_promote(void) /* * Read the configuration file: repmgr.conf - */ + */ parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) + if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); + "Check the configuration file.\n"); exit(1); } /* We need to connect to check configuration */ - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(conninfo, true); /* we need v9 or better */ standby_version = pg_version(conn); if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), + progname); return; } /* Check we are in a standby node */ if (!is_standby(conn)) { - fprintf(stderr, "repmgr: The command should be executed in a standby node\n"); + fprintf(stderr, + "repmgr: The command should be executed in a standby node\n"); return; } /* we also need to check if there isn't any master already */ - old_master_conn = getMasterConnection(conn, myLocalId, myClusterName, &old_master_id); + old_master_conn = getMasterConnection(conn, myLocalId, myClusterName, + &old_master_id); if (old_master_conn != NULL) { PQfinish(old_master_conn); @@ -1011,21 +1054,22 @@ do_standby_promote(void) if (verbose) printf(_("\n%s: Promoting standby...\n"), progname); - + /* Get the data directory full path and the last subdirectory */ sprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; - } + " FROM pg_settings WHERE name = 'data_directory'"); + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't get info about data directory: %s\n", + PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; + } strcpy(data_dir, PQgetvalue(res, 0, 0)); - PQclear(res); - PQfinish(conn); + PQclear(res); + PQfinish(conn); sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); @@ -1034,39 +1078,41 @@ do_standby_promote(void) /* We assume the pg_ctl script is in the PATH */ sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); - if (r != 0) - { - fprintf(stderr, "Can't restart service\n"); + if (r != 0) + { + fprintf(stderr, "Can't restart service\n"); return; - } + } /* reconnect to check we got promoted */ - /* - * XXX i'm removing this because it gives an annoying message saying couldn't connect - * but is just the server starting up -* conn = establishDBConnection(conninfo, true); -* if (is_standby(conn)) -* fprintf(stderr, "\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); -* else -* fprintf(stderr, "\n%s: you should REINDEX any hash indexes you have.\n", progname); -* PQfinish(conn); - */ + + /* + * XXX i'm removing this because it gives an annoying message saying + * couldn't connect but is just the server starting up + * + * conn = establishDBConnection(conninfo, true); + * if (is_standby(conn)) + * fprintf(stderr, "\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); + * else + * fprintf(stderr, "\n%s: you should REINDEX any hash indexes you have.\n", progname); + * PQfinish(conn); + */ return; } -static void +static void do_standby_follow(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; + char script[QUERY_STR_LEN]; - char myClusterName[MAXLEN]; - int myLocalId = -1; - char conninfo[MAXLEN]; + char myClusterName[MAXLEN]; + int myLocalId = -1; + char conninfo[MAXLEN]; PGconn *master_conn; int master_id; @@ -1077,19 +1123,17 @@ do_standby_follow(void) const char *master_version = NULL; const char *standby_version = NULL; - /* - * Read the configuration file: repmgr.conf - */ + /* Read the configuration file: repmgr.conf */ parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) + if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); + "Check the configuration file.\n"); exit(1); } /* We need to connect to check configuration */ - conn = establishDBConnection(conninfo, true); + conn = establishDBConnection(conninfo, true); /* Check we are in a standby node */ if (!is_standby(conn)) @@ -1103,12 +1147,13 @@ do_standby_follow(void) if (strcmp(standby_version, "") == 0) { PQfinish(conn); - fprintf(stderr, _("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname); return; } /* we also need to check if there is any master in the cluster */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); + master_conn = getMasterConnection(conn, myLocalId, myClusterName, + &master_id); if (master_conn == NULL) { PQfinish(conn); @@ -1120,7 +1165,8 @@ do_standby_follow(void) if (is_standby(master_conn)) { PQfinish(conn); - fprintf(stderr, "%s: The node to follow should be a master\n", progname); + fprintf(stderr, "%s: The node to follow should be a master\n", + progname); return; } @@ -1130,7 +1176,8 @@ do_standby_follow(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), + progname); return; } @@ -1139,14 +1186,14 @@ do_standby_follow(void) { PQfinish(conn); PQfinish(master_conn); - fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), - progname, master_version, standby_version); + fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), + progname, master_version, standby_version); return; } - /* - * set the host and masterport variables with the master ones - * before closing the connection because we will need them to + /* + * set the host and masterport variables with the master ones + * before closing the connection because we will need them to * recreate the recovery.conf file */ host = malloc(20); @@ -1157,72 +1204,74 @@ do_standby_follow(void) if (verbose) printf(_("\n%s: Changing standby's master...\n"), progname); - + /* Get the data directory full path */ sprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); - res = PQexec(conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - return; - } + " FROM pg_settings WHERE name = 'data_directory'"); + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "Can't get info about data directory: %s\n", + PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + return; + } strcpy(data_dir, PQgetvalue(res, 0, 0)); - PQclear(res); - PQfinish(conn); + PQclear(res); + PQfinish(conn); /* write the recovery.conf file */ if (!create_recovery_file(data_dir)) - return; + return; /* Finally, restart the service */ /* We assume the pg_ctl script is in the PATH */ sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); - if (r != 0) - { - fprintf(stderr, "Can't restart service\n"); + if (r != 0) + { + fprintf(stderr, "Can't restart service\n"); return; - } + } return; } -static void +static void help(const char *progname) { - printf(_("\n%s: Replicator manager \n"), progname); - printf(_("Usage:\n")); - printf(_(" %s [OPTIONS] master {register}\n"), progname); - printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), progname); - printf(_("\nGeneral options:\n")); - printf(_(" --help show this help, then exit\n")); - printf(_(" --version output version information, then exit\n")); - printf(_(" --verbose output verbose activity information\n")); + printf(_("\n%s: Replicator manager \n"), progname); + printf(_("Usage:\n")); + printf(_(" %s [OPTIONS] master {register}\n"), progname); + printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), + progname); + printf(_("\nGeneral options:\n")); + printf(_(" --help show this help, then exit\n")); + printf(_(" --version output version information, then exit\n")); + printf(_(" --verbose output verbose activity information\n")); printf(_("\nConnection options:\n")); - printf(_(" -d, --dbname=DBNAME database to connect to\n")); - printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); - printf(_(" -p, --port=PORT database server port\n")); - printf(_(" -U, --username=USERNAME database user name to connect as\n")); + printf(_(" -d, --dbname=DBNAME database to connect to\n")); + printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); + printf(_(" -p, --port=PORT database server port\n")); + printf(_(" -U, --username=USERNAME database user name to connect as\n")); printf(_("\nConfiguration options:\n")); - printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); - printf(_(" -f, --config_file=PATH path to the configuration file\n")); - printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); - printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); - printf(_(" -F, --force force potentially dangerous operations to happen\n")); - + printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); + printf(_(" -f, --config_file=PATH path to the configuration file\n")); + printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); + printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); + printf(_(" -F, --force force potentially dangerous operations to happen\n")); + printf(_("\n%s performs some tasks like clone a node, promote it "), progname); - printf(_("or making follow another node and then exits.\n")); - printf(_("COMMANDS:\n")); - printf(_(" master register - registers the master in a cluster\n")); - printf(_(" standby register - registers a standby in a cluster\n")); - printf(_(" standby clone [node] - allows creation of a new standby\n")); - printf(_(" standby promote - allows manual promotion of a specific standby into a ")); - printf(_("new master in the event of a failover\n")); - printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); + printf(_("or making follow another node and then exits.\n")); + printf(_("COMMANDS:\n")); + printf(_(" master register - registers the master in a cluster\n")); + printf(_(" standby register - registers a standby in a cluster\n")); + printf(_(" standby clone [node] - allows creation of a new standby\n")); + printf(_(" standby promote - allows manual promotion of a specific standby into a ")); + printf(_("new master in the event of a failover\n")); + printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); } @@ -1235,45 +1284,47 @@ create_recovery_file(const char *data_dir) sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); - recovery_file = fopen(recovery_file_path, "w"); - if (recovery_file == NULL) - { - fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n"); + recovery_file = fopen(recovery_file_path, "w"); + if (recovery_file == NULL) + { + fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n"); return false; - } + } sprintf(line, "standby_mode = 'on'\n"); if (fputs(line, recovery_file) == EOF) - { - fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); - fclose(recovery_file); + { + fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); + fclose(recovery_file); return false; - } + } - sprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, ((masterport==NULL) ? "5432" : masterport)); + sprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, + ((masterport==NULL) ? "5432" : masterport)); if (fputs(line, recovery_file) == EOF) - { - fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); - fclose(recovery_file); + { + fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); + fclose(recovery_file); return false; - } + } - /*FreeFile(recovery_file);*/ - fclose(recovery_file); + /*FreeFile(recovery_file);*/ + fclose(recovery_file); return true; } static int -copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory) +copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory) { char script[QUERY_STR_LEN]; char options[QUERY_STR_LEN]; char host_string[QUERY_STR_LEN]; - int r; + int r; - sprintf(options, "--archive --checksum --compress --progress --rsh=ssh"); + sprintf(options, "--archive --checksum --compress --progress --rsh=ssh"); if (force) strcat(options, " --delete"); @@ -1288,24 +1339,26 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_ if (is_directory) { - strcat(options, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); - sprintf(script, "rsync %s %s:%s/* %s", - options, host_string, remote_path, local_path); + strcat(options, + " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); + sprintf(script, "rsync %s %s:%s/* %s", + options, host_string, remote_path, local_path); } else { - sprintf(script, "rsync %s %s:%s %s/.", - options, host_string, remote_path, local_path); + sprintf(script, "rsync %s %s:%s %s/.", + options, host_string, remote_path, local_path); } if (verbose) - printf("rsync command line: '%s'\n",script); + printf("rsync command line: '%s'\n",script); r = system(script); - - if (r != 0) - fprintf(stderr, _("Can't rsync from remote file or directory (%s:%s)\n"), - host_string, remote_path); + + if (r != 0) + fprintf(stderr, + _("Can't rsync from remote file or directory (%s:%s)\n"), + host_string, remote_path); return r; } @@ -1327,16 +1380,18 @@ check_parameters_for_action(const int action) * all other parameters are at least useless and could be * confusing so reject them */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a MASTER REGISTER command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for MASTER REGISTER command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; @@ -1346,16 +1401,18 @@ check_parameters_for_action(const int action) * we don't need connection parameters to the master * because we can detect the master in repl_nodes */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY REGISTER command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY REGISTER command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; @@ -1366,16 +1423,18 @@ check_parameters_for_action(const int action) * because we will try to detect the master in repl_nodes * if we can't find it then the promote action will be cancelled */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY PROMOTE command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY PROMOTE command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; @@ -1386,28 +1445,31 @@ check_parameters_for_action(const int action) * because we will try to detect the master in repl_nodes * if we can't find it then the follow action will be cancelled */ - if ((host != NULL) || (masterport != NULL) || (username != NULL) || - (dbname != NULL)) + if ((host != NULL) || (masterport != NULL) || + (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY FOLLOW command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; - } + } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY FOLLOW command"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; case STANDBY_CLONE: /* * To clone a master into a standby we need connection parameters - * repmgr.conf is useless because we don't have a server running + * repmgr.conf is useless because we don't have a server running * in the standby */ if (config_file != NULL) { fprintf(stderr, "\nYou need to use connection parameters to the master when issuing a STANDBY CLONE command."); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); ok = false; } break; diff --git a/repmgr.sql b/repmgr.sql index f28e6d8..42ccfb8 100644 --- a/repmgr.sql +++ b/repmgr.sql @@ -7,25 +7,25 @@ CREATE SCHEMA repmgr; */ drop table if exists repl_nodes cascade; CREATE TABLE repl_nodes ( - id integer primary key, - cluster text not null, -- Name to identify the cluster - conninfo text not null + id integer primary key, + cluster text not null, -- Name to identify the cluster + conninfo text not null ); ALTER TABLE repl_nodes OWNER TO repmgr; /* - * Keeps monitor info about every node and their relative "position" + * Keeps monitor info about every node and their relative "position" * to primary */ drop table if exists repl_monitor cascade; CREATE TABLE repl_monitor ( primary_node INTEGER NOT NULL, standby_node INTEGER NOT NULL, - last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, - last_wal_primary_location TEXT NOT NULL, + last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, + last_wal_primary_location TEXT NOT NULL, last_wal_standby_location TEXT NOT NULL, - replication_lag BIGINT NOT NULL, - apply_lag BIGINT NOT NULL + replication_lag BIGINT NOT NULL, + apply_lag BIGINT NOT NULL ); ALTER TABLE repl_monitor OWNER TO repmgr; @@ -33,10 +33,10 @@ ALTER TABLE repl_monitor OWNER TO repmgr; /* * This view shows the latest monitor info about every node. * Interesting thing to see: - * replication_lag: in bytes (this is how far the latest xlog record + * replication_lag: in bytes (this is how far the latest xlog record * we have received is from master) * apply_lag: in bytes (this is how far the latest xlog record - * we have applied is from the latest record we + * we have applied is from the latest record we * have received) * time_lag: how many seconds are we from being up-to-date with master */ @@ -45,9 +45,9 @@ CREATE VIEW repl_status AS WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node ORDER BY last_monitor_time desc) FROM repl_monitor) -SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, - last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, - pg_size_pretty(apply_lag) apply_lag, +SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, + last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, + pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag FROM monitor_info a WHERE row_number = 1; diff --git a/repmgrd.c b/repmgrd.c index 42e9fe7..a5e00ce 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -4,7 +4,7 @@ * * Replication manager daemon * This module connects to the nodes of a replication cluster and monitors - * how far are they from master + * how far are they from master */ #include @@ -17,11 +17,11 @@ #include "libpq/pqsignal.h" -char myClusterName[MAXLEN]; +char myClusterName[MAXLEN]; /* Local info */ -int myLocalMode = STANDBY_MODE; -int myLocalId = -1; +int myLocalMode = STANDBY_MODE; +int myLocalId = -1; PGconn *myLocalConn; /* Primary info */ @@ -49,23 +49,23 @@ static unsigned long long int walLocationToBytes(char *wal_location); static void handle_sigint(SIGNAL_ARGS); static void setup_cancel_handler(void); -#define CloseConnections() \ - if (PQisBusy(primaryConn) == 1) \ - CancelQuery(); \ - if (myLocalConn != NULL) \ - PQfinish(myLocalConn); \ - if (primaryConn != NULL) \ - PQfinish(primaryConn); +#define CloseConnections() \ + if (PQisBusy(primaryConn) == 1) \ + CancelQuery(); \ + if (myLocalConn != NULL) \ + PQfinish(myLocalConn); \ + if (primaryConn != NULL) \ + PQfinish(primaryConn); /* * Every 3 seconds, insert monitor info */ -#define MonitorCheck() \ - for (;;) \ - { \ - MonitorExecute(); \ - sleep(3); \ - } +#define MonitorCheck() \ + for (;;) \ + { \ + MonitorExecute(); \ + sleep(3); \ + } int @@ -80,7 +80,7 @@ main(int argc, char **argv) int optindex; int c; - char conninfo[MAXLEN]; + char conninfo[MAXLEN]; const char *standby_version = NULL; progname = get_progname(argv[0]); @@ -111,13 +111,14 @@ main(int argc, char **argv) verbose = true; break; default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); exit(1); } } setup_cancel_handler(); - + if (config_file == NULL) { config_file = malloc(5 + sizeof(CONFIG_FILE)); @@ -126,30 +127,31 @@ main(int argc, char **argv) /* * Read the configuration file: repmgr.conf - */ + */ parse_config(config_file, myClusterName, &myLocalId, conninfo); - if (myLocalId == -1) + if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " - "Check the configuration file.\n"); + "Check the configuration file.\n"); exit(1); } - myLocalConn = establishDBConnection(conninfo, true); + myLocalConn = establishDBConnection(conninfo, true); /* should be v9 or better */ standby_version = pg_version(myLocalConn); if (strcmp(standby_version, "") == 0) { PQfinish(myLocalConn); - fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); + fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), + progname); exit(1); } - /* - * Set my server mode, establish a connection to primary + /* + * Set my server mode, establish a connection to primary * and start monitor - */ + */ myLocalMode = is_standby(myLocalConn) ? STANDBY_MODE : PRIMARY_MODE; if (myLocalMode == PRIMARY_MODE) { @@ -160,7 +162,8 @@ main(int argc, char **argv) else { /* I need the id of the primary as well as a connection to it */ - primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId); + primaryConn = getMasterConnection(myLocalConn, myLocalId, + myClusterName, &primaryId); if (primaryConn == NULL) exit(1); } @@ -169,13 +172,13 @@ main(int argc, char **argv) checkNodeConfiguration(conninfo); if (myLocalMode == STANDBY_MODE) { - MonitorCheck(); + MonitorCheck(); } - /* close the connection to the database and cleanup */ - CloseConnections(); + /* close the connection to the database and cleanup */ + CloseConnections(); - return 0; + return 0; } @@ -187,7 +190,7 @@ main(int argc, char **argv) static void MonitorExecute(void) { - PGresult *res; + PGresult *res; char monitor_standby_timestamp[MAXLEN]; char last_wal_primary_location[MAXLEN]; char last_wal_standby_received[MAXLEN]; @@ -199,9 +202,9 @@ MonitorExecute(void) int connection_retries; - /* - * Check if the master is still available, if after 5 minutes of retries - * we cannot reconnect, try to get a new master. + /* + * Check if the master is still available, if after 5 minutes of retries + * we cannot reconnect, try to get a new master. */ for (connection_retries = 0; connection_retries < 15; connection_retries++) { @@ -212,7 +215,7 @@ MonitorExecute(void) sleep(20); PQreset(primaryConn); - } + } else { fprintf(stderr, "\n%s: Connection to master has been restored, continue monitoring.\n", progname); @@ -221,11 +224,14 @@ MonitorExecute(void) } if (PQstatus(primaryConn) != CONNECTION_OK) { - fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if ", progname); + fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if ", + progname); fprintf(stderr, "%s: another node has been promoted.\n", progname); - for (connection_retries = 0; connection_retries < 6; connection_retries++) + for (connection_retries = 0; connection_retries < 6; + connection_retries++) { - primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId); + primaryConn = getMasterConnection(myLocalConn, myLocalId, + myClusterName, &primaryId); if (PQstatus(primaryConn) == CONNECTION_OK) { /* Connected, we can continue the process so break the loop */ @@ -248,53 +254,53 @@ MonitorExecute(void) /* Check if we still are a standby, we could have been promoted */ if (!is_standby(myLocalConn)) - { - fprintf(stderr, "\n%s: seems like we have been promoted, so exit from monitoring...\n", + { + fprintf(stderr, "\n%s: seems like we have been promoted, so exit from monitoring...\n", progname); CloseConnections(); exit(1); } - /* + /* * first check if there is a command being executed, * and if that is the case, cancel the query so i can - * insert the current record - */ + * insert the current record + */ if (PQisBusy(primaryConn) == 1) CancelQuery(); /* Get local xlog info */ - sprintf(sqlquery, - "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " - "pg_last_xlog_replay_location()"); - - res = PQexec(myLocalConn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); - PQclear(res); + sprintf(sqlquery, + "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " + "pg_last_xlog_replay_location()"); + + res = PQexec(myLocalConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); + PQclear(res); /* if there is any error just let it be and retry in next loop */ - return; - } + return; + } - strcpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0)); - strcpy(last_wal_standby_received , PQgetvalue(res, 0, 1)); - strcpy(last_wal_standby_applied , PQgetvalue(res, 0, 2)); - PQclear(res); + strcpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0)); + strcpy(last_wal_standby_received , PQgetvalue(res, 0, 1)); + strcpy(last_wal_standby_applied , PQgetvalue(res, 0, 2)); + PQclear(res); /* Get primary xlog info */ - sprintf(sqlquery, "SELECT pg_current_xlog_location() "); + sprintf(sqlquery, "SELECT pg_current_xlog_location() "); - res = PQexec(primaryConn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(primaryConn)); - PQclear(res); - return; - } + res = PQexec(primaryConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(primaryConn)); + PQclear(res); + return; + } - strcpy(last_wal_primary_location, PQgetvalue(res, 0, 0)); - PQclear(res); + strcpy(last_wal_primary_location, PQgetvalue(res, 0, 0)); + PQclear(res); /* Calculate the lag */ lsn_primary = walLocationToBytes(last_wal_primary_location); @@ -305,15 +311,15 @@ MonitorExecute(void) * Build the SQL to execute on primary */ sprintf(sqlquery, - "INSERT INTO repmgr_%s.repl_monitor " - "VALUES(%d, %d, '%s'::timestamp with time zone, " - " '%s', '%s', " - " %lld, %lld)", myClusterName, - primaryId, myLocalId, monitor_standby_timestamp, - last_wal_primary_location, - last_wal_standby_received, - (lsn_primary - lsn_standby_received), - (lsn_standby_received - lsn_standby_applied)); + "INSERT INTO repmgr_%s.repl_monitor " + "VALUES(%d, %d, '%s'::timestamp with time zone, " + " '%s', '%s', " + " %lld, %lld)", myClusterName, + primaryId, myLocalId, monitor_standby_timestamp, + last_wal_primary_location, + last_wal_standby_received, + (lsn_primary - lsn_standby_received), + (lsn_standby_received - lsn_standby_applied)); /* * Execute the query asynchronously, but don't check for a result. We @@ -321,39 +327,41 @@ MonitorExecute(void) */ if (PQsendQuery(primaryConn, sqlquery) == 0) fprintf(stderr, "Query could not be sent to primary. %s\n", - PQerrorMessage(primaryConn)); + PQerrorMessage(primaryConn)); } static void checkClusterConfiguration(void) { - PGresult *res; + PGresult *res; sprintf(sqlquery, "SELECT oid FROM pg_class " - " WHERE oid = 'repmgr_%s.repl_nodes'::regclass", - myClusterName); - res = PQexec(myLocalConn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); - PQclear(res); - PQfinish(myLocalConn); - PQfinish(primaryConn); + " WHERE oid = 'repmgr_%s.repl_nodes'::regclass", + myClusterName); + res = PQexec(myLocalConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); + PQclear(res); + PQfinish(myLocalConn); + PQfinish(primaryConn); exit(1); - } + } /* - * If there isn't any results then we have not configured a primary node yet - * in repmgr or the connection string is pointing to the wrong database. + * If there isn't any results then we have not configured a primary node + * yet in repmgr or the connection string is pointing to the wrong + * database. + * * XXX if we are the primary, should we try to create the tables needed? */ if (PQntuples(res) == 0) { - fprintf(stderr, "The replication cluster is not configured\n"); - PQclear(res); - PQfinish(myLocalConn); - PQfinish(primaryConn); + fprintf(stderr, "The replication cluster is not configured\n"); + PQclear(res); + PQfinish(myLocalConn); + PQfinish(primaryConn); exit(1); } PQclear(res); @@ -363,41 +371,40 @@ checkClusterConfiguration(void) static void checkNodeConfiguration(char *conninfo) { - PGresult *res; + PGresult *res; - /* - * Check if we have my node information in repl_nodes - */ + /* Check if we have my node information in repl_nodes */ sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE id = %d AND cluster = '%s' ", - myClusterName, myLocalId, myClusterName); - - res = PQexec(myLocalConn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); - PQclear(res); - PQfinish(myLocalConn); - PQfinish(primaryConn); + " WHERE id = %d AND cluster = '%s' ", + myClusterName, myLocalId, myClusterName); + + res = PQexec(myLocalConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn)); + PQclear(res); + PQfinish(myLocalConn); + PQfinish(primaryConn); exit(1); - } + } /* * If there isn't any results then we have not configured this node yet - * in repmgr, if that is the case we will insert the node to the cluster + * in repmgr, if that is the case we will insert the node to the cluster */ if (PQntuples(res) == 0) { - PQclear(res); + PQclear(res); + /* Adding the node */ sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + "VALUES (%d, '%s', '%s')", + myClusterName, myLocalId, myClusterName, conninfo); - if (!PQexec(primaryConn, sqlquery)) + if (!PQexec(primaryConn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", - PQerrorMessage(primaryConn)); + PQerrorMessage(primaryConn)); PQfinish(myLocalConn); PQfinish(primaryConn); exit(1); @@ -407,33 +414,33 @@ checkNodeConfiguration(char *conninfo) } -static unsigned long long int +static unsigned long long int walLocationToBytes(char *wal_location) { - unsigned int xlogid; - unsigned int xrecoff; - - if (sscanf(wal_location, "%X/%X", &xlogid, &xrecoff) != 2) - { - fprintf(stderr, "wrong log location format: %s\n", wal_location); - return 0; - } - return ((xlogid * 16 * 1024 * 1024 * 255) + xrecoff); + unsigned int xlogid; + unsigned int xrecoff; + + if (sscanf(wal_location, "%X/%X", &xlogid, &xrecoff) != 2) + { + fprintf(stderr, "wrong log location format: %s\n", wal_location); + return 0; + } + return ((xlogid * 16 * 1024 * 1024 * 255) + xrecoff); } -static void +static void help(const char *progname) { - printf(_("\n%s: Replicator manager daemon \n"), progname); - printf(_("Usage:\n")); - printf(_(" %s [OPTIONS]\n"), progname); - printf(_("\nOptions:\n")); - printf(_(" --help show this help, then exit\n")); - printf(_(" --version output version information, then exit\n")); - printf(_(" --verbose output verbose activity information\n")); - printf(_(" -f, --config_file=PATH database to connect to\n")); - printf(_("\n%s monitors a cluster of servers.\n"), progname); + printf(_("\n%s: Replicator manager daemon \n"), progname); + printf(_("Usage:\n")); + printf(_(" %s [OPTIONS]\n"), progname); + printf(_("\nOptions:\n")); + printf(_(" --help show this help, then exit\n")); + printf(_(" --version output version information, then exit\n")); + printf(_(" --verbose output verbose activity information\n")); + printf(_(" -f, --config_file=PATH database to connect to\n")); + printf(_("\n%s monitors a cluster of servers.\n"), progname); } @@ -442,13 +449,13 @@ help(const char *progname) static void handle_sigint(SIGNAL_ARGS) { - CloseConnections(); + CloseConnections(); } static void setup_cancel_handler(void) { - pqsignal(SIGINT, handle_sigint); + pqsignal(SIGINT, handle_sigint); } #endif @@ -456,13 +463,13 @@ setup_cancel_handler(void) static void CancelQuery(void) { - char errbuf[256]; - PGcancel *pgcancel; + char errbuf[256]; + PGcancel *pgcancel; - pgcancel = PQgetCancel(primaryConn); + pgcancel = PQgetCancel(primaryConn); - if (!pgcancel || PQcancel(pgcancel, errbuf, 256) == 0) - fprintf(stderr, "Can't stop current query: %s", errbuf); + if (!pgcancel || PQcancel(pgcancel, errbuf, 256) == 0) + fprintf(stderr, "Can't stop current query: %s", errbuf); - PQfreeCancel(pgcancel); + PQfreeCancel(pgcancel); } From 778303bb6eaad1e125ad2dd3385b6cd7bf255573 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Tue, 7 Dec 2010 21:30:20 -0800 Subject: [PATCH 02/14] Split up install/uninstall actions more like a standard contrib Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- repmgr.sql | 10 +++++++--- uninstall_repmgr.sql | 13 +++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 uninstall_repmgr.sql diff --git a/repmgr.sql b/repmgr.sql index 42ccfb8..4a30081 100644 --- a/repmgr.sql +++ b/repmgr.sql @@ -1,3 +1,10 @@ +/* + * repmgr.sql + * + * Copyright (c) Heroku, 2010 + * + */ + CREATE USER repmgr; CREATE SCHEMA repmgr; @@ -5,7 +12,6 @@ CREATE SCHEMA repmgr; * The table repl_nodes keeps information about all machines in * a cluster */ -drop table if exists repl_nodes cascade; CREATE TABLE repl_nodes ( id integer primary key, cluster text not null, -- Name to identify the cluster @@ -17,7 +23,6 @@ ALTER TABLE repl_nodes OWNER TO repmgr; * Keeps monitor info about every node and their relative "position" * to primary */ -drop table if exists repl_monitor cascade; CREATE TABLE repl_monitor ( primary_node INTEGER NOT NULL, standby_node INTEGER NOT NULL, @@ -40,7 +45,6 @@ ALTER TABLE repl_monitor OWNER TO repmgr; * have received) * time_lag: how many seconds are we from being up-to-date with master */ -drop view if exists repl_status; CREATE VIEW repl_status AS WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node ORDER BY last_monitor_time desc) diff --git a/uninstall_repmgr.sql b/uninstall_repmgr.sql new file mode 100644 index 0000000..8cb1b4d --- /dev/null +++ b/uninstall_repmgr.sql @@ -0,0 +1,13 @@ +/* + * uninstall_repmgr.sql + * + * Copyright (c) Heroku, 2010 + * + */ + +DROP TABLE IF EXISTS repl_nodes; +DROP TABLE IF EXISTS repl_monitor; +DROP VIEW IF EXISTS repl_status; + +DROP SCHEMA repmgr; +DROP USER repmgr; From 846c0b92e883879e5c44051a7d2fc78a0ffa04d6 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Tue, 7 Dec 2010 21:30:44 -0800 Subject: [PATCH 03/14] Install install/uninstall SQL also. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile b/Makefile index 655467a..9c5c975 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,8 @@ # # Makefile +# # Copyright (c) 2ndQuadrant, 2010 +# Copyright (c) Heroku, 2010 repmgrd_OBJS = dbutils.o config.o repmgrd.o repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o @@ -26,10 +28,19 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now +# is overriding pgxs install. install: $(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)' $(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)' +ifneq (,$(DATA)$(DATA_built)) + @for file in $(addprefix $(srcdir)/, $(DATA)) $(DATA_built); do \ + echo "$(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'"; \ + $(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'; \ + done +endif + clean: rm -f *.o rm -f repmgrd From 916c0492fbb67aa682f7e63bb2150b77948d2ad7 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Tue, 7 Dec 2010 21:31:49 -0800 Subject: [PATCH 04/14] sprintf to snprintf conversion Move out string operations to another file, and introduce a frontend to snprintf for various situations. This change is important for catching and eliminating sprintf overflows, which are as of now many times silently corrupting memory. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- Makefile | 6 ++-- check_dir.c | 12 +++++-- config.c | 4 +++ dbutils.c | 28 ++++++++++------- repmgr.c | 90 ++++++++++++++++++++++++++++------------------------- repmgr.h | 5 +-- repmgrd.c | 21 +++++++------ strutil.c | 72 ++++++++++++++++++++++++++++++++++++++++++ strutil.h | 19 +++++++++++ 9 files changed, 187 insertions(+), 70 deletions(-) create mode 100644 strutil.c create mode 100644 strutil.h diff --git a/Makefile b/Makefile index 9c5c975..dccbf2c 100644 --- a/Makefile +++ b/Makefile @@ -4,8 +4,10 @@ # Copyright (c) 2ndQuadrant, 2010 # Copyright (c) Heroku, 2010 -repmgrd_OBJS = dbutils.o config.o repmgrd.o -repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o +repmgrd_OBJS = dbutils.o config.o repmgrd.o strutil.o +repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o strutil.o + +DATA = repmgr.sql uninstall_repmgr.sql PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) diff --git a/check_dir.c b/check_dir.c index 7296261..e0220b7 100644 --- a/check_dir.c +++ b/check_dir.c @@ -1,6 +1,8 @@ /* * check_dir.c + * * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) Heroku, 2010 * * Directories management functions */ @@ -12,9 +14,12 @@ #include #include +/* NB: postgres_fe must be included BEFORE check_dir */ #include "postgres_fe.h" #include "check_dir.h" +#include "strutil.h" + static int mkdir_p(char *path, mode_t omode); @@ -207,10 +212,11 @@ mkdir_p(char *path, mode_t omode) bool is_pg_dir(char *dir) { - char path[8192]; - struct stat sb; + const size_t buf_sz = 8192; + char path[buf_sz]; + struct stat sb; - sprintf(path, "%s/PG_VERSION", dir); + xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); return (stat(path, &sb) == 0) ? true : false; } diff --git a/config.c b/config.c index 4c4ecdb..6ae49e5 100644 --- a/config.c +++ b/config.c @@ -1,12 +1,16 @@ /* * config.c + * * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) Heroku, 2010 * * Functions to parse the config file */ #include "repmgr.h" +#include "strutil.h" + void parse_config(const char *config_file, char *cluster_name, int *node, char *conninfo) diff --git a/dbutils.c b/dbutils.c index bec62d6..e3f5577 100644 --- a/dbutils.c +++ b/dbutils.c @@ -7,6 +7,7 @@ */ #include "repmgr.h" +#include "strutil.h" PGconn * establishDBConnection(const char *conninfo, const bool exit_on_error) @@ -66,11 +67,13 @@ is_standby(PGconn *conn) char * pg_version(PGconn *conn) { - PGresult *res; - char *major_version; + PGresult *res; + + const size_t major_version_sz = 10; + char *major_version; - int major_version1; - char *major_version2; + int major_version1; + char *major_version2; res = PQexec(conn, "WITH pg_version(ver) AS " @@ -90,12 +93,13 @@ pg_version(PGconn *conn) major_version2 = PQgetvalue(res, 0, 1); PQclear(res); - major_version = malloc(10); + major_version = malloc(major_version_sz); if (major_version1 >= 9) { /* form a major version string */ - sprintf(major_version, "%d.%s", major_version1, major_version2); + xsnprintf(major_version, major_version_sz, "%d.%s", + major_version1, major_version2); } else strcpy(major_version, ""); @@ -109,9 +113,9 @@ guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value) { PGresult *res; - char sqlquery[8192]; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, "SELECT true FROM pg_settings " + sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " " WHERE name = '%s' AND setting %s '%s'", parameter, op, value); @@ -139,9 +143,9 @@ get_cluster_size(PGconn *conn) { PGresult *res; const char *size; - char sqlquery[8192]; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, + sqlquery_snprintf(sqlquery, "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " " FROM pg_database "); @@ -169,12 +173,12 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, PGconn *master_conn = NULL; PGresult *res1; PGresult *res2; - char sqlquery[8192]; + char sqlquery[QUERY_STR_LEN]; char master_conninfo[8192]; int i; /* find all nodes belonging to this cluster */ - sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " " WHERE cluster = '%s' and id <> %d", cluster, cluster, id); diff --git a/repmgr.c b/repmgr.c index 950c676..61d1e85 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1,6 +1,8 @@ /* * repmgr.c + * * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) Heroku, 2010 * * Command interpreter for the repmgr * This module is a command-line utility to easily setup a cluster of @@ -19,6 +21,7 @@ #include #include "check_dir.h" +#include "strutil.h" #define RECOVERY_FILE "recovery.conf" #define RECOVERY_DONE_FILE "recovery.done" @@ -30,7 +33,6 @@ #define STANDBY_PROMOTE 4 #define STANDBY_FOLLOW 5 -#define QUERY_STR_LEN 8192 static void help(const char *progname); static bool create_recovery_file(const char *data_dir); @@ -227,8 +229,10 @@ main(int argc, char **argv) if (config_file == NULL) { - config_file = malloc(5 + sizeof(CONFIG_FILE)); - sprintf(config_file, "./%s", CONFIG_FILE); + const int buf_sz = 3 + sizeof(CONFIG_FILE); + + config_file = malloc(buf_sz); + xsnprintf(config_file, buf_sz, "./%s", CONFIG_FILE); } if (wal_keep_segments == NULL) @@ -329,7 +333,8 @@ do_master_register(void) } /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", + sqlquery_snprintf(sqlquery, + "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -357,7 +362,7 @@ do_master_register(void) if (!schema_exists) { /* ok, create the schema */ - sprintf(sqlquery, "CREATE SCHEMA repmgr_%s", myClusterName); + sqlquery_snprintf(sqlquery, "CREATE SCHEMA repmgr_%s", myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the schema repmgr_%s: %s\n", @@ -367,7 +372,7 @@ do_master_register(void) } /* ... the tables */ - sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " + sqlquery_snprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " " id integer primary key, " " cluster text not null, " " conninfo text not null)", myClusterName); @@ -380,7 +385,7 @@ do_master_register(void) return; } - sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( " + sqlquery_snprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( " " primary_node INTEGER NOT NULL, " " standby_node INTEGER NOT NULL, " " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " @@ -399,7 +404,7 @@ do_master_register(void) } /* and the view */ - sprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS " + sqlquery_snprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS " " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " " ORDER BY last_monitor_time desc) " " FROM repmgr_%s.repl_monitor) " @@ -435,7 +440,7 @@ do_master_register(void) /* Now register the master */ if (force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " " WHERE id = %d", myClusterName, myLocalId); @@ -448,7 +453,7 @@ do_master_register(void) } } - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " "VALUES (%d, '%s', '%s')", myClusterName, myLocalId, myClusterName, conninfo); @@ -514,7 +519,7 @@ do_standby_register(void) } /* Check if there is a schema for this cluster */ - sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", + sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -566,7 +571,7 @@ do_standby_register(void) /* Now register the standby */ if (force) { - sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " " WHERE id = %d", myClusterName, myLocalId); @@ -580,7 +585,7 @@ do_standby_register(void) } } - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " "VALUES (%d, '%s', '%s')", myClusterName, myLocalId, myClusterName, conninfo); @@ -744,7 +749,7 @@ do_standby_clone(void) printf(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); /* Check if the tablespace locations exists and that we can write to them */ - sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + sqlquery_snprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -819,7 +824,7 @@ do_standby_clone(void) fprintf(stderr, "Starting backup...\n"); /* Get the data directory full path and the configuration files location */ - sprintf(sqlquery, "SELECT name, setting " + sqlquery_snprintf(sqlquery, "SELECT name, setting " " FROM pg_settings " " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); res = PQexec(conn, sqlquery); @@ -849,7 +854,7 @@ do_standby_clone(void) * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files */ - sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); + sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -876,9 +881,9 @@ do_standby_clone(void) */ /* need to create the global sub directory */ - sprintf(master_control_file, "%s/global/pg_control", - master_data_directory); - sprintf(local_control_file, "%s/global", dest_dir); + maxlen_snprintf(master_control_file, "%s/global/pg_control", + master_data_directory); + maxlen_snprintf(local_control_file, "%s/global", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s ... "), @@ -901,7 +906,7 @@ do_standby_clone(void) * find and appropiate rsync option but besides we could someday make all * these rsync happen concurrently */ - sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + sqlquery_snprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -944,7 +949,7 @@ do_standby_clone(void) fprintf(stderr, "Finishing backup...\n"); - sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); + sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -970,7 +975,7 @@ do_standby_clone(void) * We need to create the pg_xlog sub directory too, I'm reusing a variable * here. */ - sprintf(local_control_file, "%s/pg_xlog", dest_dir); + maxlen_snprintf(local_control_file, "%s/pg_xlog", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s, you will need to do it manually...\n"), @@ -994,7 +999,7 @@ do_standby_promote(void) PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; + char script[MAXLEN]; char myClusterName[MAXLEN]; int myLocalId = -1; @@ -1056,7 +1061,7 @@ do_standby_promote(void) printf(_("\n%s: Promoting standby...\n"), progname); /* Get the data directory full path and the last subdirectory */ - sprintf(sqlquery, "SELECT setting " + sqlquery_snprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -1071,12 +1076,12 @@ do_standby_promote(void) PQclear(res); PQfinish(conn); - sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); - sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); + maxlen_snprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); rename(recovery_file_path, recovery_done_path); /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); + maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { @@ -1108,7 +1113,7 @@ do_standby_follow(void) PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; + char script[MAXLEN]; char myClusterName[MAXLEN]; int myLocalId = -1; @@ -1206,7 +1211,7 @@ do_standby_follow(void) printf(_("\n%s: Changing standby's master...\n"), progname); /* Get the data directory full path */ - sprintf(sqlquery, "SELECT setting " + sqlquery_snprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -1227,7 +1232,7 @@ do_standby_follow(void) /* Finally, restart the service */ /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); + maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { @@ -1282,7 +1287,7 @@ create_recovery_file(const char *data_dir) char recovery_file_path[MAXLEN]; char line[MAXLEN]; - sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); recovery_file = fopen(recovery_file_path, "w"); if (recovery_file == NULL) @@ -1291,7 +1296,7 @@ create_recovery_file(const char *data_dir) return false; } - sprintf(line, "standby_mode = 'on'\n"); + maxlen_snprintf(line, "standby_mode = 'on'\n"); if (fputs(line, recovery_file) == EOF) { fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); @@ -1299,7 +1304,7 @@ create_recovery_file(const char *data_dir) return false; } - sprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, + maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, ((masterport==NULL) ? "5432" : masterport)); if (fputs(line, recovery_file) == EOF) { @@ -1319,35 +1324,36 @@ static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory) { - char script[QUERY_STR_LEN]; - char options[QUERY_STR_LEN]; - char host_string[QUERY_STR_LEN]; + char script[MAXLEN]; + char options[MAXLEN]; + char host_string[MAXLEN]; int r; - sprintf(options, "--archive --checksum --compress --progress --rsh=ssh"); + maxlen_snprintf(options, + "--archive --checksum --compress --progress --rsh=ssh"); if (force) strcat(options, " --delete"); if (remote_user == NULL) { - sprintf(host_string,"%s",host); + maxlen_snprintf(host_string, "%s", host); } else { - sprintf(host_string,"%s@%s",remote_user,host); + maxlen_snprintf(host_string,"%s@%s",remote_user,host); } if (is_directory) { strcat(options, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); - sprintf(script, "rsync %s %s:%s/* %s", + maxlen_snprintf(script, "rsync %s %s:%s/* %s", options, host_string, remote_path, local_path); } else { - sprintf(script, "rsync %s %s:%s %s/.", - options, host_string, remote_path, local_path); + maxlen_snprintf(script, "rsync %s %s:%s %s/.", + options, host_string, remote_path, local_path); } if (verbose) diff --git a/repmgr.h b/repmgr.h index 4768f96..93e7ed4 100644 --- a/repmgr.h +++ b/repmgr.h @@ -1,6 +1,8 @@ /* - * dbutils.h + * repmgr.h + * * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) Heroku, 2010 * */ @@ -18,7 +20,6 @@ #define PRIMARY_MODE 0 #define STANDBY_MODE 1 -#define MAXLEN 80 #define CONFIG_FILE "repmgr.conf" #endif diff --git a/repmgrd.c b/repmgrd.c index a5e00ce..d4f4113 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -14,6 +14,7 @@ #include #include "repmgr.h" +#include "strutil.h" #include "libpq/pqsignal.h" @@ -29,7 +30,7 @@ int primaryId; char primaryConninfo[MAXLEN]; PGconn *primaryConn; -char sqlquery[8192]; +char sqlquery[QUERY_STR_LEN]; const char *progname; @@ -121,8 +122,10 @@ main(int argc, char **argv) if (config_file == NULL) { - config_file = malloc(5 + sizeof(CONFIG_FILE)); - sprintf(config_file, "./%s", CONFIG_FILE); + const size_t buf_sz = 3 + sizeof(CONFIG_FILE); + + config_file = malloc(buf_sz); + xsnprintf(config_file, buf_sz, "./%s", CONFIG_FILE); } /* @@ -270,7 +273,7 @@ MonitorExecute(void) CancelQuery(); /* Get local xlog info */ - sprintf(sqlquery, + sqlquery_snprintf(sqlquery, "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " "pg_last_xlog_replay_location()"); @@ -289,7 +292,7 @@ MonitorExecute(void) PQclear(res); /* Get primary xlog info */ - sprintf(sqlquery, "SELECT pg_current_xlog_location() "); + sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() "); res = PQexec(primaryConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -310,7 +313,7 @@ MonitorExecute(void) /* * Build the SQL to execute on primary */ - sprintf(sqlquery, + sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_monitor " "VALUES(%d, %d, '%s'::timestamp with time zone, " " '%s', '%s', " @@ -336,7 +339,7 @@ checkClusterConfiguration(void) { PGresult *res; - sprintf(sqlquery, "SELECT oid FROM pg_class " + sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class " " WHERE oid = 'repmgr_%s.repl_nodes'::regclass", myClusterName); res = PQexec(myLocalConn, sqlquery); @@ -374,7 +377,7 @@ checkNodeConfiguration(char *conninfo) PGresult *res; /* Check if we have my node information in repl_nodes */ - sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " " WHERE id = %d AND cluster = '%s' ", myClusterName, myLocalId, myClusterName); @@ -397,7 +400,7 @@ checkNodeConfiguration(char *conninfo) PQclear(res); /* Adding the node */ - sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " + sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " "VALUES (%d, '%s', '%s')", myClusterName, myLocalId, myClusterName, conninfo); diff --git a/strutil.c b/strutil.c new file mode 100644 index 0000000..6286c31 --- /dev/null +++ b/strutil.c @@ -0,0 +1,72 @@ +/* + * strutil.c + * + * Copyright (c) Heroku, 2010 + * + */ + +#include +#include +#include + +#include "strutil.h" + +static int xvsnprintf(char *str, size_t size, const char *format, va_list ap); + + +static int +xvsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int retval; + + retval = vsnprintf(str, size, format, ap); + + if (retval >= size) + { + fprintf(stderr, "Buffer not large enough to format entire string\n"); + exit(255); + } + + return retval; +} + + +int +xsnprintf(char *str, size_t size, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, size, format, arglist); + va_end(arglist); + + return retval; +} + + +int +sqlquery_snprintf(char *str, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, QUERY_STR_LEN, format, arglist); + va_end(arglist); + + return retval; +} + + +int maxlen_snprintf(char *str, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, MAXLEN, format, arglist); + va_end(arglist); + + return retval; +} diff --git a/strutil.h b/strutil.h new file mode 100644 index 0000000..c2f5d55 --- /dev/null +++ b/strutil.h @@ -0,0 +1,19 @@ +/* + * strutil.h + * + * Copyright (c) Heroku, 2010 + * + */ + +#ifndef _STRUTIL_H_ +#define _STRUTIL_H_ + +#define QUERY_STR_LEN 8192 +#define MAXLEN 80 + + +extern int xsnprintf(char *str, size_t size, const char *format, ...); +extern int sqlquery_snprintf(char *str, const char *format, ...); +extern int maxlen_snprintf(char *str, const char *format, ...); + +#endif /* _STRUTIL_H_ */ From 84b69b3bd4b879330284eca179a8d1ca8fdeb943 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Tue, 7 Dec 2010 21:37:56 -0800 Subject: [PATCH 05/14] Whitespace to adjust for longer snprintf identifier This is done in a separate patch to try and reduce the sound and fury of the patch that actually did the conversion from sprintf to snprintf-alikes. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- config.c | 2 +- dbutils.c | 15 +++++---- repmgr.c | 97 ++++++++++++++++++++++++++++++++----------------------- repmgrd.c | 39 +++++++++++----------- 4 files changed, 85 insertions(+), 68 deletions(-) diff --git a/config.c b/config.c index 6ae49e5..2aac8c1 100644 --- a/config.c +++ b/config.c @@ -43,7 +43,7 @@ parse_config(const char *config_file, char *cluster_name, int *node, strncpy (conninfo, value, MAXLEN); else printf("WARNING: %s/%s: Unknown name/value pair!\n", - name, value); + name, value); } /* Close file */ diff --git a/dbutils.c b/dbutils.c index e3f5577..9a771df 100644 --- a/dbutils.c +++ b/dbutils.c @@ -116,8 +116,8 @@ guc_setted(PGconn *conn, const char *parameter, const char *op, char sqlquery[QUERY_STR_LEN]; sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " - " WHERE name = '%s' AND setting %s '%s'", - parameter, op, value); + " WHERE name = '%s' AND setting %s '%s'", + parameter, op, value); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -145,9 +145,10 @@ get_cluster_size(PGconn *conn) const char *size; char sqlquery[QUERY_STR_LEN]; - sqlquery_snprintf(sqlquery, - "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " - " FROM pg_database "); + sqlquery_snprintf( + sqlquery, + "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " + " FROM pg_database "); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -179,8 +180,8 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, /* find all nodes belonging to this cluster */ sqlquery_snprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE cluster = '%s' and id <> %d", - cluster, cluster, id); + " WHERE cluster = '%s' and id <> %d", + cluster, cluster, id); res1 = PQexec(standby_conn, sqlquery); if (PQresultStatus(res1) != PGRES_TUPLES_OK) diff --git a/repmgr.c b/repmgr.c index 61d1e85..d9d9600 100644 --- a/repmgr.c +++ b/repmgr.c @@ -335,7 +335,7 @@ do_master_register(void) /* Check if there is a schema for this cluster */ sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", - myClusterName); + myClusterName); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -373,9 +373,9 @@ do_master_register(void) /* ... the tables */ sqlquery_snprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " - " id integer primary key, " - " cluster text not null, " - " conninfo text not null)", myClusterName); + " id integer primary key, " + " cluster text not null, " + " conninfo text not null)", myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, @@ -386,14 +386,14 @@ do_master_register(void) } sqlquery_snprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( " - " primary_node INTEGER NOT NULL, " - " standby_node INTEGER NOT NULL, " - " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " - " last_wal_primary_location TEXT NOT NULL, " - " last_wal_standby_location TEXT NOT NULL, " - " replication_lag BIGINT NOT NULL, " - " apply_lag BIGINT NOT NULL) ", - myClusterName); + " primary_node INTEGER NOT NULL, " + " standby_node INTEGER NOT NULL, " + " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " + " last_wal_primary_location TEXT NOT NULL, " + " last_wal_standby_location TEXT NOT NULL, " + " replication_lag BIGINT NOT NULL, " + " apply_lag BIGINT NOT NULL) ", + myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, @@ -405,14 +405,14 @@ do_master_register(void) /* and the view */ sqlquery_snprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS " - " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " - " ORDER BY last_monitor_time desc) " - " FROM repmgr_%s.repl_monitor) " - " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " - " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " - " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " - " FROM monitor_info a " - " WHERE row_number = 1", myClusterName, myClusterName); + " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " + " ORDER BY last_monitor_time desc) " + " FROM repmgr_%s.repl_monitor) " + " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " + " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " + " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " + " FROM monitor_info a " + " WHERE row_number = 1", myClusterName, myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, @@ -441,8 +441,8 @@ do_master_register(void) if (force) { sqlquery_snprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + " WHERE id = %d", + myClusterName, myLocalId); if (!PQexec(conn, sqlquery)) { @@ -454,8 +454,8 @@ do_master_register(void) } sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + "VALUES (%d, '%s', '%s')", + myClusterName, myLocalId, myClusterName, conninfo); if (!PQexec(conn, sqlquery)) { @@ -519,8 +519,9 @@ do_standby_register(void) } /* Check if there is a schema for this cluster */ - sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", - myClusterName); + sqlquery_snprintf(sqlquery, + "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", + myClusterName); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -572,8 +573,8 @@ do_standby_register(void) if (force) { sqlquery_snprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " - " WHERE id = %d", - myClusterName, myLocalId); + " WHERE id = %d", + myClusterName, myLocalId); if (!PQexec(master_conn, sqlquery)) { @@ -586,8 +587,8 @@ do_standby_register(void) } sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + "VALUES (%d, '%s', '%s')", + myClusterName, myLocalId, myClusterName, conninfo); if (!PQexec(master_conn, sqlquery)) { @@ -748,8 +749,13 @@ do_standby_clone(void) if (verbose) printf(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); - /* Check if the tablespace locations exists and that we can write to them */ - sqlquery_snprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + /* + * Check if the tablespace locations exists and that we can write to them. + */ + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + "WHERE spcname NOT IN ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -824,9 +830,12 @@ do_standby_clone(void) fprintf(stderr, "Starting backup...\n"); /* Get the data directory full path and the configuration files location */ - sqlquery_snprintf(sqlquery, "SELECT name, setting " - " FROM pg_settings " - " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); + sqlquery_snprintf( + sqlquery, + "SELECT name, setting " + " FROM pg_settings " + " WHERE name IN ('data_directory', 'config_file', 'hba_file', " + " 'ident_file')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -854,7 +863,10 @@ do_standby_clone(void) * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files */ - sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); + sqlquery_snprintf( + sqlquery, + "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", + time(NULL)); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -906,7 +918,10 @@ do_standby_clone(void) * find and appropiate rsync option but besides we could someday make all * these rsync happen concurrently */ - sqlquery_snprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + " WHERE spcname NOT IN ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -1062,7 +1077,7 @@ do_standby_promote(void) /* Get the data directory full path and the last subdirectory */ sqlquery_snprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); + " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -1212,7 +1227,7 @@ do_standby_follow(void) /* Get the data directory full path */ sqlquery_snprintf(sqlquery, "SELECT setting " - " FROM pg_settings WHERE name = 'data_directory'"); + " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -1305,7 +1320,7 @@ create_recovery_file(const char *data_dir) } maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, - ((masterport==NULL) ? "5432" : masterport)); + ((masterport==NULL) ? "5432" : masterport)); if (fputs(line, recovery_file) == EOF) { fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); @@ -1348,7 +1363,7 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, strcat(options, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); maxlen_snprintf(script, "rsync %s %s:%s/* %s", - options, host_string, remote_path, local_path); + options, host_string, remote_path, local_path); } else { diff --git a/repmgrd.c b/repmgrd.c index d4f4113..03e3d78 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -61,7 +61,7 @@ static void setup_cancel_handler(void); /* * Every 3 seconds, insert monitor info */ -#define MonitorCheck() \ +#define MonitorCheck() \ for (;;) \ { \ MonitorExecute(); \ @@ -273,9 +273,10 @@ MonitorExecute(void) CancelQuery(); /* Get local xlog info */ - sqlquery_snprintf(sqlquery, - "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " - "pg_last_xlog_replay_location()"); + sqlquery_snprintf( + sqlquery, + "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " + "pg_last_xlog_replay_location()"); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -314,15 +315,15 @@ MonitorExecute(void) * Build the SQL to execute on primary */ sqlquery_snprintf(sqlquery, - "INSERT INTO repmgr_%s.repl_monitor " - "VALUES(%d, %d, '%s'::timestamp with time zone, " - " '%s', '%s', " - " %lld, %lld)", myClusterName, - primaryId, myLocalId, monitor_standby_timestamp, - last_wal_primary_location, - last_wal_standby_received, - (lsn_primary - lsn_standby_received), - (lsn_standby_received - lsn_standby_applied)); + "INSERT INTO repmgr_%s.repl_monitor " + "VALUES(%d, %d, '%s'::timestamp with time zone, " + " '%s', '%s', " + " %lld, %lld)", myClusterName, + primaryId, myLocalId, monitor_standby_timestamp, + last_wal_primary_location, + last_wal_standby_received, + (lsn_primary - lsn_standby_received), + (lsn_standby_received - lsn_standby_applied)); /* * Execute the query asynchronously, but don't check for a result. We @@ -340,8 +341,8 @@ checkClusterConfiguration(void) PGresult *res; sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class " - " WHERE oid = 'repmgr_%s.repl_nodes'::regclass", - myClusterName); + " WHERE oid = 'repmgr_%s.repl_nodes'::regclass", + myClusterName); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -378,8 +379,8 @@ checkNodeConfiguration(char *conninfo) /* Check if we have my node information in repl_nodes */ sqlquery_snprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE id = %d AND cluster = '%s' ", - myClusterName, myLocalId, myClusterName); + " WHERE id = %d AND cluster = '%s' ", + myClusterName, myLocalId, myClusterName); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -401,8 +402,8 @@ checkNodeConfiguration(char *conninfo) /* Adding the node */ sqlquery_snprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " - "VALUES (%d, '%s', '%s')", - myClusterName, myLocalId, myClusterName, conninfo); + "VALUES (%d, '%s', '%s')", + myClusterName, myLocalId, myClusterName, conninfo); if (!PQexec(primaryConn, sqlquery)) { From 3f2094a242153d804f32addd1f3ae50db76cdc77 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Fri, 10 Dec 2010 12:47:09 -0800 Subject: [PATCH 06/14] Initialize connection pointers to NULL This makes some bugs easier to find. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- repmgrd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 03e3d78..c82ddd7 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1,6 +1,8 @@ /* * repmgrd.c + * * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) Heroku, 2010 * * Replication manager daemon * This module connects to the nodes of a replication cluster and monitors @@ -23,12 +25,12 @@ char myClusterName[MAXLEN]; /* Local info */ int myLocalMode = STANDBY_MODE; int myLocalId = -1; -PGconn *myLocalConn; +PGconn *myLocalConn = NULL; /* Primary info */ int primaryId; char primaryConninfo[MAXLEN]; -PGconn *primaryConn; +PGconn *primaryConn = NULL; char sqlquery[QUERY_STR_LEN]; From 6cea339697bde657e0e733dcb69f7c3d1b079e8e Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Fri, 10 Dec 2010 12:50:52 -0800 Subject: [PATCH 07/14] Fix a use-after-free A result is being cleared while there are still pointers that refer to datums in it. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- dbutils.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbutils.c b/dbutils.c index 9a771df..ebe8373 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1,6 +1,8 @@ /* * dbutils.c + * * Copyright (c) 2ndQuadrant, 2010 + * Copyright (c) Heroku, 2010 * * Database connection/management functions * @@ -91,7 +93,6 @@ pg_version(PGconn *conn) major_version1 = atoi(PQgetvalue(res, 0, 0)); major_version2 = PQgetvalue(res, 0, 1); - PQclear(res); major_version = malloc(major_version_sz); @@ -104,6 +105,8 @@ pg_version(PGconn *conn) else strcpy(major_version, ""); + PQclear(res); + return major_version; } From 309bb92d95e9aac917bcd9d2ef28e5964666eb94 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Fri, 10 Dec 2010 12:51:42 -0800 Subject: [PATCH 08/14] Prevent a double-free This can occur because prior to this, there is a code path that looks like this: primaryConn = myLocalConn CloseConnections will subsequently try to free both with PQFinish. I'm not sure if this is the right fix -- it's more of hack -- without more information about design intention. One reasonable alternative would be to have CloseConnections perform this check itself. As-is I am pretty sure that this fix leaves a signal-race (when CloseConnections is called, without the check, in the interrupt handler) unfixed. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- repmgrd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/repmgrd.c b/repmgrd.c index c82ddd7..3798171 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -180,6 +180,10 @@ main(int argc, char **argv) MonitorCheck(); } + /* Prevent a double-free */ + if (primaryConn == myLocalConn) + myLocalConn = NULL; + /* close the connection to the database and cleanup */ CloseConnections(); From fc13d50e3797e3f37bab1909f3f3fb5bbea76024 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Fri, 10 Dec 2010 12:55:30 -0800 Subject: [PATCH 09/14] Canonicalize whitespace Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.c b/config.c index 2aac8c1..41e4bff 100644 --- a/config.c +++ b/config.c @@ -36,11 +36,11 @@ parse_config(const char *config_file, char *cluster_name, int *node, /* Copy into correct entry in parameters struct */ if (strcmp(name, "cluster") == 0) - strncpy (cluster_name, value, MAXLEN); + strncpy(cluster_name, value, MAXLEN); else if (strcmp(name, "node") == 0) *node = atoi(value); else if (strcmp(name, "conninfo") == 0) - strncpy (conninfo, value, MAXLEN); + strncpy(conninfo, value, MAXLEN); else printf("WARNING: %s/%s: Unknown name/value pair!\n", name, value); From ec73a07e2f654d653ae7b2cd17e722ce32cf0e78 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Fri, 10 Dec 2010 12:55:45 -0800 Subject: [PATCH 10/14] Make various buffers larger MAXLEN definitely needed to be bigger to properly format fairly common connection strings. In addition, the reliance on xsnprintf helps detect cases where even this buffer is not long enough. the buffer in parse_config has been made bigger in a bit of sloppy programming, but what really needs to happen is reporting when a line cannot be properly parsed/is too big for the buffer. That is just a kludge. Signed-off-by: Dan Farina Signed-off-by: Peter van Hardenberg --- config.c | 2 +- strutil.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config.c b/config.c index 41e4bff..1a4b5b0 100644 --- a/config.c +++ b/config.c @@ -15,7 +15,7 @@ void parse_config(const char *config_file, char *cluster_name, int *node, char *conninfo) { - char *s, buff[256]; + char *s, buff[1024]; FILE *fp = fopen (config_file, "r"); if (fp == NULL) diff --git a/strutil.h b/strutil.h index c2f5d55..efefff7 100644 --- a/strutil.h +++ b/strutil.h @@ -9,7 +9,7 @@ #define _STRUTIL_H_ #define QUERY_STR_LEN 8192 -#define MAXLEN 80 +#define MAXLEN 1024 extern int xsnprintf(char *str, size_t size, const char *format, ...); From faddaed316cb39b64bc87dc166ba712c5f1639c3 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Wed, 22 Dec 2010 15:01:35 -0800 Subject: [PATCH 11/14] Avoid a use-after-free in verbose logging Previously, this print would use memory freed by PQClear previously. Signed-off-by: Dan Farina --- repmgr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/repmgr.c b/repmgr.c index 7eaca0f..b933601 100644 --- a/repmgr.c +++ b/repmgr.c @@ -974,6 +974,12 @@ do_standby_clone(void) return; } last_wal_segment = PQgetvalue(res, 0, 0); + + if (verbose) + printf( + _("%s requires primary to keep WAL files %s until at least %s\n"), + progname, first_wal_segment, last_wal_segment); + PQclear(res); PQfinish(conn); @@ -981,11 +987,6 @@ do_standby_clone(void) if (r != 0) return; - if (verbose) - printf( - _("%s requires primary to keep WAL files %s until at least %s\n"), - progname, first_wal_segment, last_wal_segment); - /* * We need to create the pg_xlog sub directory too, I'm reusing a variable * here. From 620974ba042b20e43bcac9cf22b11f25181fa763 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Wed, 22 Dec 2010 19:00:21 -0800 Subject: [PATCH 12/14] Fix unsafe string handling It looks like the old code would overflow in some cases. Signed-off-by: Dan Farina --- repmgr.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c index b933601..9fc2c7b 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1217,9 +1217,20 @@ do_standby_follow(void) * before closing the connection because we will need them to * recreate the recovery.conf file */ - host = malloc(20); + + /* + * Copy the hostname to the 'host' global variable from the master + * connection. + */ + { + char *pqhost = PQhost(master_conn); + const int host_buf_sz = strlen(pqhost); + + host = malloc(host_buf_sz + 1); + xsnprintf(host, host_buf_sz, "%s", pqhost); + } + masterport = malloc(10); - strcpy(host, PQhost(master_conn)); strcpy(masterport, PQport(master_conn)); PQfinish(master_conn); From 29c39c21f6cfac32def1c7f0b82cca78e7bc2f47 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Wed, 22 Dec 2010 15:01:35 -0800 Subject: [PATCH 13/14] Avoid a use-after-free in verbose logging (again) Previously, this print would use memory freed by PQClear previously. Also allocate/free memory to prevent this tiny memory leak. Signed-off-by: Dan Farina --- repmgr.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c index 9fc2c7b..c98e714 100644 --- a/repmgr.c +++ b/repmgr.c @@ -623,8 +623,8 @@ do_standby_clone(void) char master_control_file[MAXLEN]; char local_control_file[MAXLEN]; - const char *first_wal_segment = NULL; - const char *last_wal_segment = NULL; + char *first_wal_segment = NULL; + const char *last_wal_segment = NULL; char master_version[MAXVERSIONSTR]; @@ -875,6 +875,16 @@ do_standby_clone(void) PQfinish(conn); return; } + + if (verbose) + { + char *first_wal_seg_pq = PQgetvalue(res, 0, 0); + size_t buf_sz = strlen(first_wal_seg_pq); + + first_wal_segment = malloc(buf_sz + 1); + xsnprintf(first_wal_segment, buf_sz, "%s", first_wal_seg_pq); + } + first_wal_segment = PQgetvalue(res, 0, 0); PQclear(res); @@ -976,10 +986,19 @@ do_standby_clone(void) last_wal_segment = PQgetvalue(res, 0, 0); if (verbose) + { printf( _("%s requires primary to keep WAL files %s until at least %s\n"), progname, first_wal_segment, last_wal_segment); + /* + * Only free the first_wal_segment since it was copied out of the + * pqresult. + */ + free(first_wal_segment); + first_wal_segment = NULL; + } + PQclear(res); PQfinish(conn); From f969dca821c4fecb9f951eaa434c72ee4c63a136 Mon Sep 17 00:00:00 2001 From: Dan Farina Date: Wed, 22 Dec 2010 19:43:32 -0800 Subject: [PATCH 14/14] Hack to get passwords in recovery.conf Signed-off-by: Dan Farina --- dbutils.c | 16 ++++++++++++++-- dbutils.h | 4 ++-- repmgr.c | 45 +++++++++++++++++++++++++++++++++++++-------- repmgrd.c | 4 ++-- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/dbutils.c b/dbutils.c index c89fe17..25a14f1 100644 --- a/dbutils.c +++ b/dbutils.c @@ -163,18 +163,30 @@ get_cluster_size(PGconn *conn) /* * get a connection to master by reading repl_nodes, creating a connection * to each node (one at a time) and finding if it is a master or a standby + * + * NB: If master_conninfo_out may be NULL. If it is non-null, it is assumed to + * point to allocated memory of MAXCONNINFO in length, and the master server + * connection string is placed there. */ PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, - int *master_id) + int *master_id, char *master_conninfo_out) { PGconn *master_conn = NULL; PGresult *res1; PGresult *res2; char sqlquery[QUERY_STR_LEN]; - char master_conninfo[MAXCONNINFO]; + char master_conninfo_stack[MAXCONNINFO]; + char *master_conninfo = &*master_conninfo_stack; int i; + /* + * If the caller wanted to get a copy of the connection info string, sub + * out the local stack pointer for the pointer passed by the caller. + */ + if (master_conninfo_out != NULL) + master_conninfo = master_conninfo_out; + /* find all nodes belonging to this cluster */ sqlquery_snprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " " WHERE cluster = '%s' and id <> %d", diff --git a/dbutils.h b/dbutils.h index 5da99c3..10ee190 100644 --- a/dbutils.h +++ b/dbutils.h @@ -10,5 +10,5 @@ char *pg_version(PGconn *conn, char* major_version); bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); const char *get_cluster_size(PGconn *conn); -PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, - int *master_id); +PGconn *getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id, char *master_conninfo_out); diff --git a/repmgr.c b/repmgr.c index c98e714..4403215 100644 --- a/repmgr.c +++ b/repmgr.c @@ -35,7 +35,7 @@ static void help(const char *progname); -static bool create_recovery_file(const char *data_dir); +static bool create_recovery_file(const char *data_dir, char *master_conninfo); static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory); static bool check_parameters_for_action(const int action); @@ -428,7 +428,8 @@ do_master_register(void) int id; /* Ensure there isn't any other master already registered */ - master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id); + master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id, + NULL); if (master_conn != NULL) { PQfinish(master_conn); @@ -543,7 +544,7 @@ do_standby_register(void) /* check if there is a master in this cluster */ master_conn = getMasterConnection(conn, myLocalId, myClusterName, - &master_id); + &master_id, NULL); if (!master_conn) return; @@ -1018,7 +1019,7 @@ do_standby_clone(void) } /* Finally, write the recovery.conf file */ - create_recovery_file(dest_dir); + create_recovery_file(dest_dir, NULL); /* * We don't start the service because we still may want to move the @@ -1084,7 +1085,7 @@ do_standby_promote(void) /* we also need to check if there isn't any master already */ old_master_conn = getMasterConnection(conn, myLocalId, myClusterName, - &old_master_id); + &old_master_id, NULL); if (old_master_conn != NULL) { PQfinish(old_master_conn); @@ -1153,6 +1154,7 @@ do_standby_follow(void) char myClusterName[MAXLEN]; int myLocalId = -1; char conninfo[MAXLEN]; + char master_conninfo[MAXLEN]; PGconn *master_conn; int master_id; @@ -1193,7 +1195,7 @@ do_standby_follow(void) /* we also need to check if there is any master in the cluster */ master_conn = getMasterConnection(conn, myLocalId, myClusterName, - &master_id); + &master_id, master_conninfo); if (master_conn == NULL) { PQfinish(conn); @@ -1273,7 +1275,7 @@ do_standby_follow(void) PQfinish(conn); /* write the recovery.conf file */ - if (!create_recovery_file(data_dir)) + if (!create_recovery_file(data_dir, master_conninfo)) return; /* Finally, restart the service */ @@ -1326,8 +1328,13 @@ help(const char *progname) } +/* + * Creates a recovery file for a standby. + * + * Writes master_conninfo to recovery.conf if is non-NULL + */ static bool -create_recovery_file(const char *data_dir) +create_recovery_file(const char *data_dir, char *master_conninfo) { FILE *recovery_file; char recovery_file_path[MAXLEN]; @@ -1350,6 +1357,28 @@ create_recovery_file(const char *data_dir) return false; } + if (master_conninfo == NULL) + { + char *password = getenv("PGPASSWORD"); + + if (password == NULL) + { + fprintf(stderr, + _("%s: Panic! PGPASSWORD not set, how can we get here?\n"), + progname); + exit(255); + } + + maxlen_snprintf(line, + "primary_conninfo = 'host=%s port=%s' password=%s\n", + host, ((masterport==NULL) ? "5432" : masterport), + password); + } + else + { + maxlen_snprintf(line, "primary_conninfo = '%s'\n", master_conninfo); + } + maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", host, ((masterport==NULL) ? "5432" : masterport)); if (fputs(line, recovery_file) == EOF) diff --git a/repmgrd.c b/repmgrd.c index 02df064..1679056 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -168,7 +168,7 @@ main(int argc, char **argv) { /* I need the id of the primary as well as a connection to it */ primaryConn = getMasterConnection(myLocalConn, myLocalId, - myClusterName, &primaryId); + myClusterName, &primaryId, NULL); if (primaryConn == NULL) exit(1); } @@ -240,7 +240,7 @@ MonitorExecute(void) connection_retries++) { primaryConn = getMasterConnection(myLocalConn, myLocalId, - myClusterName, &primaryId); + myClusterName, &primaryId, NULL); if (PQstatus(primaryConn) == CONNECTION_OK) { /* Connected, we can continue the process so break the loop */