Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 5 Aug 2010 19:16:31 +0000 (UTC)
From:      Pawel Jakub Dawidek <pjd@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r210886 - head/sbin/hastd
Message-ID:  <201008051916.o75JGV6J080816@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: pjd
Date: Thu Aug  5 19:16:31 2010
New Revision: 210886
URL: http://svn.freebsd.org/changeset/base/210886

Log:
  Implement configuration reload on SIGHUP. This includes:
  - Load added resources.
  - Stop and forget removed resources.
  - Update modified resources in the least intrusive way, i.e. don't touch
    /dev/hast/<name> unless the path to the local component or the provider
    name was modified.
  
  Obtained from:	Wheel Systems Sp. z o.o. http://www.wheelsystems.com
  MFC after:	1 month

Modified:
  head/sbin/hastd/hastd.c
  head/sbin/hastd/hastd.h
  head/sbin/hastd/primary.c

Modified: head/sbin/hastd/hastd.c
==============================================================================
--- head/sbin/hastd/hastd.c	Thu Aug  5 19:12:35 2010	(r210885)
+++ head/sbin/hastd/hastd.c	Thu Aug  5 19:16:31 2010	(r210886)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2009-2010 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -57,13 +58,13 @@ __FBSDID("$FreeBSD$");
 #include "subr.h"
 
 /* Path to configuration file. */
-static const char *cfgpath = HAST_CONFIG;
+const char *cfgpath = HAST_CONFIG;
 /* Hastd configuration. */
 static struct hastd_config *cfg;
 /* Was SIGCHLD signal received? */
 static bool sigchld_received = false;
 /* Was SIGHUP signal received? */
-static bool sighup_received = false;
+bool sighup_received = false;
 /* Was SIGINT or SIGTERM signal received? */
 bool sigexit_received = false;
 /* PID file handle. */
@@ -169,12 +170,203 @@ child_exit(void)
 	}
 }
 
+static bool
+resource_needs_restart(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+
+	if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
+		return (true);
+	if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
+		return (true);
+	if (res0->hr_role == HAST_ROLE_INIT ||
+	    res0->hr_role == HAST_ROLE_SECONDARY) {
+		if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
+			return (true);
+		if (res0->hr_replication != res1->hr_replication)
+			return (true);
+		if (res0->hr_timeout != res1->hr_timeout)
+			return (true);
+	}
+	return (false);
+}
+
+static bool
+resource_needs_reload(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+	assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
+	assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
+
+	if (res0->hr_role != HAST_ROLE_PRIMARY)
+		return (false);
+
+	if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
+		return (true);
+	if (res0->hr_replication != res1->hr_replication)
+		return (true);
+	if (res0->hr_timeout != res1->hr_timeout)
+		return (true);
+	return (false);
+}
+
 static void
 hastd_reload(void)
 {
+	struct hastd_config *newcfg;
+	struct hast_resource *nres, *cres, *tres;
+	uint8_t role;
+
+	pjdlog_info("Reloading configuration...");
+
+	newcfg = yy_config_parse(cfgpath, false);
+	if (newcfg == NULL)
+		goto failed;
+
+	/*
+	 * Check if control address has changed.
+	 */
+	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
+		if (proto_server(newcfg->hc_controladdr,
+		    &newcfg->hc_controlconn) < 0) {
+			pjdlog_errno(LOG_ERR,
+			    "Unable to listen on control address %s",
+			    newcfg->hc_controladdr);
+			goto failed;
+		}
+	}
+	/*
+	 * Check if listen address has changed.
+	 */
+	if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
+		if (proto_server(newcfg->hc_listenaddr,
+		    &newcfg->hc_listenconn) < 0) {
+			pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
+			    newcfg->hc_listenaddr);
+			goto failed;
+		}
+	}
+	/*
+	 * Only when both control and listen sockets are successfully
+	 * initialized switch them to new configuration.
+	 */
+	if (newcfg->hc_controlconn != NULL) {
+		pjdlog_info("Control socket changed from %s to %s.",
+		    cfg->hc_controladdr, newcfg->hc_controladdr);
+		proto_close(cfg->hc_controlconn);
+		cfg->hc_controlconn = newcfg->hc_controlconn;
+		newcfg->hc_controlconn = NULL;
+		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
+		    sizeof(cfg->hc_controladdr));
+	}
+	if (newcfg->hc_listenconn != NULL) {
+		pjdlog_info("Listen socket changed from %s to %s.",
+		    cfg->hc_listenaddr, newcfg->hc_listenaddr);
+		proto_close(cfg->hc_listenconn);
+		cfg->hc_listenconn = newcfg->hc_listenconn;
+		newcfg->hc_listenconn = NULL;
+		strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
+		    sizeof(cfg->hc_listenaddr));
+	}
+
+	/*
+	 * Stop and remove resources that were removed from the configuration.
+	 */
+	TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
+		TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
+			if (strcmp(cres->hr_name, nres->hr_name) == 0)
+				break;
+		}
+		if (nres == NULL) {
+			control_set_role(cres, HAST_ROLE_INIT);
+			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+			pjdlog_info("Resource %s removed.", cres->hr_name);
+			free(cres);
+		}
+	}
+	/*
+	 * Move new resources to the current configuration.
+	 */
+	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+			if (strcmp(cres->hr_name, nres->hr_name) == 0)
+				break;
+		}
+		if (cres == NULL) {
+			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+			pjdlog_info("Resource %s added.", nres->hr_name);
+		}
+	}
+	/*
+	 * Deal with modified resources.
+	 * Depending on what has changed exactly we might want to perform
+	 * different actions.
+	 *
+	 * We do full resource restart in the following situations:
+	 * Resource role is INIT or SECONDARY.
+	 * Resource role is PRIMARY and path to local component or provider
+	 * name has changed.
+	 * In case of PRIMARY, the worker process will be killed and restarted,
+	 * which also means removing /dev/hast/<name> provider and
+	 * recreating it.
+	 *
+	 * We do just a reload (send SIGHUP to worker process) if we act as
+	 * PRIMARY and only remote address, replication mode or timeout
+	 * have changed. For those, there is no need to restart worker process.
+	 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
+	 * replication mode has changed or simply set new timeout if only
+	 * timeout has changed.
+	 */
+	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+			if (strcmp(cres->hr_name, nres->hr_name) == 0)
+				break;
+		}
+		assert(cres != NULL);
+		if (resource_needs_restart(cres, nres)) {
+			pjdlog_info("Resource %s configuration was modified, restarting it.",
+			    cres->hr_name);
+			role = cres->hr_role;
+			control_set_role(cres, HAST_ROLE_INIT);
+			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+			free(cres);
+			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+			control_set_role(nres, role);
+		} else if (resource_needs_reload(cres, nres)) {
+			pjdlog_info("Resource %s configuration was modified, reloading it.",
+			    cres->hr_name);
+			strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
+			    sizeof(cres->hr_remoteaddr));
+			cres->hr_replication = nres->hr_replication;
+			cres->hr_timeout = nres->hr_timeout;
+			if (cres->hr_workerpid != 0) {
+				if (kill(cres->hr_workerpid, SIGHUP) < 0) {
+					pjdlog_errno(LOG_WARNING,
+					    "Unable to send SIGHUP to worker process %u",
+					    (unsigned int)cres->hr_workerpid);
+				}
+			}
+		}
+	}
 
-	/* TODO */
-	pjdlog_warning("Configuration reload is not implemented.");
+	yy_config_free(newcfg);
+	pjdlog_info("Configuration reloaded successfully.");
+	return;
+failed:
+	if (newcfg != NULL) {
+		if (newcfg->hc_controlconn != NULL)
+			proto_close(newcfg->hc_controlconn);
+		if (newcfg->hc_listenconn != NULL)
+			proto_close(newcfg->hc_listenconn);
+		yy_config_free(newcfg);
+	}
+	pjdlog_warning("Configuration not reloaded.");
 }
 
 static void
@@ -402,10 +594,6 @@ main_loop(void)
 	fd_set rfds, wfds;
 	int cfd, lfd, maxfd, ret;
 
-	cfd = proto_descriptor(cfg->hc_controlconn);
-	lfd = proto_descriptor(cfg->hc_listenconn);
-	maxfd = cfd > lfd ? cfd : lfd;
-
 	for (;;) {
 		if (sigchld_received) {
 			sigchld_received = false;
@@ -416,6 +604,10 @@ main_loop(void)
 			hastd_reload();
 		}
 
+		cfd = proto_descriptor(cfg->hc_controlconn);
+		lfd = proto_descriptor(cfg->hc_listenconn);
+		maxfd = cfd > lfd ? cfd : lfd;
+
 		/* Setup descriptors for select(2). */
 		FD_ZERO(&rfds);
 		FD_SET(cfd, &rfds);

Modified: head/sbin/hastd/hastd.h
==============================================================================
--- head/sbin/hastd/hastd.h	Thu Aug  5 19:12:35 2010	(r210885)
+++ head/sbin/hastd/hastd.h	Thu Aug  5 19:16:31 2010	(r210886)
@@ -39,7 +39,8 @@
 
 #include "hast.h"
 
-extern bool sigexit_received;
+extern const char *cfgpath;
+extern bool sigexit_received, sighup_received;
 extern struct pidfh *pfh;
 
 void hastd_primary(struct hast_resource *res);

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c	Thu Aug  5 19:12:35 2010	(r210885)
+++ head/sbin/hastd/primary.c	Thu Aug  5 19:16:31 2010	(r210886)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2009 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -65,6 +66,9 @@ __FBSDID("$FreeBSD$");
 #include "subr.h"
 #include "synch.h"
 
+/* There is only one remote component for now. */
+#define	ISREMOTE(no)	((no) == 1)
+
 struct hio {
 	/*
 	 * Number of components we are still waiting for.
@@ -424,6 +428,7 @@ init_environment(struct hast_resource *r
 	 */
 	signal(SIGINT, sighandler);
 	signal(SIGTERM, sighandler);
+	signal(SIGHUP, sighandler);
 }
 
 static void
@@ -1713,6 +1718,9 @@ sighandler(int sig)
 	case SIGTERM:
 		sigexit_received = true;
 		break;
+	case SIGHUP:
+		sighup_received = true;
+		break;
 	default:
 		assert(!"invalid condition");
 	}
@@ -1726,6 +1734,114 @@ sighandler(int sig)
 		mtx_unlock(&hio_guard_lock);
 }
 
+static void
+config_reload(void)
+{
+	struct hastd_config *newcfg;
+	struct hast_resource *res;
+	unsigned int ii, ncomps;
+	int modified;
+
+	pjdlog_info("Reloading configuration...");
+
+	ncomps = HAST_NCOMPONENTS;
+
+	newcfg = yy_config_parse(cfgpath, false);
+	if (newcfg == NULL)
+		goto failed;
+
+	TAILQ_FOREACH(res, &newcfg->hc_resources, hr_next) {
+		if (strcmp(res->hr_name, gres->hr_name) == 0)
+			break;
+	}
+	/*
+	 * If resource was removed from the configuration file, or resource
+	 * name, provider name or path to local component was modified, we
+	 * shouldn't be here. This means that someone modified configuration
+	 * file and sent SIGHUP to us instead of main hastd process.
+	 * Log advice and ignore the signal.
+	 */
+	if (res == NULL || strcmp(gres->hr_name, res->hr_name) != 0 ||
+	    strcmp(gres->hr_provname, res->hr_provname) != 0 ||
+	    strcmp(gres->hr_localpath, res->hr_localpath) != 0) {
+		pjdlog_warning("To reload configuration send SIGHUP to the main hastd process (pid %u).",
+		    (unsigned int)getppid());
+		goto failed;
+	}
+
+#define MODIFIED_REMOTEADDR	0x1
+#define MODIFIED_REPLICATION	0x2
+#define MODIFIED_TIMEOUT	0x4
+	modified = 0;
+	if (strcmp(gres->hr_remoteaddr, res->hr_remoteaddr) != 0) {
+		/*
+		 * Don't copy res->hr_remoteaddr to gres just yet.
+		 * We want remote_close() to log disconnect from the old
+		 * addresses, not from the new ones.
+		 */
+		modified |= MODIFIED_REMOTEADDR;
+	}
+	if (gres->hr_replication != res->hr_replication) {
+		gres->hr_replication = res->hr_replication;
+		modified |= MODIFIED_REPLICATION;
+	}
+	if (gres->hr_timeout != res->hr_timeout) {
+		gres->hr_timeout = res->hr_timeout;
+		modified |= MODIFIED_TIMEOUT;
+	}
+	/*
+	 * If only timeout was modified we only need to change it without
+	 * reconnecting.
+	 */
+	if (modified == MODIFIED_TIMEOUT) {
+		for (ii = 0; ii < ncomps; ii++) {
+			if (!ISREMOTE(ii))
+				continue;
+			rw_rlock(&hio_remote_lock[ii]);
+			if (!ISCONNECTED(gres, ii)) {
+				rw_unlock(&hio_remote_lock[ii]);
+				continue;
+			}
+			rw_unlock(&hio_remote_lock[ii]);
+			if (proto_timeout(gres->hr_remotein,
+			    gres->hr_timeout) < 0) {
+				pjdlog_errno(LOG_WARNING,
+				    "Unable to set connection timeout");
+			}
+			if (proto_timeout(gres->hr_remoteout,
+			    gres->hr_timeout) < 0) {
+				pjdlog_errno(LOG_WARNING,
+				    "Unable to set connection timeout");
+			}
+		}
+	} else {
+		for (ii = 0; ii < ncomps; ii++) {
+			if (!ISREMOTE(ii))
+				continue;
+			remote_close(gres, ii);
+		}
+		if (modified & MODIFIED_REMOTEADDR) {
+			strlcpy(gres->hr_remoteaddr, res->hr_remoteaddr,
+			    sizeof(gres->hr_remoteaddr));
+		}
+	}
+#undef	MODIFIED_REMOTEADDR
+#undef	MODIFIED_REPLICATION
+#undef	MODIFIED_TIMEOUT
+
+	pjdlog_info("Configuration reloaded successfully.");
+	return;
+failed:
+	if (newcfg != NULL) {
+		if (newcfg->hc_controlconn != NULL)
+			proto_close(newcfg->hc_controlconn);
+		if (newcfg->hc_listenconn != NULL)
+			proto_close(newcfg->hc_listenconn);
+		yy_config_free(newcfg);
+	}
+	pjdlog_warning("Configuration not reloaded.");
+}
+
 /*
  * Thread guards remote connections and reconnects when needed, handles
  * signals, etc.
@@ -1739,14 +1855,16 @@ guard_thread(void *arg)
 	int timeout;
 
 	ncomps = HAST_NCOMPONENTS;
-	/* The is only one remote component for now. */
-#define	ISREMOTE(no)	((no) == 1)
 
 	for (;;) {
 		if (sigexit_received) {
 			primary_exitx(EX_OK,
 			    "Termination signal received, exiting.");
 		}
+		if (sighup_received) {
+			sighup_received = false;
+			config_reload();
+		}
 		/*
 		 * If all the connection will be fine, we will sleep until
 		 * someone wakes us up.
@@ -1810,7 +1928,6 @@ guard_thread(void *arg)
 		(void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout);
 		mtx_unlock(&hio_guard_lock);
 	}
-#undef	ISREMOTE
 	/* NOTREACHED */
 	return (NULL);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201008051916.o75JGV6J080816>