From owner-svn-src-all@FreeBSD.ORG Thu Aug 5 19:16:31 2010 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 652941065676; Thu, 5 Aug 2010 19:16:31 +0000 (UTC) (envelope-from pjd@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 528DE8FC0A; Thu, 5 Aug 2010 19:16:31 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o75JGV1d080820; Thu, 5 Aug 2010 19:16:31 GMT (envelope-from pjd@svn.freebsd.org) Received: (from pjd@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o75JGV6J080816; Thu, 5 Aug 2010 19:16:31 GMT (envelope-from pjd@svn.freebsd.org) Message-Id: <201008051916.o75JGV6J080816@svn.freebsd.org> From: Pawel Jakub Dawidek Date: Thu, 5 Aug 2010 19:16:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r210886 - head/sbin/hastd X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 05 Aug 2010 19:16:31 -0000 Author: pjd Date: Thu Aug 5 19:16:31 2010 New Revision: 210886 URL: http://svn.freebsd.org/changeset/base/210886 Log: Implement configuration reload on SIGHUP. This includes: - Load added resources. - Stop and forget removed resources. - Update modified resources in the least intrusive way, i.e. don't touch /dev/hast/ unless the path to the local component or the provider name was modified. Obtained from: Wheel Systems Sp. z o.o. 
http://www.wheelsystems.com MFC after: 1 month Modified: head/sbin/hastd/hastd.c head/sbin/hastd/hastd.h head/sbin/hastd/primary.c Modified: head/sbin/hastd/hastd.c ============================================================================== --- head/sbin/hastd/hastd.c Thu Aug 5 19:12:35 2010 (r210885) +++ head/sbin/hastd/hastd.c Thu Aug 5 19:16:31 2010 (r210886) @@ -1,5 +1,6 @@ /*- * Copyright (c) 2009-2010 The FreeBSD Foundation + * Copyright (c) 2010 Pawel Jakub Dawidek * All rights reserved. * * This software was developed by Pawel Jakub Dawidek under sponsorship from @@ -57,13 +58,13 @@ __FBSDID("$FreeBSD$"); #include "subr.h" /* Path to configuration file. */ -static const char *cfgpath = HAST_CONFIG; +const char *cfgpath = HAST_CONFIG; /* Hastd configuration. */ static struct hastd_config *cfg; /* Was SIGCHLD signal received? */ static bool sigchld_received = false; /* Was SIGHUP signal received? */ -static bool sighup_received = false; +bool sighup_received = false; /* Was SIGINT or SIGTERM signal received? */ bool sigexit_received = false; /* PID file handle. 
*/ @@ -169,12 +170,203 @@ child_exit(void) } } +static bool +resource_needs_restart(const struct hast_resource *res0, + const struct hast_resource *res1) +{ + + assert(strcmp(res0->hr_name, res1->hr_name) == 0); + + if (strcmp(res0->hr_provname, res1->hr_provname) != 0) + return (true); + if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0) + return (true); + if (res0->hr_role == HAST_ROLE_INIT || + res0->hr_role == HAST_ROLE_SECONDARY) { + if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) + return (true); + if (res0->hr_replication != res1->hr_replication) + return (true); + if (res0->hr_timeout != res1->hr_timeout) + return (true); + } + return (false); +} + +static bool +resource_needs_reload(const struct hast_resource *res0, + const struct hast_resource *res1) +{ + + assert(strcmp(res0->hr_name, res1->hr_name) == 0); + assert(strcmp(res0->hr_provname, res1->hr_provname) == 0); + assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0); + + if (res0->hr_role != HAST_ROLE_PRIMARY) + return (false); + + if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) + return (true); + if (res0->hr_replication != res1->hr_replication) + return (true); + if (res0->hr_timeout != res1->hr_timeout) + return (true); + return (false); +} + static void hastd_reload(void) { + struct hastd_config *newcfg; + struct hast_resource *nres, *cres, *tres; + uint8_t role; + + pjdlog_info("Reloading configuration..."); + + newcfg = yy_config_parse(cfgpath, false); + if (newcfg == NULL) + goto failed; + + /* + * Check if control address has changed. + */ + if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) { + if (proto_server(newcfg->hc_controladdr, + &newcfg->hc_controlconn) < 0) { + pjdlog_errno(LOG_ERR, + "Unable to listen on control address %s", + newcfg->hc_controladdr); + goto failed; + } + } + /* + * Check if listen address has changed. 
+ */ + if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) { + if (proto_server(newcfg->hc_listenaddr, + &newcfg->hc_listenconn) < 0) { + pjdlog_errno(LOG_ERR, "Unable to listen on address %s", + newcfg->hc_listenaddr); + goto failed; + } + } + /* + * Only when both control and listen sockets are successfully + * initialized switch them to new configuration. + */ + if (newcfg->hc_controlconn != NULL) { + pjdlog_info("Control socket changed from %s to %s.", + cfg->hc_controladdr, newcfg->hc_controladdr); + proto_close(cfg->hc_controlconn); + cfg->hc_controlconn = newcfg->hc_controlconn; + newcfg->hc_controlconn = NULL; + strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr, + sizeof(cfg->hc_controladdr)); + } + if (newcfg->hc_listenconn != NULL) { + pjdlog_info("Listen socket changed from %s to %s.", + cfg->hc_listenaddr, newcfg->hc_listenaddr); + proto_close(cfg->hc_listenconn); + cfg->hc_listenconn = newcfg->hc_listenconn; + newcfg->hc_listenconn = NULL; + strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr, + sizeof(cfg->hc_listenaddr)); + } + + /* + * Stop and remove resources that were removed from the configuration. + */ + TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) { + TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) { + if (strcmp(cres->hr_name, nres->hr_name) == 0) + break; + } + if (nres == NULL) { + control_set_role(cres, HAST_ROLE_INIT); + TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); + pjdlog_info("Resource %s removed.", cres->hr_name); + free(cres); + } + } + /* + * Move new resources to the current configuration. + */ + TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { + TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { + if (strcmp(cres->hr_name, nres->hr_name) == 0) + break; + } + if (cres == NULL) { + TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); + TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); + pjdlog_info("Resource %s added.", nres->hr_name); + } + } + /* + * Deal with modified resources. 
+ * Depending on what has changed exactly we might want to perform + * different actions. + * + * We do full resource restart in the following situations: + * Resource role is INIT or SECONDARY. + * Resource role is PRIMARY and path to local component or provider + * name has changed. + * In case of PRIMARY, the worker process will be killed and restarted, + * which also means removing /dev/hast/ provider and + * recreating it. + * + * We do just reload (send SIGHUP to worker process) if we act as + * PRIMARY, but only remote address, replication mode and timeout + * has changed. For those, there is no need to restart worker process. + * If PRIMARY receives SIGHUP, it will reconnect if remote address or + * replication mode has changed or simply set new timeout if only + * timeout has changed. + */ + TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { + TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { + if (strcmp(cres->hr_name, nres->hr_name) == 0) + break; + } + assert(cres != NULL); + if (resource_needs_restart(cres, nres)) { + pjdlog_info("Resource %s configuration was modified, restarting it.", + cres->hr_name); + role = cres->hr_role; + control_set_role(cres, HAST_ROLE_INIT); + TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); + free(cres); + TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); + TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); + control_set_role(nres, role); + } else if (resource_needs_reload(cres, nres)) { + pjdlog_info("Resource %s configuration was modified, reloading it.", + cres->hr_name); + strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr, + sizeof(cres->hr_remoteaddr)); + cres->hr_replication = nres->hr_replication; + cres->hr_timeout = nres->hr_timeout; + if (cres->hr_workerpid != 0) { + if (kill(cres->hr_workerpid, SIGHUP) < 0) { + pjdlog_errno(LOG_WARNING, + "Unable to send SIGHUP to worker process %u", + (unsigned int)cres->hr_workerpid); + } + } + } + } - /* TODO */ - pjdlog_warning("Configuration reload is not 
implemented."); + yy_config_free(newcfg); + pjdlog_info("Configuration reloaded successfully."); + return; +failed: + if (newcfg != NULL) { + if (newcfg->hc_controlconn != NULL) + proto_close(newcfg->hc_controlconn); + if (newcfg->hc_listenconn != NULL) + proto_close(newcfg->hc_listenconn); + yy_config_free(newcfg); + } + pjdlog_warning("Configuration not reloaded."); } static void @@ -402,10 +594,6 @@ main_loop(void) fd_set rfds, wfds; int cfd, lfd, maxfd, ret; - cfd = proto_descriptor(cfg->hc_controlconn); - lfd = proto_descriptor(cfg->hc_listenconn); - maxfd = cfd > lfd ? cfd : lfd; - for (;;) { if (sigchld_received) { sigchld_received = false; @@ -416,6 +604,10 @@ main_loop(void) hastd_reload(); } + cfd = proto_descriptor(cfg->hc_controlconn); + lfd = proto_descriptor(cfg->hc_listenconn); + maxfd = cfd > lfd ? cfd : lfd; + /* Setup descriptors for select(2). */ FD_ZERO(&rfds); FD_SET(cfd, &rfds); Modified: head/sbin/hastd/hastd.h ============================================================================== --- head/sbin/hastd/hastd.h Thu Aug 5 19:12:35 2010 (r210885) +++ head/sbin/hastd/hastd.h Thu Aug 5 19:16:31 2010 (r210886) @@ -39,7 +39,8 @@ #include "hast.h" -extern bool sigexit_received; +extern const char *cfgpath; +extern bool sigexit_received, sighup_received; extern struct pidfh *pfh; void hastd_primary(struct hast_resource *res); Modified: head/sbin/hastd/primary.c ============================================================================== --- head/sbin/hastd/primary.c Thu Aug 5 19:12:35 2010 (r210885) +++ head/sbin/hastd/primary.c Thu Aug 5 19:16:31 2010 (r210886) @@ -1,5 +1,6 @@ /*- * Copyright (c) 2009 The FreeBSD Foundation + * Copyright (c) 2010 Pawel Jakub Dawidek * All rights reserved. * * This software was developed by Pawel Jakub Dawidek under sponsorship from @@ -65,6 +66,9 @@ __FBSDID("$FreeBSD$"); #include "subr.h" #include "synch.h" +/* There is only one remote component for now. 
*/ +#define ISREMOTE(no) ((no) == 1) + struct hio { /* * Number of components we are still waiting for. @@ -424,6 +428,7 @@ init_environment(struct hast_resource *r */ signal(SIGINT, sighandler); signal(SIGTERM, sighandler); + signal(SIGHUP, sighandler); } static void @@ -1713,6 +1718,9 @@ sighandler(int sig) case SIGTERM: sigexit_received = true; break; + case SIGHUP: + sighup_received = true; + break; default: assert(!"invalid condition"); } @@ -1726,6 +1734,114 @@ sighandler(int sig) mtx_unlock(&hio_guard_lock); } +static void +config_reload(void) +{ + struct hastd_config *newcfg; + struct hast_resource *res; + unsigned int ii, ncomps; + int modified; + + pjdlog_info("Reloading configuration..."); + + ncomps = HAST_NCOMPONENTS; + + newcfg = yy_config_parse(cfgpath, false); + if (newcfg == NULL) + goto failed; + + TAILQ_FOREACH(res, &newcfg->hc_resources, hr_next) { + if (strcmp(res->hr_name, gres->hr_name) == 0) + break; + } + /* + * If resource was removed from the configuration file, resource + * name, provider name or path to local component was modified we + * shouldn't be here. This means that someone modified the configuration + * file and sent SIGHUP to us instead of the main hastd process. + * Log advice and ignore the signal. + */ + if (res == NULL || strcmp(gres->hr_name, res->hr_name) != 0 || + strcmp(gres->hr_provname, res->hr_provname) != 0 || + strcmp(gres->hr_localpath, res->hr_localpath) != 0) { + pjdlog_warning("To reload configuration send SIGHUP to the main hastd process (pid %u).", + (unsigned int)getppid()); + goto failed; + } + +#define MODIFIED_REMOTEADDR 0x1 +#define MODIFIED_REPLICATION 0x2 +#define MODIFIED_TIMEOUT 0x4 + modified = 0; + if (strcmp(gres->hr_remoteaddr, res->hr_remoteaddr) != 0) { + /* + * Don't copy res->hr_remoteaddr to gres just yet. + * We want remote_close() to log disconnect from the old + * addresses, not from the new ones. 
+ */ + modified |= MODIFIED_REMOTEADDR; + } + if (gres->hr_replication != res->hr_replication) { + gres->hr_replication = res->hr_replication; + modified |= MODIFIED_REPLICATION; + } + if (gres->hr_timeout != res->hr_timeout) { + gres->hr_timeout = res->hr_timeout; + modified |= MODIFIED_TIMEOUT; + } + /* + * If only timeout was modified we only need to change it without + * reconnecting. + */ + if (modified == MODIFIED_TIMEOUT) { + for (ii = 0; ii < ncomps; ii++) { + if (!ISREMOTE(ii)) + continue; + rw_rlock(&hio_remote_lock[ii]); + if (!ISCONNECTED(gres, ii)) { + rw_unlock(&hio_remote_lock[ii]); + continue; + } + rw_unlock(&hio_remote_lock[ii]); + if (proto_timeout(gres->hr_remotein, + gres->hr_timeout) < 0) { + pjdlog_errno(LOG_WARNING, + "Unable to set connection timeout"); + } + if (proto_timeout(gres->hr_remoteout, + gres->hr_timeout) < 0) { + pjdlog_errno(LOG_WARNING, + "Unable to set connection timeout"); + } + } + } else { + for (ii = 0; ii < ncomps; ii++) { + if (!ISREMOTE(ii)) + continue; + remote_close(gres, ii); + } + if (modified & MODIFIED_REMOTEADDR) { + strlcpy(gres->hr_remoteaddr, res->hr_remoteaddr, + sizeof(gres->hr_remoteaddr)); + } + } +#undef MODIFIED_REMOTEADDR +#undef MODIFIED_REPLICATION +#undef MODIFIED_TIMEOUT + + pjdlog_info("Configuration reloaded successfully."); + return; +failed: + if (newcfg != NULL) { + if (newcfg->hc_controlconn != NULL) + proto_close(newcfg->hc_controlconn); + if (newcfg->hc_listenconn != NULL) + proto_close(newcfg->hc_listenconn); + yy_config_free(newcfg); + } + pjdlog_warning("Configuration not reloaded."); +} + /* * Thread guards remote connections and reconnects when needed, handles * signals, etc. @@ -1739,14 +1855,16 @@ guard_thread(void *arg) int timeout; ncomps = HAST_NCOMPONENTS; - /* The is only one remote component for now. 
*/ -#define ISREMOTE(no) ((no) == 1) for (;;) { if (sigexit_received) { primary_exitx(EX_OK, "Termination signal received, exiting."); } + if (sighup_received) { + sighup_received = false; + config_reload(); + } /* * If all the connection will be fine, we will sleep until * someone wakes us up. @@ -1810,7 +1928,6 @@ guard_thread(void *arg) (void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout); mtx_unlock(&hio_guard_lock); } -#undef ISREMOTE /* NOTREACHED */ return (NULL); }