From owner-svn-src-stable@FreeBSD.ORG Sun Apr 18 21:18:33 2010 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 9F1131065673; Sun, 18 Apr 2010 21:18:33 +0000 (UTC) (envelope-from pjd@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 8CBC68FC24; Sun, 18 Apr 2010 21:18:33 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o3ILIXLs096294; Sun, 18 Apr 2010 21:18:33 GMT (envelope-from pjd@svn.freebsd.org) Received: (from pjd@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o3ILIWMM096292; Sun, 18 Apr 2010 21:18:32 GMT (envelope-from pjd@svn.freebsd.org) Message-Id: <201004182118.o3ILIWMM096292@svn.freebsd.org> From: Pawel Jakub Dawidek Date: Sun, 18 Apr 2010 21:18:32 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r206811 - stable/8/sbin/hastd X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 18 Apr 2010 21:18:33 -0000 Author: pjd Date: Sun Apr 18 21:18:32 2010 New Revision: 206811 URL: http://svn.freebsd.org/changeset/base/206811 Log: MFC r204177,r205738,r206669,r206696,r206697: r204177: Changing proto_socketpair.c compilation and linking order revealed a problem - we should simply ignore proto_server() if address doesn't start with socketpair://, and not abort. r205738: Don't hold connection lock when doing reconnects as it makes I/Os wait for connection timeouts. Reported by: Kevin Day r206669: Increase ggate queue size to maximum value. HAST was not able to stand heavy random load. Reported by: Hiroyuki Yamagami r206696: Fix control socket leak when worker process exits. Submitted by: Mikolaj Golub r206697: Fix log size calculation which caused message truncation. Submitted by: Mikolaj Golub Modified: stable/8/sbin/hastd/hastd.c stable/8/sbin/hastd/pjdlog.c stable/8/sbin/hastd/primary.c stable/8/sbin/hastd/proto_socketpair.c Directory Properties: stable/8/sbin/hastd/ (props changed) Modified: stable/8/sbin/hastd/hastd.c ============================================================================== --- stable/8/sbin/hastd/hastd.c Sun Apr 18 21:14:49 2010 (r206810) +++ stable/8/sbin/hastd/hastd.c Sun Apr 18 21:18:32 2010 (r206811) @@ -137,6 +137,7 @@ child_exit(void) pjdlog_error("Worker process failed (pid=%u, status=%d).", (unsigned int)pid, WEXITSTATUS(status)); } + proto_close(res->hr_ctrl); res->hr_workerpid = 0; if (res->hr_role == HAST_ROLE_PRIMARY) { sleep(1); Modified: stable/8/sbin/hastd/pjdlog.c ============================================================================== --- stable/8/sbin/hastd/pjdlog.c Sun Apr 18 21:14:49 2010 (r206810) +++ stable/8/sbin/hastd/pjdlog.c Sun Apr 18 21:18:32 2010 (r206811) @@ -228,7 +228,7 @@ pjdlogv_common(int loglevel, int debugle len = snprintf(log, sizeof(log), "%s", pjdlog_prefix); if ((size_t)len < sizeof(log)) - len = vsnprintf(log + len, sizeof(log) - len, fmt, ap); + len += vsnprintf(log + len, sizeof(log) - len, fmt, ap); if (error != -1 && (size_t)len < sizeof(log)) { (void)snprintf(log + len, sizeof(log) - len, ": %s.", strerror(error)); Modified: stable/8/sbin/hastd/primary.c ============================================================================== --- stable/8/sbin/hastd/primary.c Sun Apr 18 21:14:49 2010 (r206810) +++ stable/8/sbin/hastd/primary.c Sun Apr 18 21:18:32 2010 (r206811) @@ -460,9 +460,11 @@ init_local(struct hast_resource *res) exit(EX_NOINPUT); } -static void -init_remote(struct hast_resource *res) +static bool +init_remote(struct hast_resource *res, struct proto_conn **inp, + struct proto_conn **outp) { + struct proto_conn *in, *out; struct nv *nvout, *nvin; const unsigned char *token; unsigned char *map; @@ -472,13 +474,17 @@ init_remote(struct hast_resource *res) uint32_t mapsize; size_t size; + assert((inp == NULL && outp == NULL) || (inp != NULL && outp != NULL)); + + in = out = NULL; + /* Prepare outgoing connection with remote node. */ - if (proto_client(res->hr_remoteaddr, &res->hr_remoteout) < 0) { + if (proto_client(res->hr_remoteaddr, &out) < 0) { primary_exit(EX_OSERR, "Unable to create connection to %s", res->hr_remoteaddr); } /* Try to connect, but accept failure. */ - if (proto_connect(res->hr_remoteout) < 0) { + if (proto_connect(out) < 0) { pjdlog_errno(LOG_WARNING, "Unable to connect to %s", res->hr_remoteaddr); goto close; @@ -496,7 +502,7 @@ init_remote(struct hast_resource *res) nv_free(nvout); goto close; } - if (hast_proto_send(res, res->hr_remoteout, nvout, NULL, 0) < 0) { + if (hast_proto_send(res, out, nvout, NULL, 0) < 0) { pjdlog_errno(LOG_WARNING, "Unable to send handshake header to %s", res->hr_remoteaddr); @@ -504,7 +510,7 @@ init_remote(struct hast_resource *res) goto close; } nv_free(nvout); - if (hast_proto_recv_hdr(res->hr_remoteout, &nvin) < 0) { + if (hast_proto_recv_hdr(out, &nvin) < 0) { pjdlog_errno(LOG_WARNING, "Unable to receive handshake header from %s", res->hr_remoteaddr); @@ -536,12 +542,12 @@ init_remote(struct hast_resource *res) * Second handshake step. * Setup incoming connection with remote node. */ - if (proto_client(res->hr_remoteaddr, &res->hr_remotein) < 0) { + if (proto_client(res->hr_remoteaddr, &in) < 0) { pjdlog_errno(LOG_WARNING, "Unable to create connection to %s", res->hr_remoteaddr); } /* Try to connect, but accept failure. */ - if (proto_connect(res->hr_remotein) < 0) { + if (proto_connect(in) < 0) { pjdlog_errno(LOG_WARNING, "Unable to connect to %s", res->hr_remoteaddr); goto close; @@ -560,7 +566,7 @@ init_remote(struct hast_resource *res) nv_free(nvout); goto close; } - if (hast_proto_send(res, res->hr_remotein, nvout, NULL, 0) < 0) { + if (hast_proto_send(res, in, nvout, NULL, 0) < 0) { pjdlog_errno(LOG_WARNING, "Unable to send handshake header to %s", res->hr_remoteaddr); @@ -568,7 +574,7 @@ init_remote(struct hast_resource *res) goto close; } nv_free(nvout); - if (hast_proto_recv_hdr(res->hr_remoteout, &nvin) < 0) { + if (hast_proto_recv_hdr(out, &nvin) < 0) { pjdlog_errno(LOG_WARNING, "Unable to receive handshake header from %s", res->hr_remoteaddr); @@ -611,7 +617,7 @@ init_remote(struct hast_resource *res) * Remote node have some dirty extents on its own, lets * download its activemap. */ - if (hast_proto_recv_data(res, res->hr_remoteout, nvin, map, + if (hast_proto_recv_data(res, out, nvin, map, mapsize) < 0) { pjdlog_errno(LOG_ERR, "Unable to receive remote activemap"); @@ -631,18 +637,29 @@ init_remote(struct hast_resource *res) (void)hast_activemap_flush(res); } pjdlog_info("Connected to %s.", res->hr_remoteaddr); + if (inp != NULL && outp != NULL) { + *inp = in; + *outp = out; + } else { + res->hr_remotein = in; + res->hr_remoteout = out; + } + return (true); +close: + proto_close(out); + if (in != NULL) + proto_close(in); + return (false); +} + +static void +sync_start(void) +{ + mtx_lock(&sync_lock); sync_inprogress = true; mtx_unlock(&sync_lock); cv_signal(&sync_cond); - return; -close: - proto_close(res->hr_remoteout); - res->hr_remoteout = NULL; - if (res->hr_remotein != NULL) { - proto_close(res->hr_remotein); - res->hr_remotein = NULL; - } } static void @@ -665,7 +682,7 @@ init_ggate(struct hast_resource *res) ggiocreate.gctl_mediasize = res->hr_datasize; ggiocreate.gctl_sectorsize = res->hr_local_sectorsize; ggiocreate.gctl_flags = 0; - ggiocreate.gctl_maxcount = 128; + ggiocreate.gctl_maxcount = G_GATE_MAX_QUEUE_SIZE; ggiocreate.gctl_timeout = 0; ggiocreate.gctl_unit = G_GATE_NAME_GIVEN; snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s", @@ -735,7 +752,8 @@ hastd_primary(struct hast_resource *res) setproctitle("%s (primary)", res->hr_name); init_local(res); - init_remote(res); + if (init_remote(res, NULL, NULL)) + sync_start(); init_ggate(res); init_environment(res); error = pthread_create(&td, NULL, ggate_recv_thread, res); @@ -1695,6 +1713,7 @@ static void * guard_thread(void *arg) { struct hast_resource *res = arg; + struct proto_conn *in, *out; unsigned int ii, ncomps; int timeout; @@ -1738,26 +1757,31 @@ guard_thread(void *arg) * connected. */ rw_unlock(&hio_remote_lock[ii]); - rw_wlock(&hio_remote_lock[ii]); - assert(res->hr_remotein == NULL); - assert(res->hr_remoteout == NULL); pjdlog_debug(2, "remote_guard: Reconnecting to %s.", res->hr_remoteaddr); - init_remote(res); - if (ISCONNECTED(res, ii)) { + in = out = NULL; + if (init_remote(res, &in, &out)) { + rw_wlock(&hio_remote_lock[ii]); + assert(res->hr_remotein == NULL); + assert(res->hr_remoteout == NULL); + assert(in != NULL && out != NULL); + res->hr_remotein = in; + res->hr_remoteout = out; + rw_unlock(&hio_remote_lock[ii]); pjdlog_info("Successfully reconnected to %s.", res->hr_remoteaddr); + sync_start(); } else { /* Both connections should be NULL. */ assert(res->hr_remotein == NULL); assert(res->hr_remoteout == NULL); + assert(in == NULL && out == NULL); pjdlog_debug(2, "remote_guard: Reconnect to %s failed.", res->hr_remoteaddr); timeout = RECONNECT_SLEEP; } - rw_unlock(&hio_remote_lock[ii]); } } (void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout); Modified: stable/8/sbin/hastd/proto_socketpair.c ============================================================================== --- stable/8/sbin/hastd/proto_socketpair.c Sun Apr 18 21:14:49 2010 (r206810) +++ stable/8/sbin/hastd/proto_socketpair.c Sun Apr 18 21:18:32 2010 (r206811) @@ -91,9 +91,12 @@ sp_connect(void *ctx __unused) } static int -sp_server(const char *addr __unused, void **ctxp __unused) +sp_server(const char *addr, void **ctxp __unused) { + if (strcmp(addr, "socketpair://") != 0) + return (-1); + assert(!"proto_server() not supported on socketpairs"); abort(); }