Skip site navigation (1) Skip section navigation (2)
Date:      Sun, 18 Apr 2010 21:18:32 +0000 (UTC)
From:      Pawel Jakub Dawidek <pjd@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r206811 - stable/8/sbin/hastd
Message-ID:  <201004182118.o3ILIWMM096292@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: pjd
Date: Sun Apr 18 21:18:32 2010
New Revision: 206811
URL: http://svn.freebsd.org/changeset/base/206811

Log:
  MFC r204177,r205738,r206669,r206696,r206697:
  
  r204177:
  
  Changing proto_socketpair.c compilation and linking order revealed
  a problem - we should simply ignore proto_server() if address
  doesn't start with socketpair://, and not abort.
  
  r205738:
  
  Don't hold connection lock when doing reconnects as it makes I/Os wait for
  connection timeouts.
  
  Reported by:	Kevin Day <toasty@dragondata.com>
  
  r206669:
  
  Increase ggate queue size to maximum value.
  HAST was not able to stand heavy random load.
  
  Reported by:	Hiroyuki Yamagami
  
  r206696:
  
  Fix control socket leak when worker process exits.
  
  Submitted by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r206697:
  
  Fix log size calculation which caused message truncation.
  
  Submitted by:	Mikolaj Golub <to.my.trociny@gmail.com>

Modified:
  stable/8/sbin/hastd/hastd.c
  stable/8/sbin/hastd/pjdlog.c
  stable/8/sbin/hastd/primary.c
  stable/8/sbin/hastd/proto_socketpair.c
Directory Properties:
  stable/8/sbin/hastd/   (props changed)

Modified: stable/8/sbin/hastd/hastd.c
==============================================================================
--- stable/8/sbin/hastd/hastd.c	Sun Apr 18 21:14:49 2010	(r206810)
+++ stable/8/sbin/hastd/hastd.c	Sun Apr 18 21:18:32 2010	(r206811)
@@ -137,6 +137,7 @@ child_exit(void)
 			pjdlog_error("Worker process failed (pid=%u, status=%d).",
 			    (unsigned int)pid, WEXITSTATUS(status));
 		}
+		proto_close(res->hr_ctrl);
 		res->hr_workerpid = 0;
 		if (res->hr_role == HAST_ROLE_PRIMARY) {
 			sleep(1);

Modified: stable/8/sbin/hastd/pjdlog.c
==============================================================================
--- stable/8/sbin/hastd/pjdlog.c	Sun Apr 18 21:14:49 2010	(r206810)
+++ stable/8/sbin/hastd/pjdlog.c	Sun Apr 18 21:18:32 2010	(r206811)
@@ -228,7 +228,7 @@ pjdlogv_common(int loglevel, int debugle
 
 		len = snprintf(log, sizeof(log), "%s", pjdlog_prefix);
 		if ((size_t)len < sizeof(log))
-			len = vsnprintf(log + len, sizeof(log) - len, fmt, ap);
+			len += vsnprintf(log + len, sizeof(log) - len, fmt, ap);
 		if (error != -1 && (size_t)len < sizeof(log)) {
 			(void)snprintf(log + len, sizeof(log) - len, ": %s.",
 			    strerror(error));

Modified: stable/8/sbin/hastd/primary.c
==============================================================================
--- stable/8/sbin/hastd/primary.c	Sun Apr 18 21:14:49 2010	(r206810)
+++ stable/8/sbin/hastd/primary.c	Sun Apr 18 21:18:32 2010	(r206811)
@@ -460,9 +460,11 @@ init_local(struct hast_resource *res)
 		exit(EX_NOINPUT);
 }
 
-static void
-init_remote(struct hast_resource *res)
+static bool
+init_remote(struct hast_resource *res, struct proto_conn **inp,
+    struct proto_conn **outp)
 {
+	struct proto_conn *in, *out;
 	struct nv *nvout, *nvin;
 	const unsigned char *token;
 	unsigned char *map;
@@ -472,13 +474,17 @@ init_remote(struct hast_resource *res)
 	uint32_t mapsize;
 	size_t size;
 
+	assert((inp == NULL && outp == NULL) || (inp != NULL && outp != NULL));
+
+	in = out = NULL;
+
 	/* Prepare outgoing connection with remote node. */
-	if (proto_client(res->hr_remoteaddr, &res->hr_remoteout) < 0) {
+	if (proto_client(res->hr_remoteaddr, &out) < 0) {
 		primary_exit(EX_OSERR, "Unable to create connection to %s",
 		    res->hr_remoteaddr);
 	}
 	/* Try to connect, but accept failure. */
-	if (proto_connect(res->hr_remoteout) < 0) {
+	if (proto_connect(out) < 0) {
 		pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
 		    res->hr_remoteaddr);
 		goto close;
@@ -496,7 +502,7 @@ init_remote(struct hast_resource *res)
 		nv_free(nvout);
 		goto close;
 	}
-	if (hast_proto_send(res, res->hr_remoteout, nvout, NULL, 0) < 0) {
+	if (hast_proto_send(res, out, nvout, NULL, 0) < 0) {
 		pjdlog_errno(LOG_WARNING,
 		    "Unable to send handshake header to %s",
 		    res->hr_remoteaddr);
@@ -504,7 +510,7 @@ init_remote(struct hast_resource *res)
 		goto close;
 	}
 	nv_free(nvout);
-	if (hast_proto_recv_hdr(res->hr_remoteout, &nvin) < 0) {
+	if (hast_proto_recv_hdr(out, &nvin) < 0) {
 		pjdlog_errno(LOG_WARNING,
 		    "Unable to receive handshake header from %s",
 		    res->hr_remoteaddr);
@@ -536,12 +542,12 @@ init_remote(struct hast_resource *res)
 	 * Second handshake step.
 	 * Setup incoming connection with remote node.
 	 */
-	if (proto_client(res->hr_remoteaddr, &res->hr_remotein) < 0) {
+	if (proto_client(res->hr_remoteaddr, &in) < 0) {
 		pjdlog_errno(LOG_WARNING, "Unable to create connection to %s",
 		    res->hr_remoteaddr);
 	}
 	/* Try to connect, but accept failure. */
-	if (proto_connect(res->hr_remotein) < 0) {
+	if (proto_connect(in) < 0) {
 		pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
 		    res->hr_remoteaddr);
 		goto close;
@@ -560,7 +566,7 @@ init_remote(struct hast_resource *res)
 		nv_free(nvout);
 		goto close;
 	}
-	if (hast_proto_send(res, res->hr_remotein, nvout, NULL, 0) < 0) {
+	if (hast_proto_send(res, in, nvout, NULL, 0) < 0) {
 		pjdlog_errno(LOG_WARNING,
 		    "Unable to send handshake header to %s",
 		    res->hr_remoteaddr);
@@ -568,7 +574,7 @@ init_remote(struct hast_resource *res)
 		goto close;
 	}
 	nv_free(nvout);
-	if (hast_proto_recv_hdr(res->hr_remoteout, &nvin) < 0) {
+	if (hast_proto_recv_hdr(out, &nvin) < 0) {
 		pjdlog_errno(LOG_WARNING,
 		    "Unable to receive handshake header from %s",
 		    res->hr_remoteaddr);
@@ -611,7 +617,7 @@ init_remote(struct hast_resource *res)
 		 * Remote node have some dirty extents on its own, lets
 		 * download its activemap.
 		 */
-		if (hast_proto_recv_data(res, res->hr_remoteout, nvin, map,
+		if (hast_proto_recv_data(res, out, nvin, map,
 		    mapsize) < 0) {
 			pjdlog_errno(LOG_ERR,
 			    "Unable to receive remote activemap");
@@ -631,18 +637,29 @@ init_remote(struct hast_resource *res)
 		(void)hast_activemap_flush(res);
 	}
 	pjdlog_info("Connected to %s.", res->hr_remoteaddr);
+	if (inp != NULL && outp != NULL) {
+		*inp = in;
+		*outp = out;
+	} else {
+		res->hr_remotein = in;
+		res->hr_remoteout = out;
+	}
+	return (true);
+close:
+	proto_close(out);
+	if (in != NULL)
+		proto_close(in);
+	return (false);
+}
+
+static void
+sync_start(void)
+{
+
 	mtx_lock(&sync_lock);
 	sync_inprogress = true;
 	mtx_unlock(&sync_lock);
 	cv_signal(&sync_cond);
-	return;
-close:
-	proto_close(res->hr_remoteout);
-	res->hr_remoteout = NULL;
-	if (res->hr_remotein != NULL) {
-		proto_close(res->hr_remotein);
-		res->hr_remotein = NULL;
-	}
 }
 
 static void
@@ -665,7 +682,7 @@ init_ggate(struct hast_resource *res)
 	ggiocreate.gctl_mediasize = res->hr_datasize;
 	ggiocreate.gctl_sectorsize = res->hr_local_sectorsize;
 	ggiocreate.gctl_flags = 0;
-	ggiocreate.gctl_maxcount = 128;
+	ggiocreate.gctl_maxcount = G_GATE_MAX_QUEUE_SIZE;
 	ggiocreate.gctl_timeout = 0;
 	ggiocreate.gctl_unit = G_GATE_NAME_GIVEN;
 	snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s",
@@ -735,7 +752,8 @@ hastd_primary(struct hast_resource *res)
 	setproctitle("%s (primary)", res->hr_name);
 
 	init_local(res);
-	init_remote(res);
+	if (init_remote(res, NULL, NULL))
+		sync_start();
 	init_ggate(res);
 	init_environment(res);
 	error = pthread_create(&td, NULL, ggate_recv_thread, res);
@@ -1695,6 +1713,7 @@ static void *
 guard_thread(void *arg)
 {
 	struct hast_resource *res = arg;
+	struct proto_conn *in, *out;
 	unsigned int ii, ncomps;
 	int timeout;
 
@@ -1738,26 +1757,31 @@ guard_thread(void *arg)
 				 * connected.
 				 */
 				rw_unlock(&hio_remote_lock[ii]);
-				rw_wlock(&hio_remote_lock[ii]);
-				assert(res->hr_remotein == NULL);
-				assert(res->hr_remoteout == NULL);
 				pjdlog_debug(2,
 				    "remote_guard: Reconnecting to %s.",
 				    res->hr_remoteaddr);
-				init_remote(res);
-				if (ISCONNECTED(res, ii)) {
+				in = out = NULL;
+				if (init_remote(res, &in, &out)) {
+					rw_wlock(&hio_remote_lock[ii]);
+					assert(res->hr_remotein == NULL);
+					assert(res->hr_remoteout == NULL);
+					assert(in != NULL && out != NULL);
+					res->hr_remotein = in;
+					res->hr_remoteout = out;
+					rw_unlock(&hio_remote_lock[ii]);
 					pjdlog_info("Successfully reconnected to %s.",
 					    res->hr_remoteaddr);
+					sync_start();
 				} else {
 					/* Both connections should be NULL. */
 					assert(res->hr_remotein == NULL);
 					assert(res->hr_remoteout == NULL);
+					assert(in == NULL && out == NULL);
 					pjdlog_debug(2,
 					    "remote_guard: Reconnect to %s failed.",
 					    res->hr_remoteaddr);
 					timeout = RECONNECT_SLEEP;
 				}
-				rw_unlock(&hio_remote_lock[ii]);
 			}
 		}
 		(void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout);

Modified: stable/8/sbin/hastd/proto_socketpair.c
==============================================================================
--- stable/8/sbin/hastd/proto_socketpair.c	Sun Apr 18 21:14:49 2010	(r206810)
+++ stable/8/sbin/hastd/proto_socketpair.c	Sun Apr 18 21:18:32 2010	(r206811)
@@ -91,9 +91,12 @@ sp_connect(void *ctx __unused)
 }
 
 static int
-sp_server(const char *addr __unused, void **ctxp __unused)
+sp_server(const char *addr, void **ctxp __unused)
 {
 
+	if (strcmp(addr, "socketpair://") != 0)
+		return (-1);
+
 	assert(!"proto_server() not supported on socketpairs");
 	abort();
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201004182118.o3ILIWMM096292>