Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 24 May 2010 15:26:13 -0700
From:      Sean Bruno <seanbru@yahoo-inc.com>
To:        freebsd-hackers <freebsd-hackers@freebsd.org>
Subject:   Exposing Zone Sleeps
Message-ID:  <1274739973.31299.23.camel@localhost.localdomain>

next in thread | raw e-mail | index | archive | help

--=-j4QQ/FoprjniytAV16+J
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 7bit

Find attached a patch against -CURRENT.

This update exposes a counter that indicates the number of times that we
sleep when attempting to allocate a slab from the keg.  In other words,
the number of times we BLOCK and wait, which is bad.

This makes it possible to distinguish allocation failures that were
acceptable from cases where we were forced to sleep.  The current FAIL
counter does not make this distinction.

The counter is exposed via uma_zone_t->uz_sleeps.

Add a new sysctl (vm.zone) to retrieve this information.
Enhance vmstat -z to display this information in a new SLEEP column.

We've found this *extremely* useful here at Yahoo in the past and would
like to commit this if it is acceptable.

Tested on 32-bit and 64-bit architectures on FreeBSD 6, 7, and -CURRENT.


--=-j4QQ/FoprjniytAV16+J
Content-Disposition: attachment; filename="sleep_stat.diff"
Content-Type: text/x-patch; name="sleep_stat.diff"; charset="UTF-8"
Content-Transfer-Encoding: 7bit

Index: usr.bin/vmstat/vmstat.c
===================================================================
--- usr.bin/vmstat/vmstat.c	(revision 208460)
+++ usr.bin/vmstat/vmstat.c	(working copy)
@@ -1294,16 +1294,17 @@
 				    memstat_strerror(error));
 		}
 	}
-	printf("%-20s %8s  %8s  %8s  %8s  %8s  %8s\n\n", "ITEM", "SIZE",
-	    "LIMIT", "USED", "FREE", "REQUESTS", "FAILURES");
+	printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE",
+	    "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP");
 	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
 	    mtp = memstat_mtl_next(mtp)) {
 		strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME);
 		strcat(name, ":");
-		printf("%-20s %8llu, %8llu, %8llu, %8llu, %8llu, %8llu\n", name,
+		printf("%-20s %6llu, %6llu,%8llu,%8llu,%8llu,%4llu,%4llu\n",name,
 		    memstat_get_size(mtp), memstat_get_countlimit(mtp),
 		    memstat_get_count(mtp), memstat_get_free(mtp),
-		    memstat_get_numallocs(mtp), memstat_get_failures(mtp));
+		    memstat_get_numallocs(mtp), memstat_get_failures(mtp),
+		    memstat_get_sleeps(mtp));
 	}
 	memstat_mtl_free(mtlp);
 	printf("\n");
Index: lib/libmemstat/memstat.h
===================================================================
--- lib/libmemstat/memstat.h	(revision 208460)
+++ lib/libmemstat/memstat.h	(working copy)
@@ -139,6 +139,7 @@
 uint64_t	 memstat_get_count(const struct memory_type *mtp);
 uint64_t	 memstat_get_free(const struct memory_type *mtp);
 uint64_t	 memstat_get_failures(const struct memory_type *mtp);
+uint64_t	 memstat_get_sleeps(const struct memory_type *mtp);
 void		*memstat_get_caller_pointer(const struct memory_type *mtp,
 		    int index);
 void		 memstat_set_caller_pointer(struct memory_type *mtp,
Index: lib/libmemstat/memstat.c
===================================================================
--- lib/libmemstat/memstat.c	(revision 208460)
+++ lib/libmemstat/memstat.c	(working copy)
@@ -188,6 +188,7 @@
 	mtp->mt_count = 0;
 	mtp->mt_free = 0;
 	mtp->mt_failures = 0;
+	mtp->mt_sleeps = 0;
 
 	mtp->mt_zonefree = 0;
 	mtp->mt_kegfree = 0;
@@ -304,6 +305,13 @@
 	return (mtp->mt_failures);
 }
 
+uint64_t
+memstat_get_sleeps(const struct memory_type *mtp)
+{
+
+	return (mtp->mt_sleeps);	/* Number of allocation sleeps. */
+}
+
 void *
 memstat_get_caller_pointer(const struct memory_type *mtp, int index)
 {
Index: lib/libmemstat/memstat_internal.h
===================================================================
--- lib/libmemstat/memstat_internal.h	(revision 208460)
+++ lib/libmemstat/memstat_internal.h	(working copy)
@@ -65,6 +65,7 @@
 	uint64_t	 mt_count;	/* Number of current allocations. */
 	uint64_t	 mt_free;	/* Number of cached free items. */
 	uint64_t	 mt_failures;	/* Number of allocation failures. */
+	uint64_t	 mt_sleeps;	/* Number of allocation sleeps. */
 
 	/*
 	 * Caller-owned memory.
Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c	(revision 208460)
+++ lib/libmemstat/memstat_uma.c	(working copy)
@@ -208,6 +208,7 @@
 		mtp->mt_numallocs = uthp->uth_allocs;
 		mtp->mt_numfrees = uthp->uth_frees;
 		mtp->mt_failures = uthp->uth_fails;
+		mtp->mt_sleeps = uthp->uth_sleeps;
 
 		for (j = 0; j < maxcpus; j++) {
 			upsp = (struct uma_percpu_stat *)p;
@@ -402,6 +403,7 @@
 			mtp->mt_numallocs = uz.uz_allocs;
 			mtp->mt_numfrees = uz.uz_frees;
 			mtp->mt_failures = uz.uz_fails;
+			mtp->mt_sleeps = uz.uz_sleeps;
 			if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
 				goto skip_percpu;
 			for (i = 0; i < mp_maxid + 1; i++) {
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h	(revision 208460)
+++ sys/vm/uma_int.h	(working copy)
@@ -327,6 +327,7 @@
 	u_int64_t	uz_allocs UMA_ALIGN; /* Total number of allocations */
 	u_int64_t	uz_frees;	/* Total number of frees */
 	u_int64_t	uz_fails;	/* Total number of alloc failures */
+	u_int64_t	uz_sleeps;	/* Total number of alloc sleeps */
 	uint16_t	uz_fills;	/* Outstanding bucket fills */
 	uint16_t	uz_count;	/* Highest value ub_ptr can have */
 
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h	(revision 208460)
+++ sys/vm/uma.h	(working copy)
@@ -600,7 +600,8 @@
 	u_int64_t	uth_allocs;	/* Zone: number of allocations. */
 	u_int64_t	uth_frees;	/* Zone: number of frees. */
 	u_int64_t	uth_fails;	/* Zone: number of alloc failures. */
-	u_int64_t	_uth_reserved1[3];	/* Reserved. */
+	u_int64_t	_uth_reserved1[2];	/* Reserved. */
+	u_int64_t	uth_sleeps;	/* Zone: number of alloc sleeps. */
 };
 
 struct uma_percpu_stat {
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c	(revision 208460)
+++ sys/vm/uma_core.c	(working copy)
@@ -249,11 +249,15 @@
 
 void uma_print_zone(uma_zone_t);
 void uma_print_stats(void);
+static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
 
 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 
+SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
+    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
+
 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
 
@@ -1398,6 +1402,7 @@
 	zone->uz_allocs = 0;
 	zone->uz_frees = 0;
 	zone->uz_fails = 0;
+	zone->uz_sleeps = 0;
 	zone->uz_fills = zone->uz_count = 0;
 	zone->uz_flags = 0;
 	keg = arg->keg;
@@ -2285,6 +2290,7 @@
 		 */
 		if (full && !empty) {
 			zone->uz_flags |= UMA_ZFLAG_FULL;
+			zone->uz_sleeps++;
 			msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
 			continue;
@@ -3084,7 +3090,6 @@
 	}
 }
 
-#ifdef DDB
 /*
  * Generate statistics across both the zone and its per-cpu cache's.  Return
  * desired statistics if the pointer is non-NULL for that statistic.
@@ -3126,9 +3131,87 @@
 	if (freesp != NULL)
 		*freesp = frees;
 }
-#endif /* DDB */
 
+/*
+ * Sysctl handler for vm.zone: emit one text line per UMA zone with its item
+ * size, limit, used and free counts, and its request, failure and sleep
+ * counters.  Returns the result of the last SYSCTL_OUT() call.
+ *
+ * Adapted from vm_zone.c.
+ */
 static int
+sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
+{
+	int error, len, cnt, cachefree, totalfree;
+	const int linesize = 128;	/* conservative */
+	char *tmpbuf, *offset, *p;
+	uma_zone_t z;
+	uma_keg_t zk;
+	uma_bucket_t bucket;
+	u_int64_t allocs, frees;
+
+	/* Count zones to size the buffer; allocate after dropping uma_mtx. */
+	cnt = 0;
+	mtx_lock(&uma_mtx);
+	LIST_FOREACH(zk, &uma_kegs, uk_link) {
+		LIST_FOREACH(z, &zk->uk_zones, uz_link)
+			cnt++;
+	}
+	mtx_unlock(&uma_mtx);
+	tmpbuf = malloc((cnt == 0 ? 1 : cnt) * linesize, M_TEMP, M_WAITOK);
+	len = snprintf(tmpbuf, linesize,
+	    "\nITEM            SIZE   LIMIT     USED    FREE      REQ   FAIL SLEEP\n\n");
+	if (cnt == 0)
+		tmpbuf[len - 1] = '\0';
+	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
+	if (error || cnt == 0)
+		goto out;
+	offset = tmpbuf;
+	mtx_lock(&uma_mtx);
+	LIST_FOREACH(zk, &uma_kegs, uk_link) {
+	  LIST_FOREACH(z, &zk->uk_zones, uz_link) {
+		if (cnt == 0)	/* list may have changed size */
+			break;
+		ZONE_LOCK(z);
+		cachefree = 0;
+		if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
+			uma_zone_sumstat(z, &cachefree, &allocs, &frees);
+		} else {
+			allocs = z->uz_allocs;
+			frees = z->uz_frees;
+		}
+
+		LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
+			cachefree += bucket->ub_cnt;
+		totalfree = zk->uk_free + cachefree;
+		/* Cast the 64-bit counters so they match %llu on every arch. */
+		len = snprintf(offset, linesize,
+		    "%-12.12s  %6.6u, %6.6u, %6.6u, %6.6u, %8.8llu, %4.4llu, %4.4llu\n",
+		    /*ITEM*/z->uz_name, /*SIZE*/zk->uk_size,
+		    /*LIMIT*/zk->uk_maxpages * zk->uk_ipers,
+		    /*USED*/(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
+		    /*FREE*/totalfree,
+		    /*REQ*/(unsigned long long)allocs,
+		    /*FAIL*/(unsigned long long)z->uz_fails,
+		    /*SLEEP*/(unsigned long long)z->uz_sleeps);
+		ZONE_UNLOCK(z);
+		/* Replace the blank that follows the zone name with ':'. */
+		for (p = offset + 12; p > offset && *p == ' '; --p)
+			/* nothing */ ;
+		p[1] = ':';
+		cnt--;
+		offset += len;
+	  }
+	}
+	mtx_unlock(&uma_mtx);
+	*offset++ = '\0';
+	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
+out:
+	free(tmpbuf, M_TEMP);
+	return (error);
+}
+
+static int
 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
 {
 	uma_keg_t kz;
@@ -3232,6 +3315,7 @@
 			uth.uth_allocs = z->uz_allocs;
 			uth.uth_frees = z->uz_frees;
 			uth.uth_fails = z->uz_fails;
+			uth.uth_sleeps = z->uz_sleeps;
 			if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
 				ZONE_UNLOCK(z);
 				mtx_unlock(&uma_mtx);

--=-j4QQ/FoprjniytAV16+J--




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1274739973.31299.23.camel>