Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 25 May 2010 10:01:28 -0400
From:      jhell <jhell@dataix.net>
To:        sbruno@freebsd.org
Cc:        freebsd-hackers <freebsd-hackers@freebsd.org>, Sean Bruno <seanbru@yahoo-inc.com>
Subject:   Re: Exposing Zone Sleeps
Message-ID:  <4BFBD838.40208@dataix.net>
In-Reply-To: <1274739973.31299.23.camel@localhost.localdomain>
References:  <1274739973.31299.23.camel@localhost.localdomain>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------070908020201070608080903
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 05/24/2010 18:26, Sean Bruno wrote:
> Find attached a patch against -CURRENT.
> 
> This update exposes a counter that indicates the number of times that we
> sleep when attempting to allocate a slab from the keg.  In other words,
> the number of times we BLOCK and wait, which is bad.
> 
> This allows differentiation between times when we failed to allocate and
> it was ok and times where we were forced to sleep.  The current FAIL
> counter does not make this distinction.
> 
> Exposes this information via uma_zone_t->uz_sleeps.
> 
> Add a new sysctl to retrieve this information.
> Enhance vmstat -z to retrieve this information.
> 
> We've found this *extremely* useful here at Yahoo in the past and would
> like to commit this if it is acceptable.
> 
> Tested on 32bit and 64bit architectures on 6/7/CURRENT.
> 

Hi Sean,

Nice work on this. I applied this to stable/8 r208530 and I am in the
process of compiling the kernel right now. Everything else has built and
runs as expected on i386. Attached is the adjusted patch; the only
modification was to the line number for uz_sleeps in sys/vm/uma_int.h.

8 files changed, 106 insertions(+), 7 deletions(-)

For those wishing to apply this patch and test it for themselves:

cd /usr/src
patch </path/to/sleep_stat_stable8_r208530.diff
cd /usr/src/include
make obj && make depend && make includes && make install
cd /usr/src/lib/libmemstat
make obj && make depend && make includes && make install
cd /usr/src/usr.bin/vmstat
make obj && make depend && make install
cd /usr/src
make kernel KERNCONF=YOUR_KERN_CONF
reboot

I can't wait to see some results from this, and I will report back with
either negative results from the build and run or positive results from
the stats collected.

If anything else is needed, feel free to let me know and I will do what
I can ASAP.

Thanks again,

- -- 

 jhell
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.14 (FreeBSD)

iQEcBAEBAgAGBQJL+9g3AAoJEJBXh4mJ2FR+1UoIAJnJ0gvQBOVjvccj2DueHedg
gk9SCeebEys2xjrqlTP3HrsaWl+zJcZoR6qJbJnSw3sIAkXbSAptaVH7xOx7o3vj
cngqEVGcq99w8NILFjCvpMJBIs7iBY0ZqRFgloIdoNdB1DNugwKNZVtvd17WUlWJ
MstE/kSGVmYVqIVARXx6ucEMrxI1wWgNOPDmI3dZWxDD/gZi5m3hvhyQt2Ub6oQu
kAagDeVIluk4fMHk5KkwQjJajciaaXLTd50FakhWcpMOH1sFd2Ks4eJRh3RI70Eo
UpgIrOJZWMFH7G9mtoYRk6hxd6Qgw+8lqoJP+P/i322wPJ4vPHciqaIOxWiERCA=
=acrJ
-----END PGP SIGNATURE-----

--------------070908020201070608080903
Content-Type: text/x-patch;
 name="sleep_stat_stable8_r208530.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="sleep_stat_stable8_r208530.diff"

M       usr.bin/vmstat/vmstat.c
M       lib/libmemstat/memstat.h
M       lib/libmemstat/memstat.c
M       lib/libmemstat/memstat_internal.h
M       lib/libmemstat/memstat_uma.c
M       sys/vm/uma_int.h
M       sys/vm/uma.h
M       sys/vm/uma_core.c
Index: usr.bin/vmstat/vmstat.c
===================================================================
--- usr.bin/vmstat/vmstat.c	(revision 208530)
+++ usr.bin/vmstat/vmstat.c	(working copy)
@@ -1286,16 +1286,17 @@
 				    memstat_strerror(error));
 		}
 	}
-	printf("%-20s %8s  %8s  %8s  %8s  %8s  %8s\n\n", "ITEM", "SIZE",
-	    "LIMIT", "USED", "FREE", "REQUESTS", "FAILURES");
+	printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE",
+	    "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP");
 	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
 	    mtp = memstat_mtl_next(mtp)) {
 		strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME);
 		strcat(name, ":");
-		printf("%-20s %8llu, %8llu, %8llu, %8llu, %8llu, %8llu\n", name,
+		printf("%-20s %6llu, %6llu,%8llu,%8llu,%8llu,%4llu,%4llu\n",name,
 		    memstat_get_size(mtp), memstat_get_countlimit(mtp),
 		    memstat_get_count(mtp), memstat_get_free(mtp),
-		    memstat_get_numallocs(mtp), memstat_get_failures(mtp));
+		    memstat_get_numallocs(mtp), memstat_get_failures(mtp),
+		    memstat_get_sleeps(mtp));
 	}
 	memstat_mtl_free(mtlp);
 	printf("\n");
Index: lib/libmemstat/memstat.h
===================================================================
--- lib/libmemstat/memstat.h	(revision 208530)
+++ lib/libmemstat/memstat.h	(working copy)
@@ -139,6 +139,7 @@
 uint64_t	 memstat_get_count(const struct memory_type *mtp);
 uint64_t	 memstat_get_free(const struct memory_type *mtp);
 uint64_t	 memstat_get_failures(const struct memory_type *mtp);
+uint64_t	 memstat_get_sleeps(const struct memory_type *mtp);
 void		*memstat_get_caller_pointer(const struct memory_type *mtp,
 		    int index);
 void		 memstat_set_caller_pointer(struct memory_type *mtp,
Index: lib/libmemstat/memstat.c
===================================================================
--- lib/libmemstat/memstat.c	(revision 208530)
+++ lib/libmemstat/memstat.c	(working copy)
@@ -188,6 +188,7 @@
 	mtp->mt_count = 0;
 	mtp->mt_free = 0;
 	mtp->mt_failures = 0;
+	mtp->mt_sleeps = 0;
 
 	mtp->mt_zonefree = 0;
 	mtp->mt_kegfree = 0;
@@ -304,6 +305,13 @@
 	return (mtp->mt_failures);
 }
 
+uint64_t
+memstat_get_sleeps(const struct memory_type *mtp)
+{
+
+	return (mtp->mt_sleeps);
+}
+
 void *
 memstat_get_caller_pointer(const struct memory_type *mtp, int index)
 {
Index: lib/libmemstat/memstat_internal.h
===================================================================
--- lib/libmemstat/memstat_internal.h	(revision 208530)
+++ lib/libmemstat/memstat_internal.h	(working copy)
@@ -65,6 +65,7 @@
 	uint64_t	 mt_count;	/* Number of current allocations. */
 	uint64_t	 mt_free;	/* Number of cached free items. */
 	uint64_t	 mt_failures;	/* Number of allocation failures. */
+	uint64_t	 mt_sleeps;	/* Number of allocation sleeps. */
 
 	/*
 	 * Caller-owned memory.
Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c	(revision 208530)
+++ lib/libmemstat/memstat_uma.c	(working copy)
@@ -208,6 +208,7 @@
 		mtp->mt_numallocs = uthp->uth_allocs;
 		mtp->mt_numfrees = uthp->uth_frees;
 		mtp->mt_failures = uthp->uth_fails;
+		mtp->mt_sleeps = uthp->uth_sleeps;
 
 		for (j = 0; j < maxcpus; j++) {
 			upsp = (struct uma_percpu_stat *)p;
@@ -402,6 +403,7 @@
 			mtp->mt_numallocs = uz.uz_allocs;
 			mtp->mt_numfrees = uz.uz_frees;
 			mtp->mt_failures = uz.uz_fails;
+			mtp->mt_sleeps = uz.uz_sleeps;
 			if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
 				goto skip_percpu;
 			for (i = 0; i < mp_maxid + 1; i++) {
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h	(revision 208530)
+++ sys/vm/uma_int.h	(working copy)
@@ -315,6 +315,7 @@
 	u_int64_t	uz_allocs;	/* Total number of allocations */
 	u_int64_t	uz_frees;	/* Total number of frees */
 	u_int64_t	uz_fails;	/* Total number of alloc failures */
+	u_int64_t	uz_sleeps;	/* Total number of alloc sleeps */
 	u_int32_t	uz_flags;	/* Flags inherited from kegs */
 	u_int32_t	uz_size;	/* Size inherited from kegs */
 	uint16_t	uz_fills;	/* Outstanding bucket fills */
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h	(revision 208530)
+++ sys/vm/uma.h	(working copy)
@@ -600,7 +600,8 @@
 	u_int64_t	uth_allocs;	/* Zone: number of allocations. */
 	u_int64_t	uth_frees;	/* Zone: number of frees. */
 	u_int64_t	uth_fails;	/* Zone: number of alloc failures. */
-	u_int64_t	_uth_reserved1[3];	/* Reserved. */
+	u_int64_t	_uth_reserved1[2];	/* Reserved. */
+	u_int64_t	uth_sleeps;	/* Zone: number of alloc sleeps. */
 };
 
 struct uma_percpu_stat {
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c	(revision 208530)
+++ sys/vm/uma_core.c	(working copy)
@@ -249,11 +249,15 @@
 
 void uma_print_zone(uma_zone_t);
 void uma_print_stats(void);
+static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
 
 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 
+SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
+    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
+
 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
 
@@ -1400,6 +1404,7 @@
 	zone->uz_allocs = 0;
 	zone->uz_frees = 0;
 	zone->uz_fails = 0;
+	zone->uz_sleeps = 0;
 	zone->uz_fills = zone->uz_count = 0;
 	zone->uz_flags = 0;
 	keg = arg->keg;
@@ -2287,6 +2292,7 @@
 		 */
 		if (full && !empty) {
 			zone->uz_flags |= UMA_ZFLAG_FULL;
+			zone->uz_sleeps++;
 			msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
 			continue;
@@ -3088,7 +3094,6 @@
 	}
 }
 
-#ifdef DDB
 /*
  * Generate statistics across both the zone and its per-cpu cache's.  Return
  * desired statistics if the pointer is non-NULL for that statistic.
@@ -3130,7 +3135,85 @@
 	if (freesp != NULL)
 		*freesp = frees;
 }
-#endif /* DDB */
+
+/*
+ * Sysctl handler for vm.zone
+ *
+ * stolen from vm_zone.c
+ */
+static int
+sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
+{
+	int error, len, cnt;
+	const int linesize = 128;	/* conservative */
+	int totalfree;
+	char *tmpbuf, *offset;
+	uma_zone_t z;
+	uma_keg_t zk;
+	char *p;
+	int cachefree;
+	uma_bucket_t bucket;
+	u_int64_t allocs, frees;
+
+	cnt = 0;
+	mtx_lock(&uma_mtx);
+	LIST_FOREACH(zk, &uma_kegs, uk_link) {
+		LIST_FOREACH(z, &zk->uk_zones, uz_link)
+			cnt++;
+	}
+	mtx_unlock(&uma_mtx);
+	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
+			M_TEMP, M_WAITOK);
+	len = snprintf(tmpbuf, linesize,
+	    "\nITEM            SIZE   LIMIT     USED    FREE      REQ   FAIL SLEEP\n\n");
+	if (cnt == 0)
+		tmpbuf[len - 1] = '\0';
+	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
+	if (error || cnt == 0)
+		goto out;
+	offset = tmpbuf;
+	mtx_lock(&uma_mtx);
+	LIST_FOREACH(zk, &uma_kegs, uk_link) {
+	  LIST_FOREACH(z, &zk->uk_zones, uz_link) {
+		if (cnt == 0)	/* list may have changed size */
+			break;
+		ZONE_LOCK(z);
+		cachefree = 0;
+		if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
+			uma_zone_sumstat(z, &cachefree, &allocs, &frees);
+		} else {
+			allocs = z->uz_allocs;
+			frees = z->uz_frees;
+		}
+
+		LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
+			cachefree += bucket->ub_cnt;
+		}
+		totalfree = zk->uk_free + cachefree;
+		len = snprintf(offset, linesize,
+		    "%-12.12s  %6.6u, %6.6u, %6.6u, %6.6u, %8.8llu, %4.4lu, %4.4lu\n",
+		    /*ITEM*/z->uz_name, /*SIZE*/zk->uk_size,
+		    /*LIMIT*/zk->uk_maxpages * zk->uk_ipers,
+		    /*USED*/(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
+		    /*FREE*/totalfree,
+		    /*REQ*/(unsigned long long)allocs,
+		    /*FAIL*/z->uz_fails,
+		    /*SLEEP*/z->uz_sleeps);
+		ZONE_UNLOCK(z);
+		for (p = offset + 12; p > offset && *p == ' '; --p)
+			/* nothing */ ;
+		p[1] = ':';
+		cnt--;
+		offset += len;
+	  }
+	}
+	mtx_unlock(&uma_mtx);
+	*offset++ = '\0';
+	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
+out:
+	FREE(tmpbuf, M_TEMP);
+	return (error);
+}
 
 static int
 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
@@ -3236,6 +3319,7 @@
 			uth.uth_allocs = z->uz_allocs;
 			uth.uth_frees = z->uz_frees;
 			uth.uth_fails = z->uz_fails;
+			uth.uth_sleeps = z->uz_sleeps;
 			if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
 				ZONE_UNLOCK(z);
 				mtx_unlock(&uma_mtx);

--------------070908020201070608080903
Content-Type: application/octet-stream;
	name="sleep_stat_stable8_r208530.diff.sig"
Content-Transfer-Encoding: base64
Content-Disposition: attachment; filename="sleep_stat_stable8_r208530.diff.sig"

iQEcBAABAgAGBQJL+9g3AAoJEJBXh4mJ2FR+2F8H/R5OVSvxtO1aaGF4aZ5775Jb8SA1VaII
h814V8oTMbvzhHx2rr5z5aPWFPes6OHL8WwZuY9BY4kwQ+KTyijJQRGpgm7keRlMBoJNcsAF
QIWHkbKFxgRTpBcEwXcfagnltEPXsdkdIB0pktIQTZkKBbxfXPLIQuv91b8ij+rssv63VyFb
yherZQmC6bEnkwtZ8+6q1x6S+RgzqSr/wXCyQVWhejCAhX320nCgtiScMfon+fDF4+tzawZq
fw5Fi+NUx8yyFRAMTWvwu0PSPUyyK60b6F6DhKG1hcYOrTk+qFvkSNXeBrQcVBXIlgJKUh8m
9jjBz3CDTOKDdrZSmapDs54=
--------------070908020201070608080903--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4BFBD838.40208>