Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 10 Sep 2017 17:46:03 +0000 (UTC)
From:      Alan Cox <alc@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r323391 - in head/sys: kern sys vm
Message-ID:  <201709101746.v8AHk3qv030733@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: alc
Date: Sun Sep 10 17:46:03 2017
New Revision: 323391
URL: https://svnweb.freebsd.org/changeset/base/323391

Log:
  To analyze the allocation of swap blocks by blist functions, add a method
  for analyzing the radix tree structures and reporting on the number, and
  sizes, of maximal intervals of free blocks.  The report includes the number
  of maximal intervals, and also the number of them in each of several size
  ranges, from small (size 1, or 3 to 4) to large (28657 to 46367) with size
  boundaries defined by Fibonacci numbers.  The report is written in the test
  tool with the 's' command, or in a running kernel by sysctl.
  
  The analysis of the radix tree frequently computes the position of the lone
  bit set in a u_daddr_t, a computation that also appears in leaf allocation.
  That computation has been moved into a function of its own, and optimized
  for cases where an inlined machine instruction can replace the usual binary
  search.
  
  Submitted by:	Doug Moore <dougm@rice.edu>
  MFC after:	1 week
  Differential Revision:	https://reviews.freebsd.org/D11906

Modified:
  head/sys/kern/subr_blist.c
  head/sys/sys/blist.h
  head/sys/vm/swap_pager.c

Modified: head/sys/kern/subr_blist.c
==============================================================================
--- head/sys/kern/subr_blist.c	Sun Sep 10 15:01:29 2017	(r323390)
+++ head/sys/kern/subr_blist.c	Sun Sep 10 17:46:03 2017	(r323391)
@@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/blist.h>
 #include <sys/malloc.h>
+#include <sys/sbuf.h>
 #include <sys/proc.h>
 #include <sys/mutex.h>
 
@@ -101,6 +102,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/malloc.h>
+#include <sys/sbuf.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -144,6 +146,7 @@ static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
 #endif
 
 CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);
+#define	BLIST_META_MASK	(BLIST_META_RADIX - 1)
 
 /*
  * For a subtree that can represent the state of up to 'radix' blocks, the
@@ -166,10 +169,38 @@ radix_to_skip(daddr_t radix)
 {
 
 	return (radix /
-	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * (BLIST_META_RADIX - 1)));
+	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
 }
 
 /*
+ * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
+ * Assumes that the argument has only one bit set.
+ */
+static inline int
+bitpos(u_daddr_t mask)
+{
+	int hi, lo, mid;
+
+	switch (sizeof(mask)) {
+#ifdef HAVE_INLINE_FFSLL
+	case sizeof(long long):
+		return (ffsll(mask) - 1);
+#endif
+	default:
+		lo = 0;
+		hi = BLIST_BMAP_RADIX;
+		while (lo + 1 < hi) {
+			mid = (lo + hi) >> 1;
+			if ((mask >> mid) != 0)
+				lo = mid;
+			else
+				hi = mid;
+		}
+		return (lo);
+	}
+}
+
+/*
  * blist_create() - create a blist capable of handling up to the specified
  *		    number of blocks
  *
@@ -340,6 +371,192 @@ blist_print(blist_t bl)
 
 #endif
 
+static const u_daddr_t fib[] = {
+	1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
+	4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
+	514229, 832040, 1346269, 2178309, 3524578,
+};
+
+/*
+ * Use 'gap' to describe a maximal range of unallocated blocks/bits.
+ */
+struct gap_stats {
+	daddr_t	start;		/* current gap start, or SWAPBLK_NONE */
+	daddr_t	num;		/* number of gaps observed */
+	daddr_t	max;		/* largest gap size */
+	daddr_t	avg;		/* average gap size */
+	daddr_t	err;		/* sum - num * avg */
+	daddr_t	histo[nitems(fib)]; /* # gaps in each size range */
+	int	max_bucket;	/* last histo elt with nonzero val */
+};
+
+/*
+ * gap_stats_counting()    - is the state 'counting 1 bits'?
+ *                           or 'skipping 0 bits'?
+ */
+static inline bool
+gap_stats_counting(const struct gap_stats *stats)
+{
+
+	return (stats->start != SWAPBLK_NONE);
+}
+
+/*
+ * init_gap_stats()    - initialize stats on gap sizes
+ */
+static inline void
+init_gap_stats(struct gap_stats *stats)
+{
+
+	bzero(stats, sizeof(*stats));
+	stats->start = SWAPBLK_NONE;
+}
+
+/*
+ * update_gap_stats()    - update stats on gap sizes
+ */
+static void
+update_gap_stats(struct gap_stats *stats, daddr_t posn)
+{
+	daddr_t size;
+	int hi, lo, mid;
+
+	if (!gap_stats_counting(stats)) {
+		stats->start = posn;
+		return;
+	}
+	size = posn - stats->start;
+	stats->start = SWAPBLK_NONE;
+	if (size > stats->max)
+		stats->max = size;
+
+	/*
+	 * Find the fibonacci range that contains size,
+	 * expecting to find it in an early range.
+	 */
+	lo = 0;
+	hi = 1;
+	while (hi < nitems(fib) && fib[hi] <= size) {
+		lo = hi;
+		hi *= 2;
+	}
+	if (hi >= nitems(fib))
+		hi = nitems(fib);
+	while (lo + 1 != hi) {
+		mid = (lo + hi) >> 1;
+		if (fib[mid] <= size)
+			lo = mid;
+		else
+			hi = mid;
+	}
+	stats->histo[lo]++;
+	if (lo > stats->max_bucket)
+		stats->max_bucket = lo;
+	stats->err += size - stats->avg;
+	stats->num++;
+	stats->avg += stats->err / stats->num;
+	stats->err %= stats->num;
+}
+
+/*
+ * dump_gap_stats()    - print stats on gap sizes
+ */
+static inline void
+dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
+{
+	int i;
+
+	sbuf_printf(s, "number of maximal free ranges: %jd\n",
+	    (intmax_t)stats->num);
+	sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
+	sbuf_printf(s, "average maximal free range size: %jd\n",
+	    (intmax_t)stats->avg);
+	sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
+	sbuf_printf(s, "               count  |  size range\n");
+	sbuf_printf(s, "               -----  |  ----------\n");
+	for (i = 0; i < stats->max_bucket; i++) {
+		if (stats->histo[i] != 0) {
+			sbuf_printf(s, "%20jd  |  ",
+			    (intmax_t)stats->histo[i]);
+			if (fib[i] != fib[i + 1] - 1)
+				sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
+				    (intmax_t)fib[i + 1] - 1);
+			else
+				sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
+		}
+	}
+	sbuf_printf(s, "%20jd  |  ", (intmax_t)stats->histo[i]);
+	if (stats->histo[i] > 1)
+		sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
+		    (intmax_t)stats->max);
+	else
+		sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
+}
+
+/*
+ * blist_stats()    - dump radix tree stats
+ */
+void
+blist_stats(blist_t bl, struct sbuf *s)
+{
+	struct gap_stats gstats;
+	struct gap_stats *stats = &gstats;
+	daddr_t i, nodes, radix;
+	u_daddr_t bit, diff, mask;
+
+	init_gap_stats(stats);
+	nodes = 0;
+	i = bl->bl_radix;
+	while (i < bl->bl_radix + bl->bl_blocks) {
+		/*
+		 * Find max size subtree starting at i.
+		 */
+		radix = BLIST_BMAP_RADIX;
+		while (((i / radix) & BLIST_META_MASK) == 0)
+			radix *= BLIST_META_RADIX;
+
+		/*
+		 * Check for skippable subtrees starting at i.
+		 */
+		while (radix > BLIST_BMAP_RADIX) {
+			if (bl->bl_root[nodes].u.bmu_avail == 0) {
+				if (gap_stats_counting(stats))
+					update_gap_stats(stats, i);
+				break;
+			}
+			if (bl->bl_root[nodes].u.bmu_avail == radix) {
+				if (!gap_stats_counting(stats))
+					update_gap_stats(stats, i);
+				break;
+			}
+
+			/*
+			 * Skip subtree root.
+			 */
+			nodes++;
+			radix /= BLIST_META_RADIX;
+		}
+		if (radix == BLIST_BMAP_RADIX) {
+			/*
+			 * Scan leaf.
+			 */
+			mask = bl->bl_root[nodes].u.bmu_bitmap;
+			diff = mask ^ (mask << 1);
+			if (gap_stats_counting(stats))
+				diff ^= 1;
+			while (diff != 0) {
+				bit = diff & -diff;
+				update_gap_stats(stats, i + bitpos(bit));
+				diff ^= bit;
+			}
+		}
+		nodes += radix_to_skip(radix);
+		i += radix;
+	}
+	update_gap_stats(stats, i);
+	dump_gap_stats(stats, s);
+}
+
 /************************************************************************
  *			  ALLOCATION SUPPORT FUNCTIONS			*
  ************************************************************************
@@ -355,13 +572,13 @@ blist_print(blist_t bl)
  *
  *	This is the core of the allocator and is optimized for the
  *	BLIST_BMAP_RADIX block allocation case.  Otherwise, execution
- *	time is proportional to log2(count) + log2(BLIST_BMAP_RADIX).
+ *	time is proportional to log2(count) + bitpos time.
  */
 static daddr_t
 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
 {
 	u_daddr_t mask;
-	int count1, hi, lo, mid, num_shifts, range1, range_ext;
+	int count1, lo, num_shifts, range1, range_ext;
 
 	if (count == BLIST_BMAP_RADIX) {
 		/*
@@ -419,17 +636,10 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count
 	/*
 	 * The least significant set bit in mask marks the start of the first
 	 * available range of sufficient size.  Clear all the bits but that one,
-	 * and then perform a binary search to find its position.
+	 * and then find its position.
 	 */
 	mask &= -mask;
-	hi = BLIST_BMAP_RADIX - count1;
-	while (lo + 1 < hi) {
-		mid = (lo + hi) >> 1;
-		if ((mask >> mid) != 0)
-			lo = mid;
-		else
-			hi = mid;
-	}
+	lo = bitpos(mask);
 
 	/*
 	 * Set in mask exactly the bits being allocated, and clear them from
@@ -980,6 +1190,7 @@ main(int ac, char **av)
 	int size = 1024;
 	int i;
 	blist_t bl;
+	struct sbuf *s;
 
 	for (i = 1; i < ac; ++i) {
 		const char *ptr = av[i];
@@ -1014,6 +1225,13 @@ main(int ac, char **av)
 		case 'p':
 			blist_print(bl);
 			break;
+		case 's':
+			s = sbuf_new_auto();
+			blist_stats(bl, s);
+			sbuf_finish(s);
+			printf("%s", sbuf_data(s));
+			sbuf_delete(s);
+			break;
 		case 'a':
 			if (sscanf(buf + 1, "%lld", &count) == 1) {
 				daddr_t blk = blist_alloc(bl, count);
@@ -1041,6 +1259,7 @@ main(int ac, char **av)
 		case 'h':
 			puts(
 			    "p          -print\n"
+			    "s          -stats\n"
 			    "a %d       -allocate\n"
 			    "f %x %d    -free\n"
 			    "l %x %d    -fill\n"

Modified: head/sys/sys/blist.h
==============================================================================
--- head/sys/sys/blist.h	Sun Sep 10 15:01:29 2017	(r323390)
+++ head/sys/sys/blist.h	Sun Sep 10 17:46:03 2017	(r323391)
@@ -90,6 +90,8 @@ typedef struct blist {
 
 #define BLIST_MAX_ALLOC		BLIST_BMAP_RADIX
 
+struct sbuf;
+
 daddr_t	blist_alloc(blist_t blist, daddr_t count);
 daddr_t	blist_avail(blist_t blist);
 blist_t	blist_create(daddr_t blocks, int flags);
@@ -98,6 +100,7 @@ daddr_t	blist_fill(blist_t bl, daddr_t blkno, daddr_t 
 void	blist_free(blist_t blist, daddr_t blkno, daddr_t count);
 void	blist_print(blist_t blist);
 void	blist_resize(blist_t *pblist, daddr_t count, int freenew, int flags);
+void	blist_stats(blist_t blist, struct sbuf *s);
 
 #endif	/* _SYS_BLIST_H_ */
 

Modified: head/sys/vm/swap_pager.c
==============================================================================
--- head/sys/vm/swap_pager.c	Sun Sep 10 15:01:29 2017	(r323390)
+++ head/sys/vm/swap_pager.c	Sun Sep 10 17:46:03 2017	(r323391)
@@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
+#include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/blist.h>
@@ -323,6 +324,10 @@ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
     "Maximum running async swap ops");
+static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_fragmentation, CTLTYPE_STRING | CTLFLAG_RD |
+    CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_fragmentation, "A",
+    "Swap Fragmentation Info");
 
 static struct sx sw_alloc_sx;
 
@@ -777,6 +782,36 @@ swp_pager_freeswapspace(daddr_t blk, int npages)
 		}
 	}
 	panic("Swapdev not found");
+}
+
+/*
+ * SYSCTL_SWAP_FRAGMENTATION() -	produce raw swap space stats
+ */
+static int
+sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf sbuf;
+	struct swdevt *sp;
+	const char *devname;
+	int error;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+	mtx_lock(&sw_dev_mtx);
+	TAILQ_FOREACH(sp, &swtailq, sw_list) {
+		if (vn_isdisk(sp->sw_vp, NULL))
+			devname = devtoname(sp->sw_vp->v_rdev);
+		else
+			devname = "[file]";
+		sbuf_printf(&sbuf, "\nFree space on device %s:\n", devname);
+		blist_stats(sp->sw_blist, &sbuf);
+	}
+	mtx_unlock(&sw_dev_mtx);
+	error = sbuf_finish(&sbuf);
+	sbuf_delete(&sbuf);
+	return (error);
 }
 
 /*



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201709101746.v8AHk3qv030733>