Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 7 Oct 2017 16:55:44 +0000 (UTC)
From:      Alan Cox <alc@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r324384 - stable/11/sys/kern
Message-ID:  <201710071655.v97GtiTe077785@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: alc
Date: Sat Oct  7 16:55:44 2017
New Revision: 324384
URL: https://svnweb.freebsd.org/changeset/base/324384

Log:
  MFC r323656
    Modify blst_leaf_alloc to take only the cursor argument.
  
    Modify blst_leaf_alloc to find allocations that cross the boundary between
    one leaf node and the next when those two leaves descend from the same
    meta node.
  
    Update the hint field for leaves so that it represents a bound on how
    large an allocation can begin in that leaf, where it currently represents
    a bound on how large an allocation can be found within the boundaries of
    the leaf.
  
    The first phase of blst_leaf_alloc currently shrinks sequences of
    consecutive 1-bits in mask until each has been shrunken by count-1 bits,
    so that any bits remaining show where an allocation can begin, or until
    all the bits have disappeared, in which case the allocation fails. This
    change amends that so that the high-order bit is copied, as if, when the
    last block was free, it was followed by an endless stream of free
    blocks. It also amends the early stopping condition, so that the shrinking
    of 1-sequences stops early when there are none, or there is only one
    unbounded one remaining.
  
    The search for the first set bit is unchanged, and the code path
    thereafter is mostly unchanged unless the first set bit is in a position
    that makes some of those copied sign bits matter. In that case, we look
    for a next leaf, and at what blocks it can provide, to see if a
    cross-boundary allocation is possible.
  
    The hint is updated on a successful allocation that clears the last bit,
    but it not updated on a failed allocation that leaves the last bit
    set. So, as long as the last block is free, the hint value for the leaf is
    large. As long as the last block is free, and there's a next leaf, a large
    allocation can begin here, perhaps. A stricter rule than this would mean
    that allocations and frees in one leaf could require hint updates to the
    preceding leaf, and this change seeks to leave the freeing code
    unmodified.
  
    Define BLIST_BMAP_MASK, and use it for bit masking in blst_leaf_free and
    blist_leaf_fill, as well as in blst_leaf_alloc.
  
    Correct a panic message in blst_leaf_free.

Modified:
  stable/11/sys/kern/subr_blist.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/kern/subr_blist.c
==============================================================================
--- stable/11/sys/kern/subr_blist.c	Sat Oct  7 16:48:42 2017	(r324383)
+++ stable/11/sys/kern/subr_blist.c	Sat Oct  7 16:55:44 2017	(r324384)
@@ -32,14 +32,17 @@
  *	try to interpret the meaning of a 'block' other than to return
  *	SWAPBLK_NONE on an allocation failure.
  *
- *	A radix tree is used to maintain the bitmap.  Two radix constants are
- *	involved:  One for the bitmaps contained in the leaf nodes (typically
- *	64), and one for the meta nodes (typically 16).  Both meta and leaf
- *	nodes have a hint field.  This field gives us a hint as to the largest
- *	free contiguous range of blocks under the node.  It may contain a
- *	value that is too high, but will never contain a value that is too
- *	low.  When the radix tree is searched, allocation failures in subtrees
- *	update the hint.
+ *	A radix tree controls access to pieces of the bitmap, and includes
+ *	auxiliary information at each interior node about the availabilty of
+ *	contiguous free blocks in the subtree rooted at that node.  Two radix
+ *	constants are involved: one for the size of the bitmaps contained in the
+ *	leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendents of
+ *	each of the meta (interior) nodes (BLIST_META_RADIX).  Each subtree is
+ *	associated with a range of blocks.  The root of any subtree stores a
+ *	hint field that defines an upper bound on the size of the largest
+ *	allocation that can begin in the associated block range.  A hint is an
+ *	upper bound on a potential allocation, but not necessarily a tight upper
+ *	bound.
  *
  *	The radix tree also implements two collapsed states for meta nodes:
  *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
@@ -112,7 +115,7 @@ __FBSDID("$FreeBSD$");
 #define	bitcount64(x)	__bitcount64((uint64_t)(x))
 #define malloc(a,b,c)	calloc(a, 1)
 #define free(a,b)	free(a)
-#define CTASSERT(expr)
+static __inline int imax(int a, int b) { return (a > b ? a : b); }
 
 #include <sys/blist.h>
 
@@ -123,8 +126,7 @@ void panic(const char *ctl, ...);
 /*
  * static support functions
  */
-static daddr_t	blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count,
-		    daddr_t cursor);
+static daddr_t	blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
 static daddr_t	blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count,
 		    u_daddr_t radix);
 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
@@ -145,7 +147,9 @@ static void	blst_radix_print(blmeta_t *scan, daddr_t b
 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
 #endif
 
-CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);
+_Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0,
+    "radix divisibility error");
+#define	BLIST_BMAP_MASK	(BLIST_BMAP_RADIX - 1)
 #define	BLIST_META_MASK	(BLIST_META_RADIX - 1)
 
 /*
@@ -575,33 +579,16 @@ blist_stats(blist_t bl, struct sbuf *s)
  *	time is proportional to log2(count) + bitpos time.
  */
 static daddr_t
-blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
+blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
 {
 	u_daddr_t mask;
-	int count1, lo, num_shifts, range1, range_ext;
+	int count1, hi, lo, num_shifts, range1, range_ext;
 
-	if (count == BLIST_BMAP_RADIX) {
-		/*
-		 * Optimize allocation of BLIST_BMAP_RADIX bits.  If this wasn't
-		 * a special case, then forming the final value of 'mask' below
-		 * would require special handling to avoid an invalid left shift
-		 * when count equals the number of bits in mask.
-		 */
-		if (~scan->u.bmu_bitmap != 0) {
-			scan->bm_bighint = BLIST_BMAP_RADIX - 1;
-			return (SWAPBLK_NONE);
-		}
-		if (cursor != blk)
-			return (SWAPBLK_NONE);
-		scan->u.bmu_bitmap = 0;
-		scan->bm_bighint = 0;
-		return (blk);
-	}
 	range1 = 0;
 	count1 = count - 1;
 	num_shifts = fls(count1);
 	mask = scan->u.bmu_bitmap;
-	while (mask != 0 && num_shifts > 0) {
+	while ((-mask & ~mask) != 0 && num_shifts > 0) {
 		/*
 		 * If bit i is set in mask, then bits in [i, i+range1] are set
 		 * in scan->u.bmu_bitmap.  The value of range1 is equal to
@@ -609,27 +596,32 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count
 		 * while preserving these invariants.  The updates to mask leave
 		 * fewer bits set, but each bit that remains set represents a
 		 * longer string of consecutive bits set in scan->u.bmu_bitmap.
+		 * If more updates to mask cannot clear more bits, because mask
+		 * is partitioned with all 0 bits preceding all 1 bits, the loop
+		 * terminates immediately.
 		 */
 		num_shifts--;
 		range_ext = range1 + ((count1 >> num_shifts) & 1);
-		mask &= mask >> range_ext;
+		/*
+		 * mask is a signed quantity for the shift because when it is
+		 * shifted right, the sign bit should copied; when the last
+		 * block of the leaf is free, pretend, for a while, that all the
+		 * blocks that follow it are also free.
+		 */
+		mask &= (daddr_t)mask >> range_ext;
 		range1 += range_ext;
 	}
 	if (mask == 0) {
 		/*
 		 * Update bighint.  There is no allocation bigger than range1
-		 * available in this leaf.
+		 * starting in this leaf.
 		 */
 		scan->bm_bighint = range1;
 		return (SWAPBLK_NONE);
 	}
 
-	/*
-	 * Discard any candidates that appear before the cursor.
-	 */
-	lo = cursor - blk;
-	mask &= ~(u_daddr_t)0 << lo;
-
+	/* Discard any candidates that appear before blk. */
+	mask &= (u_daddr_t)-1 << (blk & BLIST_BMAP_MASK);
 	if (mask == 0)
 		return (SWAPBLK_NONE);
 
@@ -641,13 +633,58 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count
 	mask &= -mask;
 	lo = bitpos(mask);
 
-	/*
-	 * Set in mask exactly the bits being allocated, and clear them from
-	 * the set of available bits.
-	 */
-	mask = (mask << count) - mask;
+	hi = lo + count;
+	if (hi > BLIST_BMAP_RADIX) {
+		/*
+		 * An allocation within this leaf is impossible, so a successful
+		 * allocation depends on the next leaf providing some of the blocks.
+		 */
+		if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) {
+			/*
+			 * The next leaf has a different meta-node parent, so it
+			 * is not necessarily initialized.  Update bighint,
+			 * comparing the range found at the end of mask to the
+			 * largest earlier range that could have been made to
+			 * vanish in the initial processing of mask.
+			 */
+			scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1);
+			return (SWAPBLK_NONE);
+		}
+		hi -= BLIST_BMAP_RADIX;
+		if (((scan[1].u.bmu_bitmap + 1) & ~((u_daddr_t)-1 << hi)) != 0) {
+			/*
+			 * The next leaf doesn't have enough free blocks at the
+			 * beginning to complete the spanning allocation.  The
+			 * hint cannot be updated, because the same allocation
+			 * request could be satisfied later, by this leaf, if
+			 * the state of the next leaf changes, and without any
+			 * changes to this leaf.
+			 */
+			return (SWAPBLK_NONE);
+		}
+		/* Clear the first 'hi' bits in the next leaf, allocating them. */
+		scan[1].u.bmu_bitmap &= (u_daddr_t)-1 << hi;
+		hi = BLIST_BMAP_RADIX;
+	}
+
+	/* Set the bits of mask at position 'lo' and higher. */
+	mask = -mask;
+	if (hi == BLIST_BMAP_RADIX) {
+		/*
+		 * Update bighint.  There is no allocation bigger than range1
+		 * available in this leaf after this allocation completes.
+		 */
+		scan->bm_bighint = range1;
+	} else {
+		/* Clear the bits of mask at position 'hi' and higher. */
+		mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
+		/* If this allocation uses all the bits, clear the hint. */
+		if (mask == scan->u.bmu_bitmap)
+			scan->bm_bighint = 0;
+	}
+	/* Clear the allocated bits from this leaf. */
 	scan->u.bmu_bitmap &= ~mask;
-	return (blk + lo);
+	return ((blk & ~BLIST_BMAP_MASK) + lo);
 }
 
 /*
@@ -665,9 +702,8 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_
 	int child;
 	bool scan_from_start;
 
-	blk = cursor & -radix;
 	if (radix == BLIST_BMAP_RADIX)
-		return (blst_leaf_alloc(scan, blk, count, cursor));
+		return (blst_leaf_alloc(scan, cursor, count));
 	if (scan->u.bmu_avail < count) {
 		/*
 		 * The meta node's hint must be too large if the allocation
@@ -677,6 +713,7 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_
 		scan->bm_bighint = scan->u.bmu_avail;
 		return (SWAPBLK_NONE);
 	}
+	blk = cursor & -radix;
 	skip = radix_to_skip(radix);
 	next_skip = skip / BLIST_META_RADIX;
 
@@ -715,7 +752,7 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_
 	for (i = 1 + child * next_skip; i < skip; i += next_skip) {
 		if (count <= scan[i].bm_bighint) {
 			/*
-			 * The allocation might fit in the i'th subtree.
+			 * The allocation might fit beginning in the i'th subtree.
 			 */
 			r = blst_meta_alloc(&scan[i],
 			    cursor > blk ? cursor : blk, count, radix);
@@ -748,22 +785,20 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_
 static void
 blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
 {
+	u_daddr_t mask;
+	int n;
+
 	/*
 	 * free some data in this bitmap
-	 *
-	 * e.g.
-	 *	0000111111111110000
+	 * mask=0000111111111110000
 	 *          \_________/\__/
-	 *		v        n
+	 *		count   n
 	 */
-	int n = blk & (BLIST_BMAP_RADIX - 1);
-	u_daddr_t mask;
-
+	n = blk & BLIST_BMAP_MASK;
 	mask = ((u_daddr_t)-1 << n) &
 	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
-
 	if (scan->u.bmu_bitmap & mask)
-		panic("blst_radix_free: freeing free block");
+		panic("freeing free block");
 	scan->u.bmu_bitmap |= mask;
 
 	/*
@@ -944,10 +979,11 @@ blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, 
 static daddr_t
 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
 {
-	int n = blk & (BLIST_BMAP_RADIX - 1);
 	daddr_t nblks;
 	u_daddr_t mask;
+	int n;
 
+	n = blk & BLIST_BMAP_MASK;
 	mask = ((u_daddr_t)-1 << n) &
 	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
 
@@ -1097,10 +1133,14 @@ blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t
 			count = 0;
 		} else {
 			/*
-			 * Add terminator and break out
+			 * Add terminator and break out.  Make terminator bitmap
+			 * zero to avoid a spanning leaf allocation that
+			 * includes the terminator.
 			 */
-			if (scan)
+			if (scan) {
 				scan[i].bm_bighint = (daddr_t)-1;
+				scan[i].u.bmu_bitmap = 0;
+			}
 			break;
 		}
 	}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201710071655.v97GtiTe077785>