Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 17 Jun 2019 20:29:13 +0000 (UTC)
From:      Conrad Meyer <cem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r349154 - in head: sys/dev/random tests/sys/devrandom
Message-ID:  <201906172029.x5HKTDb2011520@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: cem
Date: Mon Jun 17 20:29:13 2019
New Revision: 349154
URL: https://svnweb.freebsd.org/changeset/base/349154

Log:
  random(4): Fortuna: allow increased concurrency
  
  Add experimental feature to increase concurrency in Fortuna.  As this
  diverges slightly from canonical Fortuna, and due to the security
  sensitivity of random(4), it is off by default.  To enable it, set the
  tunable kern.random.fortuna.concurrent_read="1".  The rest of this commit
  message describes the behavior when enabled.
  
  Readers continue to update shared Fortuna state under global mutex, as they
  do in the status quo implementation of the algorithm, but shift the actual
  PRF generation out from under the global lock.  This massively reduces the
  CPU time readers spend holding the global lock, allowing for increased
  concurrency on SMP systems and less bullying of the harvestq kthread.
  
  It is somewhat of a deviation from FS&K.  I think the primary difference is
  that the specific sequence of AES keys will differ if READ_RANDOM_UIO is
  accessed concurrently (as the 2nd thread to take the mutex will no longer
  receive a key derived from rekeying the first thread).  However, I believe
  the goals of rekeying AES are maintained: trivially, we continue to rekey
  every 1MB for the statistical property; and each consumer gets a
  forward-secret, independent AES key for their PRF.
  
  Since Chacha doesn't need to rekey for sequences of any length, this change
  makes no difference to the sequence of Chacha keys and PRF generated when
  Chacha is used in place of AES.
  
  On a GENERIC 4-thread VM (so, INVARIANTS/WITNESS, numbers not necessarily
  representative), 3x concurrent AES performance jumped from ~55 MiB/s per
  thread to ~197 MB/s per thread.  Concurrent Chacha20 at 3 threads went from
  ~113 MB/s per thread to ~430 MB/s per thread.
  
  Prior to this change, the system was extremely unresponsive with 3-4
  concurrent random readers; each thread had high variance in latency and
  throughput, depending on who got lucky and won the lock.  "rand_harvestq"
  thread CPU use was high (double digits), seemingly due to spinning on the
  global lock.
  
  After the change, concurrent random readers and the system in general are
  much more responsive, and rand_harvestq CPU use dropped to basically zero.
  
  Tests are added to the devrandom suite to ensure the uint128_add64 primitive
  utilized by unlocked read functions operates to specification.
  
  Reviewed by:	markm
  Approved by:	secteam(delphij)
  Relnotes:	yes
  Differential Revision:	https://reviews.freebsd.org/D20313

Modified:
  head/sys/dev/random/fortuna.c
  head/sys/dev/random/fortuna.h
  head/sys/dev/random/hash.c
  head/sys/dev/random/hash.h
  head/sys/dev/random/uint128.h
  head/tests/sys/devrandom/uint128_test.c

Modified: head/sys/dev/random/fortuna.c
==============================================================================
--- head/sys/dev/random/fortuna.c	Mon Jun 17 20:11:02 2019	(r349153)
+++ head/sys/dev/random/fortuna.c	Mon Jun 17 20:29:13 2019	(r349154)
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
 #include "unit_test.h"
 #endif /* _KERNEL */
 
+#include <crypto/chacha20/chacha.h>
 #include <crypto/rijndael/rijndael-api-fst.h>
 #include <crypto/sha2/sha256.h>
 
@@ -75,7 +76,10 @@ __FBSDID("$FreeBSD$");
 /* Defined in FS&K */
 #define	RANDOM_FORTUNA_NPOOLS 32		/* The number of accumulation pools */
 #define	RANDOM_FORTUNA_DEFPOOLSIZE 64		/* The default pool size/length for a (re)seed */
-#define	RANDOM_FORTUNA_MAX_READ (1 << 20)	/* Max bytes in a single read */
+#define	RANDOM_FORTUNA_MAX_READ (1 << 20)	/* Max bytes from AES before rekeying */
+#define	RANDOM_FORTUNA_BLOCKS_PER_KEY (1 << 16)	/* Max blocks from AES before rekeying */
+CTASSERT(RANDOM_FORTUNA_BLOCKS_PER_KEY * RANDOM_BLOCKSIZE ==
+    RANDOM_FORTUNA_MAX_READ);
 
 /*
  * The allowable range of RANDOM_FORTUNA_DEFPOOLSIZE. The default value is above.
@@ -120,6 +124,26 @@ static struct fortuna_state {
 	mtx_t fs_mtx;
 } fortuna_state;
 
+/*
+ * Experimental concurrent reads feature.  For now, disabled by default.  But
+ * we may enable it in the future.
+ *
+ * The benefit is improved concurrency in Fortuna.  That is reflected in two
+ * related aspects:
+ *
+ * 1. Concurrent devrandom readers can achieve similar throughput to a single
+ *    reader thread.
+ *
+ * 2. The rand_harvestq process spends much less time spinning when one or more
+ *    readers is processing a large request.  Partially this is due to
+ *    rand_harvestq / ra_event_processor design, which only passes one event at
+ *    a time to the underlying algorithm.  Each time, Fortuna must take its
+ *    global state mutex, potentially blocking on a reader.  Our adaptive
+ *    mutexes assume that a lock holder currently on CPU will release the lock
+ *    quickly, and spin if the owning thread is currently running.
+ */
+static bool fortuna_concurrent_read __read_frequently = false;
+
 #ifdef _KERNEL
 static struct sysctl_ctx_list random_clist;
 RANDOM_CHECK_UINT(fs_minpoolsize, RANDOM_FORTUNA_MINPOOLSIZE, RANDOM_FORTUNA_MAXPOOLSIZE);
@@ -176,6 +200,11 @@ random_fortuna_init_alg(void *unused __unused)
 		random_check_uint_fs_minpoolsize, "IU",
 		"Minimum pool size necessary to cause a reseed");
 	KASSERT(fortuna_state.fs_minpoolsize > 0, ("random: Fortuna threshold must be > 0 at startup"));
+
+	SYSCTL_ADD_BOOL(&random_clist, SYSCTL_CHILDREN(random_fortuna_o),
+	    OID_AUTO, "concurrent_read", CTLFLAG_RDTUN,
+	    &fortuna_concurrent_read, 0, "If non-zero, enable EXPERIMENTAL "
+	    "feature to improve concurrent Fortuna performance.");
 #endif
 
 	/*-
@@ -306,48 +335,6 @@ random_fortuna_reseed_internal(uint32_t *entropy_data,
 }
 
 /*-
- * FS&K - PseudoRandomData()
- *
- * If Chacha20 is used, output size is unrestricted.  If AES-CTR is used,
- * output size MUST be <= 1MB and a multiple of RANDOM_BLOCKSIZE.  The
- * reasoning for this is discussed in FS&K 9.4; the significant distinction
- * between the two ciphers is that AES has a *block* size of 128 bits while
- * Chacha has a *block* size of 512 bits.
- */
-static __inline void
-random_fortuna_genrandom(uint8_t *buf, size_t bytecount)
-{
-	uint8_t newkey[RANDOM_KEYSIZE];
-
-	RANDOM_RESEED_ASSERT_LOCK_OWNED();
-
-	/*-
-	 * FS&K - assert(n < 2^20 (== 1 MB)) when 128-bit block cipher is used
-	 *      - r = first-n-bytes(GenerateBlocks(ceil(n/16)))
-	 *      - K = GenerateBlocks(2)
-	 */
-	KASSERT(random_chachamode || bytecount <= RANDOM_FORTUNA_MAX_READ,
-	    ("%s: invalid large read request: %zu bytes", __func__,
-	     bytecount));
-
-	/*
-	 * This is where FS&K would invoke GenerateBlocks().  GenerateBlocks()
-	 * doesn't make a lot of sense or have much value if we use bytecount
-	 * for the API (which is useful for ciphers that do not require
-	 * block-sized output, like Chacha20).
-	 *
-	 * Just invoke our PRF abstraction directly, which is responsible for
-	 * updating fs_counter ('C').
-	 */
-	randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
-	    buf, bytecount);
-	randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
-	    newkey, sizeof(newkey));
-	randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
-	explicit_bzero(newkey, sizeof(newkey));
-}
-
-/*-
  * FS&K - RandomData() (Part 1)
  * Used to return processed entropy from the PRNG. There is a pre_read
  * required to be present (but it can be a stub) in order to allow
@@ -433,75 +420,267 @@ random_fortuna_pre_read(void)
 	explicit_bzero(temp, sizeof(temp));
 }
 
-/*-
- * FS&K - RandomData() (Part 2)
- * Main read from Fortuna, continued. May be called multiple times after
- * the random_fortuna_pre_read() above.
+/*
+ * This is basically GenerateBlocks() from FS&K.
  *
- * The supplied buf MAY not be a multiple of RANDOM_BLOCKSIZE in size; it is
- * the responsibility of the algorithm to accommodate partial block reads, if a
- * block output mode is used.
+ * It differs in three ways:
+ *
+ * 1. Chacha20 is tolerant of non-block-multiple request sizes, so we do not
+ * need to handle any remainder bytes specially and can just pass the length
+ * directly to the PRF construction; and
+ *
+ * 2. Chacha20 is a 512-bit block size cipher (whereas AES has 128-bit block
+ * size, regardless of key size).  This means Chacha does not require re-keying
+ * every 1MiB.  This is implied by the math in FS&K 9.4 and mentioned
+ * explicitly in the conclusion, "If we had a block cipher with a 256-bit [or
+ * greater] block size, then the collisions would not have been an issue at
+ * all" (p. 144).
+ *
+ * 3. In conventional ("locked") mode, we produce a maximum of PAGE_SIZE output
+ * at a time before dropping the lock, to not bully the lock especially.  This
+ * has been the status quo since 2015 (r284959).
+ *
+ * The upstream caller random_fortuna_read is responsible for zeroing out
+ * sensitive buffers provided as parameters to this routine.
  */
-void
-random_fortuna_read(uint8_t *buf, size_t bytecount)
+enum {
+	FORTUNA_UNLOCKED = false,
+	FORTUNA_LOCKED = true
+};
+static void
+random_fortuna_genbytes(uint8_t *buf, size_t bytecount,
+    uint8_t newkey[static RANDOM_KEYSIZE], uint128_t *p_counter,
+    union randomdev_key *p_key, bool locked)
 {
 	uint8_t remainder_buf[RANDOM_BLOCKSIZE];
-	size_t read_directly_len, read_chunk;
+	size_t chunk_size;
 
+	if (locked)
+		RANDOM_RESEED_ASSERT_LOCK_OWNED();
+	else
+		RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();
+
 	/*
-	 * The underlying AES generator expects multiples of RANDOM_BLOCKSIZE.
+	 * Easy case: don't have to worry about bullying the global mutex,
+	 * don't have to worry about rekeying Chacha; API is byte-oriented.
 	 */
-	if (random_chachamode)
-		read_directly_len = bytecount;
-	else
-		read_directly_len = rounddown(bytecount, RANDOM_BLOCKSIZE);
+	if (!locked && random_chachamode) {
+		randomdev_keystream(p_key, p_counter, buf, bytecount);
+		return;
+	}
 
-	RANDOM_RESEED_LOCK();
-	KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
-
-	while (read_directly_len > 0) {
+	if (locked) {
 		/*
-		 * 128-bit block ciphers like AES must be re-keyed at 1MB
-		 * intervals to avoid unacceptable statistical differentiation
-		 * from true random data.
-		 *
-		 * 512-bit block ciphers like Chacha20 do not have this
-		 * problem. (FS&K 9.4)
+		 * While holding the global lock, limit PRF generation to
+		 * mitigate, but not eliminate, bullying symptoms.
 		 */
-		if (random_chachamode)
-			read_chunk = read_directly_len;
-		else
-			read_chunk = MIN(read_directly_len,
-			    RANDOM_FORTUNA_MAX_READ);
-
+		chunk_size = PAGE_SIZE;
+	} else {
 		/*
-		 * For now, we hold the global Fortuna mutex, so yield
-		 * periodically to provide vague availability to other lock
-		 * users.  PAGE_SIZE is chosen to match existing behavior.
+		* 128-bit block ciphers like AES must be re-keyed at 1MB
+		* intervals to avoid unacceptable statistical differentiation
+		* from true random data (FS&K 9.4, p. 143-144).
+		*/
+		MPASS(!random_chachamode);
+		chunk_size = RANDOM_FORTUNA_MAX_READ;
+	}
+
+	chunk_size = MIN(bytecount, chunk_size);
+	if (!random_chachamode)
+		chunk_size = rounddown(chunk_size, RANDOM_BLOCKSIZE);
+
+	while (bytecount >= chunk_size) {
+		randomdev_keystream(p_key, p_counter, buf, chunk_size);
+
+		buf += chunk_size;
+		bytecount -= chunk_size;
+
+		/* We have to rekey if there is any data remaining to be
+		 * generated, in two scenarios:
+		 *
+		 * locked: we need to rekey before we unlock and release the
+		 * global state to another consumer; or
+		 *
+		 * unlocked: we need to rekey because we're in AES mode and are
+		 * required to rekey at chunk_size==1MB.  But we do not need to
+		 * rekey during the last trailing <1MB chunk.
 		 */
-		read_chunk = MIN(read_chunk, PAGE_SIZE);
+		if (bytecount > 0) {
+			if (locked || chunk_size == RANDOM_FORTUNA_MAX_READ) {
+				randomdev_keystream(p_key, p_counter, newkey,
+				    RANDOM_KEYSIZE);
+				randomdev_encrypt_init(p_key, newkey);
+			}
 
-		random_fortuna_genrandom(buf, read_chunk);
-		buf += read_chunk;
-		read_directly_len -= read_chunk;
-		bytecount -= read_chunk;
+			/*
+			 * If we're holding the global lock, yield it briefly
+			 * now.
+			 */
+			if (locked) {
+				RANDOM_RESEED_UNLOCK();
+				RANDOM_RESEED_LOCK();
+			}
 
-		/* Perform the actual yield. */
-		if (read_directly_len != 0) {
-			RANDOM_RESEED_UNLOCK();
-			RANDOM_RESEED_LOCK();
+			/*
+			 * At the trailing end, scale down chunk_size from 1MB or
+			 * PAGE_SIZE to all remaining full blocks (AES) or all
+			 * remaining bytes (Chacha).
+			 */
+			if (bytecount < chunk_size) {
+				if (random_chachamode)
+					chunk_size = bytecount;
+				else if (bytecount >= RANDOM_BLOCKSIZE)
+					chunk_size = rounddown(bytecount,
+					    RANDOM_BLOCKSIZE);
+				else
+					break;
+			}
 		}
 	}
 
-	if (bytecount > 0)
-		random_fortuna_genrandom(remainder_buf, sizeof(remainder_buf));
+	/*
+	 * Generate any partial AES block remaining into a temporary buffer and
+	 * copy the desired substring out.
+	 */
+	if (bytecount > 0) {
+		MPASS(!random_chachamode);
 
-	RANDOM_RESEED_UNLOCK();
+		randomdev_keystream(p_key, p_counter, remainder_buf,
+		    sizeof(remainder_buf));
+	}
 
+	/*
+	 * In locked mode, re-key global K before dropping the lock, which we
+	 * don't need for memcpy/bzero below.
+	 */
+	if (locked) {
+		randomdev_keystream(p_key, p_counter, newkey, RANDOM_KEYSIZE);
+		randomdev_encrypt_init(p_key, newkey);
+		RANDOM_RESEED_UNLOCK();
+	}
+
 	if (bytecount > 0) {
 		memcpy(buf, remainder_buf, bytecount);
 		explicit_bzero(remainder_buf, sizeof(remainder_buf));
 	}
+}
+
+
+/*
+ * Handle only "concurrency-enabled" Fortuna reads to simplify logic.
+ *
+ * Caller (random_fortuna_read) is responsible for zeroing out sensitive
+ * buffers provided as parameters to this routine.
+ */
+static void
+random_fortuna_read_concurrent(uint8_t *buf, size_t bytecount,
+    uint8_t newkey[static RANDOM_KEYSIZE])
+{
+	union randomdev_key key_copy;
+	uint128_t counter_copy;
+	size_t blockcount;
+
+	MPASS(fortuna_concurrent_read);
+
+	/*
+	 * Compute number of blocks required for the PRF request ('delta C').
+	 * We will step the global counter 'C' by this number under lock, and
+	 * then actually consume the counter values outside the lock.
+	 *
+	 * This ensures that contemporaneous but independent requests for
+	 * randomness receive distinct 'C' values and thus independent PRF
+	 * results.
+	 */
+	if (random_chachamode) {
+		blockcount = howmany(bytecount, CHACHA_BLOCKLEN);
+	} else {
+		blockcount = howmany(bytecount, RANDOM_BLOCKSIZE);
+
+		/*
+		 * Need to account for the additional blocks generated by
+		 * rekeying when updating the global fs_counter.
+		 */
+		blockcount += RANDOM_KEYS_PER_BLOCK *
+		    (blockcount / RANDOM_FORTUNA_BLOCKS_PER_KEY);
+	}
+
+	RANDOM_RESEED_LOCK();
+	KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
+	/*
+	 * Technically, we only need mutual exclusion to update shared state
+	 * appropriately.  Nothing about updating the shared internal state
+	 * requires that we perform (most) expensive cryptographic keystream
+	 * generation under lock.  (We still need to generate 256 bits of
+	 * keystream to re-key between consumers.)
+	 *
+	 * Save the original counter and key values that will be used as the
+	 * PRF for this particular consumer.
+	 */
+	memcpy(&counter_copy, &fortuna_state.fs_counter, sizeof(counter_copy));
+	memcpy(&key_copy, &fortuna_state.fs_key, sizeof(key_copy));
+
+	/*
+	 * Step the counter as if we had generated 'blockcount' blocks for this
+	 * consumer.  I.e., ensure that the next consumer gets an independent
+	 * range of counter values once we drop the global lock.
+	 */
+	uint128_add64(&fortuna_state.fs_counter, blockcount);
+
+	/*
+	 * We still need to Rekey the global 'K' between independent calls;
+	 * this is no different from conventional Fortuna.  Note that
+	 * 'randomdev_keystream()' will step the fs_counter 'C' appropriately
+	 * for the blocks needed for the 'newkey'.
+	 *
+	 * (This is part of PseudoRandomData() in FS&K, 9.4.4.)
+	 */
+	randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
+	    newkey, RANDOM_KEYSIZE);
+	randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
+
+	/*
+	 * We have everything we need to generate a unique PRF for this
+	 * consumer without touching global state.
+	 */
+	RANDOM_RESEED_UNLOCK();
+
+	random_fortuna_genbytes(buf, bytecount, newkey, &counter_copy,
+	    &key_copy, FORTUNA_UNLOCKED);
+	RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();
+
+	explicit_bzero(&counter_copy, sizeof(counter_copy));
+	explicit_bzero(&key_copy, sizeof(key_copy));
+}
+
+/*-
+ * FS&K - RandomData() (Part 2)
+ * Main read from Fortuna, continued. May be called multiple times after
+ * the random_fortuna_pre_read() above.
+ *
+ * The supplied buf MAY not be a multiple of RANDOM_BLOCKSIZE in size; it is
+ * the responsibility of the algorithm to accommodate partial block reads, if a
+ * block output mode is used.
+ */
+void
+random_fortuna_read(uint8_t *buf, size_t bytecount)
+{
+	uint8_t newkey[RANDOM_KEYSIZE];
+
+	if (fortuna_concurrent_read) {
+		random_fortuna_read_concurrent(buf, bytecount, newkey);
+		goto out;
+	}
+
+	RANDOM_RESEED_LOCK();
+	KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
+
+	random_fortuna_genbytes(buf, bytecount, newkey,
+	    &fortuna_state.fs_counter, &fortuna_state.fs_key, FORTUNA_LOCKED);
+	/* Returns unlocked */
+	RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();
+
+out:
+	explicit_bzero(newkey, sizeof(newkey));
 }
 
 #ifdef _KERNEL

Modified: head/sys/dev/random/fortuna.h
==============================================================================
--- head/sys/dev/random/fortuna.h	Mon Jun 17 20:11:02 2019	(r349153)
+++ head/sys/dev/random/fortuna.h	Mon Jun 17 20:29:13 2019	(r349154)
@@ -36,12 +36,14 @@ typedef struct mtx mtx_t;
 #define	RANDOM_RESEED_LOCK(x)			mtx_lock(&fortuna_state.fs_mtx)
 #define	RANDOM_RESEED_UNLOCK(x)			mtx_unlock(&fortuna_state.fs_mtx)
 #define	RANDOM_RESEED_ASSERT_LOCK_OWNED(x)	mtx_assert(&fortuna_state.fs_mtx, MA_OWNED)
+#define	RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED()	mtx_assert(&fortuna_state.fs_mtx, MA_NOTOWNED)
 #else
 #define	RANDOM_RESEED_INIT_LOCK(x)		mtx_init(&fortuna_state.fs_mtx, mtx_plain)
 #define	RANDOM_RESEED_DEINIT_LOCK(x)		mtx_destroy(&fortuna_state.fs_mtx)
 #define	RANDOM_RESEED_LOCK(x)			mtx_lock(&fortuna_state.fs_mtx)
 #define	RANDOM_RESEED_UNLOCK(x)			mtx_unlock(&fortuna_state.fs_mtx)
 #define	RANDOM_RESEED_ASSERT_LOCK_OWNED(x)
+#define	RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED()
 #endif
 
 #endif /* SYS_DEV_RANDOM_FORTUNA_H_INCLUDED */

Modified: head/sys/dev/random/hash.c
==============================================================================
--- head/sys/dev/random/hash.c	Mon Jun 17 20:11:02 2019	(r349153)
+++ head/sys/dev/random/hash.c	Mon Jun 17 20:29:13 2019	(r349154)
@@ -74,7 +74,7 @@ _Static_assert(CHACHA_STATELEN == RANDOM_BLOCKSIZE, ""
  * Benefits include somewhat faster keystream generation compared with
  * unaccelerated AES-ICM.
  */
-bool random_chachamode = false;
+bool random_chachamode __read_frequently = false;
 #ifdef _KERNEL
 SYSCTL_BOOL(_kern_random, OID_AUTO, use_chacha20_cipher, CTLFLAG_RDTUN,
     &random_chachamode, 0,

Modified: head/sys/dev/random/hash.h
==============================================================================
--- head/sys/dev/random/hash.h	Mon Jun 17 20:11:02 2019	(r349153)
+++ head/sys/dev/random/hash.h	Mon Jun 17 20:29:13 2019	(r349154)
@@ -32,6 +32,10 @@
 #include <crypto/chacha20/_chacha.h>
 #include <dev/random/uint128.h>
 
+#ifndef _KERNEL
+#define	__read_frequently
+#endif
+
 /* Keys are formed from cipher blocks */
 #define	RANDOM_KEYSIZE		32	/* (in bytes) == 256 bits */
 #define	RANDOM_KEYSIZE_WORDS	(RANDOM_KEYSIZE/sizeof(uint32_t))

Modified: head/sys/dev/random/uint128.h
==============================================================================
--- head/sys/dev/random/uint128.h	Mon Jun 17 20:11:02 2019	(r349153)
+++ head/sys/dev/random/uint128.h	Mon Jun 17 20:29:13 2019	(r349154)
@@ -65,6 +65,21 @@ uint128_increment(uint128_t *big_uintp)
 #endif
 }
 
+static __inline void
+uint128_add64(uint128_t *big_uintp, uint64_t add)
+{
+#ifdef USE_REAL_UINT128_T
+	(*big_uintp) += add;
+#else
+	uint64_t word0p;
+
+	word0p = big_uintp->u128t_word0 + add;
+	if (word0p < big_uintp->u128t_word0)
+		big_uintp->u128t_word1++;
+	big_uintp->u128t_word0 = word0p;
+#endif
+}
+
 static __inline bool
 uint128_equals(uint128_t a, uint128_t b)
 {

Modified: head/tests/sys/devrandom/uint128_test.c
==============================================================================
--- head/tests/sys/devrandom/uint128_test.c	Mon Jun 17 20:11:02 2019	(r349153)
+++ head/tests/sys/devrandom/uint128_test.c	Mon Jun 17 20:29:13 2019	(r349154)
@@ -152,6 +152,62 @@ ATF_TC_BODY(uint128_inc, tc)
 	}
 }
 
+ATF_TC_WITHOUT_HEAD(uint128_add64);
+ATF_TC_BODY(uint128_add64, tc)
+{
+	static const struct u128_add64_tc {
+		uint32_t input[4];
+		uint64_t addend;
+		uint32_t expected[4];
+		const char *descr;
+	} tests[] = {
+		{
+			.input = { 0, 0, 0, 0 },
+			.addend = 1,
+			.expected = { 1, 0, 0, 0 },
+			.descr = "0 + 1 -> 1",
+		},
+		{
+			.input = { 1, 0, 0, 0 },
+			.addend = UINT32_MAX,
+			.expected = { 0, 1, 0, 0 },
+			.descr = "1 + (2^32 - 1) -> 2^32 (word carry)",
+		},
+		{
+			.input = { 1, 0, 0, 0 },
+			.addend = UINT64_MAX,
+			.expected = { 0, 0, 1, 0 },
+			.descr = "1 + (2^64 - 1) -> 2^64 (u128t_word0 carry)",
+		},
+		{
+			.input = { 0x11111111, 0x11111111, 0, 0 },
+			.addend = 0xf0123456789abcdeULL,
+			.expected = { 0x89abcdef, 0x01234567, 1, 0 },
+			.descr = "0x1111_1111_1111_1111 +"
+				 "0xf012_3456_789a_bcde ->"
+			       "0x1_0123_4567_89ab_cdef",
+		},
+		{
+			.input = { 1, 0, UINT32_MAX, 0 },
+			.addend = UINT64_MAX,
+			.expected = { 0, 0, 0, 1 },
+			.descr = "Carry ~2^96",
+		},
+	};
+	uint8_t inputle[16], expectedle[16];
+	uint128_t a;
+	size_t i;
+
+	for (i = 0; i < nitems(tests); i++) {
+		vec_u32_tole128(inputle, tests[i].input);
+		vec_u32_tole128(expectedle, tests[i].expected);
+
+		a = le128dec(inputle);
+		uint128_add64(&a, tests[i].addend);
+		u128_check_equality(le128dec(expectedle), a, tests[i].descr);
+	}
+}
+
 /*
  * Test assumptions about Chacha incrementing counter in the same way as
  * uint128.h
@@ -219,6 +275,7 @@ ATF_TP_ADD_TCS(tp)
 {
 
 	ATF_TP_ADD_TC(tp, uint128_inc);
+	ATF_TP_ADD_TC(tp, uint128_add64);
 	ATF_TP_ADD_TC(tp, uint128_chacha_ctr);
 	return (atf_no_error());
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201906172029.x5HKTDb2011520>