Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 22 Oct 2015 04:38:06 +0000 (UTC)
From:      "Conrad E. Meyer" <cem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r289733 - in head: sys/dev/ioat tools/tools/ioat
Message-ID:  <201510220438.t9M4c67v041733@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: cem
Date: Thu Oct 22 04:38:05 2015
New Revision: 289733
URL: https://svnweb.freebsd.org/changeset/base/289733

Log:
  Improve flexibility of ioat_test / ioatcontrol(8)
  
  The test logic now preallocates memory before running the test.
  
  The buffer size is now configurable.  Post-copy verification is
  configurable.  The number of copies to chain into one transaction (one
  interrupt) is configurable.
  
  A 'duration' mode is added, which repeats the test until the duration
  has elapsed, reporting the B/s and transactions completed.
  
  ioatcontrol.8 has been updated to document the new arguments.
  
  Initial limits (on this particular Broadwell-DE) (and when the
  interrupts are working) seem to be: 256 interrupts/sec or ~6 GB/s,
  whichever limit is more restrictive.
  
  Unfortunately, it seems the interrupt-reset handling on Broadwell isn't
  working as intended.  That will be fixed in a later commit.
  
  Sponsored by:	EMC / Isilon Storage Division

Modified:
  head/sys/dev/ioat/ioat_test.c
  head/sys/dev/ioat/ioat_test.h
  head/tools/tools/ioat/Makefile
  head/tools/tools/ioat/ioatcontrol.8
  head/tools/tools/ioat/ioatcontrol.c

Modified: head/sys/dev/ioat/ioat_test.c
==============================================================================
--- head/sys/dev/ioat/ioat_test.c	Thu Oct 22 04:33:05 2015	(r289732)
+++ head/sys/dev/ioat/ioat_test.c	Thu Oct 22 04:38:05 2015	(r289733)
@@ -51,18 +51,28 @@ __FBSDID("$FreeBSD$");
 #include "ioat_internal.h"
 #include "ioat_test.h"
 
+#ifndef time_after
+#define	time_after(a,b)		((long)(b) - (long)(a) < 0)
+#endif
+
 MALLOC_DEFINE(M_IOAT_TEST, "ioat_test", "ioat test allocations");
 
-#define	IOAT_TEST_SIZE	0x40000
-#define	IOAT_MAX_BUFS	8
+#define	IOAT_MAX_BUFS	256
 
 struct test_transaction {
-	uint8_t			num_buffers;
 	void			*buf[IOAT_MAX_BUFS];
 	uint32_t		length;
+	uint32_t		depth;
 	struct ioat_test	*test;
+	TAILQ_ENTRY(test_transaction)	entry;
 };
 
+#define	IT_LOCK()	mtx_lock(&ioat_test_lk)
+#define	IT_UNLOCK()	mtx_unlock(&ioat_test_lk)
+#define	IT_ASSERT()	mtx_assert(&ioat_test_lk, MA_OWNED)
+static struct mtx ioat_test_lk;
+MTX_SYSINIT(ioat_test_lk, &ioat_test_lk, "test coordination mtx", MTX_DEF);
+
 static int g_thread_index = 1;
 static struct cdev *g_ioat_cdev = NULL;
 
@@ -73,7 +83,7 @@ ioat_test_transaction_destroy(struct tes
 
 	for (i = 0; i < IOAT_MAX_BUFS; i++) {
 		if (tx->buf[i] != NULL) {
-			contigfree(tx->buf[i], IOAT_TEST_SIZE, M_IOAT_TEST);
+			contigfree(tx->buf[i], tx->length, M_IOAT_TEST);
 			tx->buf[i] = NULL;
 		}
 	}
@@ -82,17 +92,16 @@ ioat_test_transaction_destroy(struct tes
 }
 
 static struct
-test_transaction *ioat_test_transaction_create(uint8_t num_buffers,
+test_transaction *ioat_test_transaction_create(unsigned num_buffers,
     uint32_t buffer_size)
 {
 	struct test_transaction *tx;
-	int i;
+	unsigned i;
 
-	tx = malloc(sizeof(struct test_transaction), M_IOAT_TEST, M_NOWAIT | M_ZERO);
+	tx = malloc(sizeof(*tx), M_IOAT_TEST, M_NOWAIT | M_ZERO);
 	if (tx == NULL)
 		return (NULL);
 
-	tx->num_buffers = num_buffers;
 	tx->length = buffer_size;
 
 	for (i = 0; i < num_buffers; i++) {
@@ -107,6 +116,18 @@ test_transaction *ioat_test_transaction_
 	return (tx);
 }
 
+static bool
+ioat_compare_ok(struct test_transaction *tx)
+{
+	uint32_t i;
+
+	for (i = 0; i < tx->depth; i++) {
+		if (memcmp(tx->buf[2*i], tx->buf[2*i+1], tx->length) != 0)
+			return (false);
+	}
+	return (true);
+}
+
 static void
 ioat_dma_test_callback(void *arg)
 {
@@ -116,82 +137,195 @@ ioat_dma_test_callback(void *arg)
 	tx = arg;
 	test = tx->test;
 
-	if (memcmp(tx->buf[0], tx->buf[1], tx->length) != 0) {
+	if (test->verify && !ioat_compare_ok(tx)) {
 		ioat_log_message(0, "miscompare found\n");
-		test->status = IOAT_TEST_MISCOMPARE;
+		atomic_add_32(&test->status[IOAT_TEST_MISCOMPARE], tx->depth);
+	} else if (!test->too_late)
+		atomic_add_32(&test->status[IOAT_TEST_OK], tx->depth);
+
+	IT_LOCK();
+	TAILQ_REMOVE(&test->pend_q, tx, entry);
+	TAILQ_INSERT_TAIL(&test->free_q, tx, entry);
+	wakeup(&test->free_q);
+	IT_UNLOCK();
+}
+
+static int
+ioat_test_prealloc_memory(struct ioat_test *test, int index)
+{
+	uint32_t i, j, k;
+	struct test_transaction *tx;
+
+	for (i = 0; i < test->transactions; i++) {
+		tx = ioat_test_transaction_create(test->chain_depth * 2,
+		    test->buffer_size);
+		if (tx == NULL) {
+			ioat_log_message(0, "tx == NULL - memory exhausted\n");
+			test->status[IOAT_TEST_NO_MEMORY]++;
+			return (ENOMEM);
+		}
+
+		TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
+
+		tx->test = test;
+		tx->depth = test->chain_depth;
+
+		/* fill in source buffers */
+		for (j = 0; j < (tx->length / sizeof(uint32_t)); j++) {
+			uint32_t val = j + (index << 28);
+
+			for (k = 0; k < test->chain_depth; k++) {
+				((uint32_t *)tx->buf[2*k])[j] = ~val;
+				((uint32_t *)tx->buf[2*k+1])[j] = val;
+			}
+		}
 	}
-	atomic_add_32(&test->num_completions, 1);
-	ioat_test_transaction_destroy(tx);
-	if (test->num_completions == test->num_loops)
-		wakeup(test);
+	return (0);
 }
 
 static void
-ioat_dma_test(void *arg)
+ioat_test_release_memory(struct ioat_test *test)
+{
+	struct test_transaction *tx, *s;
+
+	TAILQ_FOREACH_SAFE(tx, &test->free_q, entry, s)
+		ioat_test_transaction_destroy(tx);
+	TAILQ_INIT(&test->free_q);
+
+	TAILQ_FOREACH_SAFE(tx, &test->pend_q, entry, s)
+		ioat_test_transaction_destroy(tx);
+	TAILQ_INIT(&test->pend_q);
+}
+
+static void
+ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
 {
 	struct test_transaction *tx;
+	struct bus_dmadesc *desc;
+	bus_dmaengine_callback_t cb;
+	bus_addr_t src, dest;
+	uint32_t i, flags;
+
+	IT_LOCK();
+	while (TAILQ_EMPTY(&test->free_q))
+		msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0);
+
+	tx = TAILQ_FIRST(&test->free_q);
+	TAILQ_REMOVE(&test->free_q, tx, entry);
+	TAILQ_INSERT_HEAD(&test->pend_q, tx, entry);
+	IT_UNLOCK();
+
+	ioat_acquire(dma);
+	for (i = 0; i < tx->depth; i++) {
+		src = vtophys((vm_offset_t)tx->buf[2*i]);
+		dest = vtophys((vm_offset_t)tx->buf[2*i+1]);
+
+		if (i == tx->depth - 1) {
+			cb = ioat_dma_test_callback;
+			flags = DMA_INT_EN;
+		} else {
+			cb = NULL;
+			flags = 0;
+		}
+
+		desc = ioat_copy(dma, src, dest, tx->length, cb, tx, flags);
+		if (desc == NULL)
+			panic("Failed to allocate a ring slot "
+			    "-- this shouldn't happen!");
+	}
+	ioat_release(dma);
+}
+
+static void
+ioat_dma_test(void *arg)
+{
 	struct ioat_test *test;
 	bus_dmaengine_t dmaengine;
 	uint32_t loops;
-	int index, i;
+	int index, rc, start, end;
 
 	test = arg;
-	loops = test->num_loops;
+	memset(__DEVOLATILE(void *, test->status), 0, sizeof(test->status));
+
+	if (test->buffer_size > 1024 * 1024) {
+		ioat_log_message(0, "Buffer size too large >1MB\n");
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
 
-	test->status = IOAT_TEST_OK;
-	test->num_completions = 0;
+	if (test->chain_depth * 2 > IOAT_MAX_BUFS) {
+		ioat_log_message(0, "Depth too large (> %u)\n",
+		    (unsigned)IOAT_MAX_BUFS / 2);
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
 
-	index = g_thread_index++;
-	dmaengine = ioat_get_dmaengine(test->channel_index);
+	if (btoc((uint64_t)test->buffer_size * test->chain_depth *
+	    test->transactions) > (physmem / 4)) {
+		ioat_log_message(0, "Sanity check failed -- test would "
+		    "use more than 1/4 of phys mem.\n");
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
 
+	if ((uint64_t)test->transactions * test->chain_depth > (1<<16)) {
+		ioat_log_message(0, "Sanity check failed -- test would "
+		    "use more than available IOAT ring space.\n");
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
+
+	dmaengine = ioat_get_dmaengine(test->channel_index);
 	if (dmaengine == NULL) {
 		ioat_log_message(0, "Couldn't acquire dmaengine\n");
-		test->status = IOAT_TEST_NO_DMA_ENGINE;
+		test->status[IOAT_TEST_NO_DMA_ENGINE]++;
 		return;
 	}
 
-	ioat_log_message(0, "Thread %d: num_loops remaining: 0x%07x\n", index,
-	    test->num_loops);
+	index = g_thread_index++;
+	TAILQ_INIT(&test->free_q);
+	TAILQ_INIT(&test->pend_q);
 
-	for (loops = 0; loops < test->num_loops; loops++) {
-		bus_addr_t src, dest;
+	if (test->duration == 0)
+		ioat_log_message(1, "Thread %d: num_loops remaining: 0x%08x\n",
+		    index, test->transactions);
+	else
+		ioat_log_message(1, "Thread %d: starting\n", index);
+
+	rc = ioat_test_prealloc_memory(test, index);
+	if (rc != 0) {
+		ioat_log_message(0, "prealloc_memory: %d\n", rc);
+		return;
+	}
+	wmb();
 
-		if (loops % 0x10000 == 0) {
-			ioat_log_message(0, "Thread %d: "
-			    "num_loops remaining: 0x%07x\n", index,
-			    test->num_loops - loops);
+	test->too_late = false;
+	start = ticks;
+	end = start + (((sbintime_t)test->duration * hz) / 1000);
+
+	for (loops = 0;; loops++) {
+		if (test->duration == 0 && loops >= test->transactions)
+			break;
+		else if (test->duration != 0 && time_after(ticks, end)) {
+			test->too_late = true;
+			break;
 		}
 
-		tx = ioat_test_transaction_create(2, IOAT_TEST_SIZE);
-		if (tx == NULL) {
-			ioat_log_message(0, "tx == NULL - memory exhausted\n");
-			atomic_add_32(&test->num_completions, 1);
-			test->status = IOAT_TEST_NO_MEMORY;
-			continue;
-		}
-
-		tx->test = test;
-		wmb();
-
-		/* fill in source buffer */
-		for (i = 0; i < (IOAT_TEST_SIZE / sizeof(uint32_t)); i++) {
-			uint32_t val = i + (loops << 16) + (index << 28);
-			((uint32_t *)tx->buf[0])[i] = ~val;
-			((uint32_t *)tx->buf[1])[i] = val;
-		}
+		ioat_test_submit_1_tx(test, dmaengine);
+	}
 
-		src = pmap_kextract((vm_offset_t)tx->buf[0]);
-		dest = pmap_kextract((vm_offset_t)tx->buf[1]);
+	ioat_log_message(1, "Test Elapsed: %d ticks (overrun %d), %d sec.\n",
+	    ticks - start, ticks - end, (ticks - start) / hz);
 
-		ioat_acquire(dmaengine);
-		ioat_copy(dmaengine, src, dest, IOAT_TEST_SIZE,
-		    ioat_dma_test_callback, tx, DMA_INT_EN);
-		ioat_release(dmaengine);
-	}
+	IT_LOCK();
+	while (!TAILQ_EMPTY(&test->pend_q))
+		msleep(&test->free_q, &ioat_test_lk, 0, "ioattestcompl", hz);
+	IT_UNLOCK();
 
-	while (test->num_completions < test->num_loops)
-		tsleep(test, 0, "compl", 5 * hz);
+	ioat_log_message(1, "Test Elapsed2: %d ticks (overrun %d), %d sec.\n",
+	    ticks - start, ticks - end, (ticks - start) / hz);
 
+	ioat_test_release_memory(test);
 }
 
 static int

Modified: head/sys/dev/ioat/ioat_test.h
==============================================================================
--- head/sys/dev/ioat/ioat_test.h	Thu Oct 22 04:33:05 2015	(r289732)
+++ head/sys/dev/ioat/ioat_test.h	Thu Oct 22 04:38:05 2015	(r289733)
@@ -29,17 +29,39 @@ __FBSDID("$FreeBSD$");
 #ifndef __IOAT_TEST_H__
 #define __IOAT_TEST_H__
 
+enum ioat_res {
+	IOAT_TEST_OK = 0,
+	IOAT_TEST_NO_DMA_ENGINE,
+	IOAT_TEST_NO_MEMORY,
+	IOAT_TEST_MISCOMPARE,
+	IOAT_NUM_RES
+};
+
+struct test_transaction;
+
 struct ioat_test {
+	volatile uint32_t status[IOAT_NUM_RES];
 	uint32_t channel_index;
-	uint32_t num_loops;
-	volatile uint32_t num_completions;
-	uint32_t status;
-};
 
-#define	IOAT_TEST_OK		0
-#define	IOAT_TEST_NO_DMA_ENGINE	1
-#define	IOAT_TEST_NO_MEMORY	2
-#define	IOAT_TEST_MISCOMPARE	3
+	/* HW max of 1MB */
+	uint32_t buffer_size;
+	uint32_t chain_depth;
+	uint32_t transactions;
+
+	/*
+	 * If non-zero, duration is time in ms;
+	 * If zero, bounded by 'transactions' above.
+	 */
+	uint32_t duration;
+
+	/* If true, check for miscompares after a copy. */
+	bool verify;
+
+	/* Internal usage -- not test inputs */
+	TAILQ_HEAD(, test_transaction) free_q;
+	TAILQ_HEAD(, test_transaction) pend_q;
+	volatile bool too_late;
+};
 
 #define	IOAT_DMATEST	_IOWR('i', 0, struct ioat_test)
 

Modified: head/tools/tools/ioat/Makefile
==============================================================================
--- head/tools/tools/ioat/Makefile	Thu Oct 22 04:33:05 2015	(r289732)
+++ head/tools/tools/ioat/Makefile	Thu Oct 22 04:38:05 2015	(r289733)
@@ -4,5 +4,6 @@ PROG=	ioatcontrol
 MAN=	ioatcontrol.8
 CFLAGS+=	-I${.CURDIR:H:H:H}/sys/dev/ioat
 WARNS?=	6
+LIBADD=	util
 
 .include <bsd.prog.mk>

Modified: head/tools/tools/ioat/ioatcontrol.8
==============================================================================
--- head/tools/tools/ioat/ioatcontrol.8	Thu Oct 22 04:33:05 2015	(r289732)
+++ head/tools/tools/ioat/ioatcontrol.8	Thu Oct 22 04:38:05 2015	(r289733)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 24, 2015
+.Dd October 21, 2015
 .Dt IOATCONTROL 8
 .Os
 .Sh NAME
@@ -33,18 +33,64 @@
 .Xr ioat 4
 .Sh SYNOPSIS
 .Nm
+.Op Fl V
 .Ar channel_number
-.Ar num_loops
+.Ar num_txns
+.Ar [ bufsize
+.Ar [ chain-len
+.Ar [ duration ] ] ]
 .Sh DESCRIPTION
 .Nm
 allows one to issue some number of test operations to the
 .Xr ioat 4
 driver on a specific hardware channel.
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl V
+Verify copies for accuracy.
+.El
 .Pp
-Each loop will allocate two chunks of memory, write data patterns to them,
-submit a DMA request to copy one buffer to the other, and compare the contents
-in the callback.
-If the contents are not as expected, an error is reported.
+.Nm
+operates in one of two modes; if the
+.Ar duration
+argument is passed,
+.Nm
+tries to estimate the copy rate in bytes per second by running
+.Ar num_txns
+repeatedly in a loop.
+If
+.Ar duration
+is not passed,
+.Nm
+only runs through
+.Ar num_txns
+once and prints the total bytes copied, as well as error information.
+.Pp
+The
+.Ar bufsize
+argument determines the size of buffers to use for each
+.Fn ioat_copy
+invocation.
+The default is 256 KB.
+.Pp
+The
+.Ar chain-len
+argument determines the number of copies to chain together in a single DMA
+transaction.
+The default is 2, and the maximum is currently 128.
+.Pp
+The
+.Ar duration
+argument specifies an approximate time limit for the test, in milliseconds.
+.Pp
+The test will allocate two chunks of memory for each component of each
+transaction's chain.
+It will initialize them with specific data patterns.
+During the test, it submits DMA requests to copy between pairs of buffers.
+If the
+.Fl V
+flag was specified, it will compare the contents in the callback for a copy
+error.
 .Sh FILES
 .Pa /dev/ioat_test
 .Pp
@@ -55,6 +101,10 @@ and
 .Nm
 exposes it with
 .Cd hw.ioat.enable_ioat_test=1 .
+.Sh DIAGNOSTICS
+The wait channel
+.Va test_submit
+indicates that the test code is keeping the DMA engine full of work.
 .Sh SEE ALSO
 .Xr ioat 4
 .Sh HISTORY

Modified: head/tools/tools/ioat/ioatcontrol.c
==============================================================================
--- head/tools/tools/ioat/ioatcontrol.c	Thu Oct 22 04:33:05 2015	(r289732)
+++ head/tools/tools/ioat/ioatcontrol.c	Thu Oct 22 04:38:05 2015	(r289733)
@@ -28,34 +28,88 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/ioctl.h>
+#include <sys/queue.h>
 
 #include <fcntl.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <sysexits.h>
 #include <unistd.h>
 
+#include <libutil.h>
+
 #include "ioat_test.h"
 
+static int prettyprint(struct ioat_test *);
+
+static void
+usage(void)
+{
+
+	printf("Usage: %s [-V] <channel #> <txns> [<bufsize> "
+	    "[<chain-len> [duration]]]\n", getprogname());
+	exit(EX_USAGE);
+}
+
 int
 main(int argc, char **argv)
 {
 	struct ioat_test t;
-	int fd;
+	int fd, ch;
 
-	if (argc < 3) {
-		printf("Usage: %s <channel #> <num_loops>\n", argv[0]);
-		return (EX_USAGE);
+	while ((ch = getopt(argc, argv, "V")) != -1) {
+		switch (ch) {
+		case 'V':
+			t.verify = true;
+			break;
+		default:
+			usage();
+		}
 	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 2)
+		usage();
+
+	/* Defaults for optional args */
+	t.buffer_size = 256 * 1024;
+	t.chain_depth = 2;
+	t.duration = 0;
 
-	t.channel_index = atoi(argv[1]);
+	t.channel_index = atoi(argv[0]);
 	if (t.channel_index > 8) {
 		printf("Channel number must be between 0 and 7.\n");
 		return (EX_USAGE);
 	}
 
-	t.num_loops = atoi(argv[2]);
+	t.transactions = atoi(argv[1]);
+
+	if (argc >= 3) {
+		t.buffer_size = atoi(argv[2]);
+		if (t.buffer_size == 0) {
+			printf("Buffer size must be greater than zero\n");
+			return (EX_USAGE);
+		}
+	}
+
+	if (argc >= 4) {
+		t.chain_depth = atoi(argv[3]);
+		if (t.chain_depth < 1) {
+			printf("Chain length must be greater than zero\n");
+			return (EX_USAGE);
+		}
+	}
+
+	if (argc >= 5) {
+		t.duration = atoi(argv[4]);
+		if (t.duration < 1) {
+			printf("Duration must be greater than zero\n");
+			return (EX_USAGE);
+		}
+	}
 
 	fd = open("/dev/ioat_test", O_RDWR);
 	if (fd < 0) {
@@ -66,5 +120,44 @@ main(int argc, char **argv)
 	(void)ioctl(fd, IOAT_DMATEST, &t);
 	close(fd);
 
-	return (t.status);
+	return (prettyprint(&t));
+}
+
+static int
+prettyprint(struct ioat_test *t)
+{
+	char bps[10], bytesh[10];
+	uintmax_t bytes;
+
+	if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0 ||
+	    t->status[IOAT_TEST_NO_MEMORY] != 0 ||
+	    t->status[IOAT_TEST_MISCOMPARE] != 0) {
+		printf("Errors:\n");
+		if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0)
+			printf("\tNo DMA engine present: %u\n",
+			    (unsigned)t->status[IOAT_TEST_NO_DMA_ENGINE]);
+		if (t->status[IOAT_TEST_NO_MEMORY] != 0)
+			printf("\tOut of memory: %u\n",
+			    (unsigned)t->status[IOAT_TEST_NO_MEMORY]);
+		if (t->status[IOAT_TEST_MISCOMPARE] != 0)
+			printf("\tMiscompares: %u\n",
+			    (unsigned)t->status[IOAT_TEST_MISCOMPARE]);
+	}
+
+	printf("Processed %u txns\n", (unsigned)t->status[IOAT_TEST_OK] /
+	    t->chain_depth);
+	bytes = (uintmax_t)t->buffer_size * t->status[IOAT_TEST_OK];
+
+	humanize_number(bytesh, sizeof(bytesh), (int64_t)bytes, "B",
+	    HN_AUTOSCALE, HN_DECIMAL);
+	if (t->duration) {
+		humanize_number(bps, sizeof(bps),
+		    (int64_t)1000 * bytes / t->duration, "B/s", HN_AUTOSCALE,
+		    HN_DECIMAL);
+		printf("%ju (%s) copied in %u ms (%s)\n", bytes, bytesh,
+		    (unsigned)t->duration, bps);
+	} else
+		printf("%ju (%s) copied\n", bytes, bytesh);
+
+	return (EX_OK);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201510220438.t9M4c67v041733>