Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 22 Feb 2018 05:44:01 +0000 (UTC)
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r329816 - in head/sys: cam/nvme dev/nvme
Message-ID:  <201802220544.w1M5i1JB069042@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: imp
Date: Thu Feb 22 05:44:00 2018
New Revision: 329816
URL: https://svnweb.freebsd.org/changeset/base/329816

Log:
  Combine BIO_DELETE requests for nda devices
  
  Now that we're queueing BIO_DELETE requests in the CAM I/O scheduler,
  it makes sense to try to combine as many as possible into a single
  request to send down to hardware. Hopefully, lots of larger requests
  like this are better than lots of individual transactions.
  
  Note for future: need to limit based on total size of the trim
  request. Should also collapse adjacent ranges where possible to
  increase the size of the max payload.
  
  Sponsored by: Netflix

Modified:
  head/sys/cam/nvme/nvme_da.c
  head/sys/dev/nvme/nvme.h

Modified: head/sys/cam/nvme/nvme_da.c
==============================================================================
--- head/sys/cam/nvme/nvme_da.c	Thu Feb 22 05:43:55 2018	(r329815)
+++ head/sys/cam/nvme/nvme_da.c	Thu Feb 22 05:44:00 2018	(r329816)
@@ -122,6 +122,14 @@ struct nda_softc {
 #endif
 };
 
+struct nda_trim_request {
+	union {
+		struct nvme_dsm_range dsm;
+		uint8_t		data[NVME_MAX_DSM_TRIM];
+	} u;
+	TAILQ_HEAD(, bio) bps;
+};
+
 /* Need quirk table */
 
 static	disk_strategy_t	ndastrategy;
@@ -150,11 +158,14 @@ static void		ndasuspend(void *arg);
 #ifndef	NDA_DEFAULT_RETRY
 #define	NDA_DEFAULT_RETRY	4
 #endif
+#ifndef NDA_MAX_TRIM_ENTRIES
+#define NDA_MAX_TRIM_ENTRIES 256	/* Number of DSM trims to use, max 256 */
+#endif
 
-
 //static int nda_retry_count = NDA_DEFAULT_RETRY;
 static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED;
 static int nda_default_timeout = NDA_DEFAULT_TIMEOUT;
+static int nda_max_trim_entries = NDA_MAX_TRIM_ENTRIES;
 
 /*
  * All NVMe media is non-rotational, so all nvme device instances
@@ -895,22 +906,40 @@ ndastart(struct cam_periph *periph, union ccb *start_c
 		}
 		case BIO_DELETE:
 		{
-			struct nvme_dsm_range *dsm_range;
+			struct nvme_dsm_range *dsm_range, *dsm_end;
+			struct nda_trim_request *trim;
+			struct bio *bp1;
+			int ents;
 
-			dsm_range =
-			    malloc(sizeof(*dsm_range), M_NVMEDA, M_ZERO | M_NOWAIT);
-			if (dsm_range == NULL) {
+			trim = malloc(sizeof(*trim), M_NVMEDA, M_ZERO | M_NOWAIT);
+			if (trim == NULL) {
 				biofinish(bp, NULL, ENOMEM);
 				xpt_release_ccb(start_ccb);
 				ndaschedule(periph);
 				return;
 			}
-			dsm_range->length =
-			    bp->bio_bcount / softc->disk->d_sectorsize;
-			dsm_range->starting_lba =
-			    bp->bio_offset / softc->disk->d_sectorsize;
-			bp->bio_driver2 = dsm_range;
-			nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1);
+			TAILQ_INIT(&trim->bps);
+			bp1 = bp;
+			ents = sizeof(trim->u.data) / sizeof(struct nvme_dsm_range);
+			ents = min(ents, nda_max_trim_entries);
+			dsm_range = &trim->u.dsm;
+			dsm_end = dsm_range + ents;
+			do {
+				TAILQ_INSERT_TAIL(&trim->bps, bp1, bio_queue);
+				dsm_range->length =
+				    bp1->bio_bcount / softc->disk->d_sectorsize;
+				dsm_range->starting_lba =
+				    bp1->bio_offset / softc->disk->d_sectorsize;
+				dsm_range++;
+				if (dsm_range >= dsm_end)
+					break;
+				bp1 = cam_iosched_next_trim(softc->cam_iosched);
+				/* XXX -- Could collapse adjacent ranges, but we don't for now */
+				/* XXX -- Could limit based on total payload size */
+			} while (bp1 != NULL);
+			bp->bio_driver2 = trim;
+			nda_nvme_trim(softc, &start_ccb->nvmeio, &trim->u.dsm,
+			    dsm_range - &trim->u.dsm);
 			start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM;
 			start_ccb->ccb_h.flags |= CAM_UNLOCKED;
 			/*
@@ -991,8 +1020,6 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb
 		} else {
 			bp->bio_resid = 0;
 		}
-		if (state == NDA_CCB_TRIM)
-			free(bp->bio_driver2, M_NVMEDA);
 		softc->outstanding_cmds--;
 
 		/*
@@ -1004,13 +1031,15 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb
 		cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb);
 		xpt_release_ccb(done_ccb);
 		if (state == NDA_CCB_TRIM) {
-#ifdef notyet
-			TAILQ_HEAD(, bio) queue;
+			struct nda_trim_request *trim;
 			struct bio *bp1;
+			TAILQ_HEAD(, bio) queue;
 
+			trim = bp->bio_driver2;
 			TAILQ_INIT(&queue);
-			TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue);
-#endif
+			TAILQ_CONCAT(&queue, &trim->bps, bio_queue);
+			free(trim, M_NVMEDA);
+
 			/*
 			 * Since we can have multiple trims in flight, we don't
 			 * need to call this here.
@@ -1018,8 +1047,6 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb
 			 */
 			ndaschedule(periph);
 			cam_periph_unlock(periph);
-#ifdef notyet
-/* Not yet collapsing several BIO_DELETE requests into one TRIM */
 			while ((bp1 = TAILQ_FIRST(&queue)) != NULL) {
 				TAILQ_REMOVE(&queue, bp1, bio_queue);
 				bp1->bio_error = error;
@@ -1030,9 +1057,6 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb
 					bp1->bio_resid = 0;
 				biodone(bp1);
 			}
-#else
-			biodone(bp);
-#endif
 		} else {
 			ndaschedule(periph);
 			cam_periph_unlock(periph);

Modified: head/sys/dev/nvme/nvme.h
==============================================================================
--- head/sys/dev/nvme/nvme.h	Thu Feb 22 05:43:55 2018	(r329815)
+++ head/sys/dev/nvme/nvme.h	Thu Feb 22 05:44:00 2018	(r329816)
@@ -59,6 +59,9 @@
 /* Cap nvme to 1MB transfers driver explodes with larger sizes */
 #define NVME_MAX_XFER_SIZE		(MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))
 
+/* Largest DSM Trim that can be done */
+#define NVME_MAX_DSM_TRIM		4096
+
 union cap_lo_register {
 	uint32_t	raw;
 	struct {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201802220544.w1M5i1JB069042>