Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 25 Jan 2016 09:40:26 +0000 (UTC)
From:      Steven Hartland <smh@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r294711 - in stable/10/sys/dev: nvd nvme
Message-ID:  <201601250940.u0P9eQTj029824@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: smh
Date: Mon Jan 25 09:40:25 2016
New Revision: 294711
URL: https://svnweb.freebsd.org/changeset/base/294711

Log:
  MFC r292074:
  
  Limit stripesize reported from nvd(4) to 4K
  
  Sponsored by:	Multiplay

Modified:
  stable/10/sys/dev/nvd/nvd.c
  stable/10/sys/dev/nvme/nvme.h
  stable/10/sys/dev/nvme/nvme_ns.c
  stable/10/sys/dev/nvme/nvme_sysctl.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/nvd/nvd.c
==============================================================================
--- stable/10/sys/dev/nvd/nvd.c	Mon Jan 25 09:31:32 2016	(r294710)
+++ stable/10/sys/dev/nvd/nvd.c	Mon Jan 25 09:40:25 2016	(r294711)
@@ -295,7 +295,7 @@ nvd_new_disk(struct nvme_namespace *ns, 
 	disk->d_sectorsize = nvme_ns_get_sector_size(ns);
 	disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
 	disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns);
-	disk->d_stripesize = nvme_ns_get_stripesize(ns);
+	disk->d_stripesize = nvme_ns_get_optimal_sector_size(ns);
 
 	if (TAILQ_EMPTY(&disk_head))
 		disk->d_unit = 0;

Modified: stable/10/sys/dev/nvme/nvme.h
==============================================================================
--- stable/10/sys/dev/nvme/nvme.h	Mon Jan 25 09:31:32 2016	(r294710)
+++ stable/10/sys/dev/nvme/nvme.h	Mon Jan 25 09:40:25 2016	(r294711)
@@ -870,6 +870,7 @@ const char *	nvme_ns_get_serial_number(s
 const char *	nvme_ns_get_model_number(struct nvme_namespace *ns);
 const struct nvme_namespace_data *
 		nvme_ns_get_data(struct nvme_namespace *ns);
+uint32_t	nvme_ns_get_optimal_sector_size(struct nvme_namespace *ns);
 uint32_t	nvme_ns_get_stripesize(struct nvme_namespace *ns);
 
 int	nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,

Modified: stable/10/sys/dev/nvme/nvme_ns.c
==============================================================================
--- stable/10/sys/dev/nvme/nvme_ns.c	Mon Jan 25 09:31:32 2016	(r294710)
+++ stable/10/sys/dev/nvme/nvme_ns.c	Mon Jan 25 09:40:25 2016	(r294711)
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
 
 #include "nvme_private.h"
 
+extern int		nvme_max_optimal_sectorsize;
+
 static void		nvme_bio_child_inbed(struct bio *parent, int bio_error);
 static void		nvme_bio_child_done(void *arg,
 					    const struct nvme_completion *cpl);
@@ -217,6 +219,22 @@ nvme_ns_get_stripesize(struct nvme_names
 	return (ns->stripesize);
 }
 
+uint32_t
+nvme_ns_get_optimal_sector_size(struct nvme_namespace *ns)
+{
+	uint32_t stripesize;
+
+	stripesize = nvme_ns_get_stripesize(ns);
+
+	if (stripesize == 0)
+		return nvme_ns_get_sector_size(ns);
+		
+	if (nvme_max_optimal_sectorsize == 0) 
+		return (stripesize);
+
+	return (MIN(stripesize, nvme_max_optimal_sectorsize));
+}
+
 static void
 nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
 {

Modified: stable/10/sys/dev/nvme/nvme_sysctl.c
==============================================================================
--- stable/10/sys/dev/nvme/nvme_sysctl.c	Mon Jan 25 09:31:32 2016	(r294710)
+++ stable/10/sys/dev/nvme/nvme_sysctl.c	Mon Jan 25 09:40:25 2016	(r294711)
@@ -33,6 +33,22 @@ __FBSDID("$FreeBSD$");
 
 #include "nvme_private.h"
 
+SYSCTL_NODE(_kern, OID_AUTO, nvme, CTLFLAG_RD, 0, "NVM Express");
+/*
+ * Intel NVMe controllers have a slow path for I/Os that span a 128KB
+ * stripe boundary but ZFS limits ashift, which is derived from
+ * d_stripesize, to 13 (8KB) so we limit the stripesize reported to
+ * geom(8) to 4KB by default.
+ *
+ * This may result in a small number of additional I/Os to require
+ * splitting in nvme(4), however the NVMe I/O path is very efficient
+ * so these additional I/Os will cause very minimal (if any) difference
+ * in performance or CPU utilisation.
+ */
+int nvme_max_optimal_sectorsize = 1<<12;
+SYSCTL_INT(_kern_nvme, OID_AUTO, max_optimal_sectorsize, CTLFLAG_RWTUN,
+    &nvme_max_optimal_sectorsize, 0, "The maximum optimal sectorsize reported");
+
 /*
  * CTLTYPE_S64 and sysctl_handle_64 were added in r217616.  Define these
  *  explicitly here for older kernels that don't include the r217616



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201601250940.u0P9eQTj029824>