Skip site navigation (1) Skip section navigation (2)
Date:      Tue, 25 Jun 2013 23:52:40 +0000 (UTC)
From:      Jim Harris <jimharris@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r252222 - in stable/9: etc/mtree include sbin sbin/nvmecontrol sys/amd64/conf sys/conf sys/dev/nvd sys/dev/nvme sys/i386/conf sys/modules sys/modules/nvme
Message-ID:  <201306252352.r5PNqecx003637@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jimharris
Date: Tue Jun 25 23:52:39 2013
New Revision: 252222
URL: http://svnweb.freebsd.org/changeset/base/252222

Log:
  240618, 240621, 240633, 240671, 240672, 240697, 240700, 241433,
  241434, 241657, 241658, 241659, 241660, 241661, 241662, 241663,
  241664, 241665, 241689, 242420, 243951, 244410, 244411, 244413,
  244549, 245136, 247963, 248729, 248730, 248731, 248732, 248733,
  248734, 248735, 248736, 248737, 248738, 248739, 248740, 248741,
  248746, 248747, 248748, 248749, 248754, 248755, 248756, 248757,
  248758, 248759, 248760, 248761, 248762, 248763, 248764, 248765,
  248766, 248767, 248768, 248769, 248770, 248771, 248772, 248773,
  248780, 248834, 248835, 248913, 248977, 249067, 249416, 249417,
  249418, 249419, 249420, 249421, 249422, 249432

Deleted:
  stable/9/sys/dev/nvme/nvme_uio.c
Modified:
  stable/9/etc/mtree/BSD.include.dist
  stable/9/include/Makefile
  stable/9/sbin/Makefile.amd64
  stable/9/sbin/Makefile.i386
  stable/9/sbin/nvmecontrol/nvmecontrol.8
  stable/9/sbin/nvmecontrol/nvmecontrol.c
  stable/9/sys/amd64/conf/NOTES
  stable/9/sys/conf/files.amd64
  stable/9/sys/conf/files.i386
  stable/9/sys/dev/nvd/nvd.c
  stable/9/sys/dev/nvme/nvme.c
  stable/9/sys/dev/nvme/nvme.h
  stable/9/sys/dev/nvme/nvme_ctrlr.c
  stable/9/sys/dev/nvme/nvme_ctrlr_cmd.c
  stable/9/sys/dev/nvme/nvme_ns.c
  stable/9/sys/dev/nvme/nvme_ns_cmd.c
  stable/9/sys/dev/nvme/nvme_private.h
  stable/9/sys/dev/nvme/nvme_qpair.c
  stable/9/sys/dev/nvme/nvme_sysctl.c
  stable/9/sys/dev/nvme/nvme_test.c
  stable/9/sys/i386/conf/NOTES
  stable/9/sys/modules/Makefile
  stable/9/sys/modules/nvme/Makefile
Directory Properties:
  stable/9/etc/   (props changed)
  stable/9/etc/mtree/   (props changed)
  stable/9/include/   (props changed)
  stable/9/sbin/   (props changed)
  stable/9/sbin/nvmecontrol/   (props changed)
  stable/9/sys/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/dev/   (props changed)
  stable/9/sys/modules/   (props changed)

Modified: stable/9/etc/mtree/BSD.include.dist
==============================================================================
--- stable/9/etc/mtree/BSD.include.dist	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/etc/mtree/BSD.include.dist	Tue Jun 25 23:52:39 2013	(r252222)
@@ -126,6 +126,8 @@
             mpilib
             ..
         ..
+        nvme
+        ..
         ofw
         ..
         pbio

Modified: stable/9/include/Makefile
==============================================================================
--- stable/9/include/Makefile	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/include/Makefile	Tue Jun 25 23:52:39 2013	(r252222)
@@ -44,8 +44,8 @@ LDIRS=	bsm cam geom net net80211 netatal
 LSUBDIRS=	cam/ata cam/scsi \
 	dev/acpica dev/agp dev/an dev/bktr dev/ciss dev/filemon dev/firewire \
 	dev/hwpmc \
-	dev/ic dev/iicbus ${_dev_ieee488} dev/io dev/lmc dev/mfi dev/ofw \
-	dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus dev/smbus \
+	dev/ic dev/iicbus ${_dev_ieee488} dev/io dev/lmc dev/mfi dev/nvme \
+	dev/ofw dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus dev/smbus \
 	dev/speaker dev/usb dev/utopia dev/vkbd dev/wi \
 	fs/devfs fs/fdescfs fs/fifofs fs/msdosfs fs/nfs fs/ntfs fs/nullfs \
 	${_fs_nwfs} fs/portalfs fs/procfs fs/smbfs fs/udf fs/unionfs \

Modified: stable/9/sbin/Makefile.amd64
==============================================================================
--- stable/9/sbin/Makefile.amd64	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sbin/Makefile.amd64	Tue Jun 25 23:52:39 2013	(r252222)
@@ -2,3 +2,4 @@
 
 SUBDIR += bsdlabel
 SUBDIR += fdisk
+SUBDIR += nvmecontrol

Modified: stable/9/sbin/Makefile.i386
==============================================================================
--- stable/9/sbin/Makefile.i386	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sbin/Makefile.i386	Tue Jun 25 23:52:39 2013	(r252222)
@@ -2,4 +2,5 @@
 
 SUBDIR += bsdlabel
 SUBDIR += fdisk
+SUBDIR += nvmecontrol
 SUBDIR += sconfig

Modified: stable/9/sbin/nvmecontrol/nvmecontrol.8
==============================================================================
--- stable/9/sbin/nvmecontrol/nvmecontrol.8	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sbin/nvmecontrol/nvmecontrol.8	Tue Jun 25 23:52:39 2013	(r252222)
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 17, 2012
+.Dd March 26, 2013
 .Dt NVMECONTROL 8
 .Os
 .Sh NAME
@@ -54,7 +54,10 @@
 .Op Fl p
 .Aq Fl s Ar size_in_bytes
 .Aq Fl t Ar time_in_sec
-.Aq device id
+.Aq namespace id
+.Nm
+.Ic reset
+.Aq controller id
 .Sh DESCRIPTION
 NVM Express (NVMe) is a storage protocol standard, for SSDs and other
 high-speed storage devices over PCI Express.
@@ -62,6 +65,7 @@ high-speed storage devices over PCI Expr
 .Dl nvmecontrol devlist
 .Pp
 Display a list of NVMe controllers and namespaces along with their device nodes.
+.Pp
 .Dl nvmecontrol identify nvme0
 .Pp
 Display a human-readable summary of the nvme0 IDENTIFY_CONTROLLER data.
@@ -77,6 +81,9 @@ Run a performance test on nvme0ns1 using
 thread will issue a single 512 byte read command.  Results are printed to
 stdout when 30 seconds expires.
 .Pp
+.Dl nvmecontrol reset nvme0
+.Pp
+Perform a controller-level reset of the nvme0 controller.
 .Sh AUTHORS
 .An -nosplit
 .Nm

Modified: stable/9/sbin/nvmecontrol/nvmecontrol.c
==============================================================================
--- stable/9/sbin/nvmecontrol/nvmecontrol.c	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sbin/nvmecontrol/nvmecontrol.c	Tue Jun 25 23:52:39 2013	(r252222)
@@ -56,6 +56,9 @@ __FBSDID("$FreeBSD$");
 "                            <-i intr|wait> [-f refthread] [-p]\n"	       \
 "                            <namespace id>\n"
 
+#define RESET_USAGE							       \
+"       nvmecontrol reset <controller id>\n"
+
 static void perftest_usage(void);
 
 static void
@@ -64,6 +67,7 @@ usage(void)
 	fprintf(stderr, "usage:\n");
 	fprintf(stderr, DEVLIST_USAGE);
 	fprintf(stderr, IDENTIFY_USAGE);
+	fprintf(stderr, RESET_USAGE);
 	fprintf(stderr, PERFTEST_USAGE);
 	exit(EX_USAGE);
 }
@@ -206,6 +210,53 @@ ns_get_sector_size(struct nvme_namespace
 	return (1 << nsdata->lbaf[0].lbads);
 }
 
+static void
+read_controller_data(int fd, struct nvme_controller_data *cdata)
+{
+	struct nvme_pt_command	pt;
+
+	memset(&pt, 0, sizeof(pt));
+	pt.cmd.opc = NVME_OPC_IDENTIFY;
+	pt.cmd.cdw10 = 1;
+	pt.buf = cdata;
+	pt.len = sizeof(*cdata);
+	pt.is_read = 1;
+
+	if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) {
+		printf("Identify request failed. errno=%d (%s)\n",
+		    errno, strerror(errno));
+		exit(EX_IOERR);
+	}
+
+	if (nvme_completion_is_error(&pt.cpl)) {
+		printf("Passthrough command returned error.\n");
+		exit(EX_IOERR);
+	}
+}
+
+static void
+read_namespace_data(int fd, int nsid, struct nvme_namespace_data *nsdata)
+{
+	struct nvme_pt_command	pt;
+
+	memset(&pt, 0, sizeof(pt));
+	pt.cmd.opc = NVME_OPC_IDENTIFY;
+	pt.cmd.nsid = nsid;
+	pt.buf = nsdata;
+	pt.len = sizeof(*nsdata);
+	pt.is_read = 1;
+
+	if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) {
+		printf("Identify request failed. errno=%d (%s)\n",
+		    errno, strerror(errno));
+		exit(EX_IOERR);
+	}
+
+	if (nvme_completion_is_error(&pt.cpl)) {
+		printf("Passthrough command returned error.\n");
+		exit(EX_IOERR);
+	}
+}
 
 static void
 devlist(int argc, char *argv[])
@@ -241,34 +292,18 @@ devlist(int argc, char *argv[])
 
 		fd = open(path, O_RDWR);
 		if (fd < 0) {
-			printf("Could not open %s.\n", path);
+			printf("Could not open %s. errno=%d (%s)\n", path,
+			    errno, strerror(errno));
 			exit_code = EX_NOPERM;
 			continue;
 		}
 
-		if (ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == -1) {
-			printf("ioctl to %s failed.\n", path);
-			exit_code = EX_IOERR;
-			continue;
-		}
-
+		read_controller_data(fd, &cdata);
 		printf("%6s: %s\n", name, cdata.mn);
 
 		for (i = 0; i < cdata.nn; i++) {
 			sprintf(name, "nvme%dns%d", ctrlr, i+1);
-			sprintf(path, "/dev/%s", name);
-
-			fd = open(path, O_RDWR);
-			if (fd < 0) {
-				printf("Could not open %s.\n", path);
-				exit_code = EX_NOPERM;
-				continue;
-			}
-			if (ioctl(fd, NVME_IDENTIFY_NAMESPACE, &nsdata) == -1) {
-				printf("ioctl to %s failed.\n", path);
-				exit_code = EX_IOERR;
-				continue;
-			}
+			read_namespace_data(fd, i+1, &nsdata);
 			printf("  %10s (%lldGB)\n",
 				name,
 				nsdata.nsze *
@@ -307,21 +342,20 @@ identify_ctrlr(int argc, char *argv[])
 
 	sprintf(path, "/dev/%s", argv[optind]);
 
-	if (stat(path, &devstat) != 0) {
-		printf("Invalid device node '%s'.\n", path);
+	if (stat(path, &devstat) < 0) {
+		printf("Invalid device node %s. errno=%d (%s)\n", path, errno,
+		    strerror(errno));
 		exit(EX_IOERR);
 	}
 
 	fd = open(path, O_RDWR);
 	if (fd < 0) {
-		printf("Could not open %s.\n", path);
+		printf("Could not open %s. errno=%d (%s)\n", path, errno,
+		    strerror(errno));
 		exit(EX_NOPERM);
 	}
 
-	if (ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == -1) {
-		printf("ioctl to %s failed.\n", path);
-		exit(EX_IOERR);
-	}
+	read_controller_data(fd, &cdata);
 
 	if (hexflag == 1) {
 		if (verboseflag == 1)
@@ -348,7 +382,8 @@ identify_ns(int argc, char *argv[])
 	struct nvme_namespace_data	nsdata;
 	struct stat			devstat;
 	char				path[64];
-	int				ch, fd, hexflag = 0, hexlength;
+	char				*nsloc;
+	int				ch, fd, hexflag = 0, hexlength, nsid;
 	int				verboseflag = 0;
 
 	while ((ch = getopt(argc, argv, "vx")) != -1) {
@@ -364,23 +399,55 @@ identify_ns(int argc, char *argv[])
 		}
 	}
 
+	/*
+	 * Check if the specified device node exists before continuing.
+	 *  This is a cleaner check for cases where the correct controller
+	 *  is specified, but an invalid namespace on that controller.
+	 */
 	sprintf(path, "/dev/%s", argv[optind]);
+	if (stat(path, &devstat) < 0) {
+		printf("Invalid device node %s. errno=%d (%s)\n", path, errno,
+		    strerror(errno));
+		exit(EX_IOERR);
+	}
+
+	nsloc = strstr(argv[optind], "ns");
+	if (nsloc == NULL) {
+		printf("Invalid namepsace %s.\n", argv[optind]);
+		exit(EX_IOERR);
+	}
+
+	/*
+	 * Pull the namespace id from the string. +2 skips past the "ns" part
+	 *  of the string.
+	 */
+	nsid = strtol(nsloc + 2, NULL, 10);
+	if (nsid == 0 && errno != 0) {
+		printf("Invalid namespace ID %s.\n", argv[optind]);
+		exit(EX_IOERR);
+	}
 
-	if (stat(path, &devstat) != 0) {
-		printf("Invalid device node '%s'.\n", path);
+	/*
+	 * We send IDENTIFY commands to the controller, not the namespace,
+	 *  since it is an admin cmd.  So the path should only include the
+	 *  nvmeX part of the nvmeXnsY string.
+	 */
+	sprintf(path, "/dev/");
+	strncat(path, argv[optind], nsloc - argv[optind]);
+	if (stat(path, &devstat) < 0) {
+		printf("Invalid device node %s. errno=%d (%s)\n", path, errno,
+		    strerror(errno));
 		exit(EX_IOERR);
 	}
 
 	fd = open(path, O_RDWR);
 	if (fd < 0) {
-		printf("Could not open %s.\n", path);
+		printf("Could not open %s. errno=%d (%s)\n", path, errno,
+		    strerror(errno));
 		exit(EX_NOPERM);
 	}
 
-	if (ioctl(fd, NVME_IDENTIFY_NAMESPACE, &nsdata) == -1) {
-		printf("ioctl to %s failed.\n", path);
-		exit(EX_IOERR);
-	}
+	read_namespace_data(fd, nsid, &nsdata);
 
 	if (hexflag == 1) {
 		if (verboseflag == 1)
@@ -423,7 +490,7 @@ identify(int argc, char *argv[])
 	optind = 1;
 
 	/*
-	 * If devicde node contains "ns", we consider it a namespace,
+	 * If device node contains "ns", we consider it a namespace,
 	 *  otherwise, consider it a controller.
 	 */
 	if (strstr(target, "ns") == NULL)
@@ -475,7 +542,7 @@ perftest(int argc, char *argv[])
 	char				path[64];
 	u_long				ioctl_cmd = NVME_IO_TEST;
 	bool				nflag, oflag, sflag, tflag;
-	int				err, perthread = 0;
+	int				perthread = 0;
 
 	nflag = oflag = sflag = tflag = false;
 	name = NULL;
@@ -565,14 +632,14 @@ perftest(int argc, char *argv[])
 
 	fd = open(path, O_RDWR);
 	if (fd < 0) {
-		fprintf(stderr, "%s not valid device.\n", path);
+		fprintf(stderr, "%s not valid device. errno=%d (%s)\n", path,
+		    errno, strerror(errno));
 		perftest_usage();
 	}
 
-	err = ioctl(fd, ioctl_cmd, &io_test);
-
-	if (err) {
-		fprintf(stderr, "NVME_IO_TEST returned %d\n", errno);
+	if (ioctl(fd, ioctl_cmd, &io_test) < 0) {
+		fprintf(stderr, "NVME_IO_TEST failed. errno=%d (%s)\n", errno,
+		    strerror(errno));
 		exit(EX_IOERR);
 	}
 
@@ -580,6 +647,44 @@ perftest(int argc, char *argv[])
 	exit(EX_OK);
 }
 
+static void
+reset_ctrlr(int argc, char *argv[])
+{
+	struct stat			devstat;
+	char				path[64];
+	int				ch, fd;
+
+	while ((ch = getopt(argc, argv, "")) != -1) {
+		switch ((char)ch) {
+		default:
+			usage();
+		}
+	}
+
+	sprintf(path, "/dev/%s", argv[optind]);
+
+	if (stat(path, &devstat) < 0) {
+		printf("Invalid device node %s. errno=%d (%s)\n", path, errno,
+		    strerror(errno));
+		exit(EX_IOERR);
+	}
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		printf("Could not open %s. errno=%d (%s)\n", path, errno,
+		    strerror(errno));
+		exit(EX_NOPERM);
+	}
+
+	if (ioctl(fd, NVME_RESET_CONTROLLER) < 0) {
+		printf("Reset request to %s failed. errno=%d (%s)\n", path,
+		    errno, strerror(errno));
+		exit(EX_IOERR);
+	}
+
+	exit(EX_OK);
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -593,6 +698,8 @@ main(int argc, char *argv[])
 		identify(argc-1, &argv[1]);
 	else if (strcmp(argv[1], "perftest") == 0)
 		perftest(argc-1, &argv[1]);
+	else if (strcmp(argv[1], "reset") == 0)
+		reset_ctrlr(argc-1, &argv[1]);
 
 	usage();
 

Modified: stable/9/sys/amd64/conf/NOTES
==============================================================================
--- stable/9/sys/amd64/conf/NOTES	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sys/amd64/conf/NOTES	Tue Jun 25 23:52:39 2013	(r252222)
@@ -433,6 +433,11 @@ device		isci
 options		ISCI_LOGGING	# enable debugging in isci HAL
 
 #
+# NVM Express (NVMe) support
+device         nvme    # base NVMe driver
+device         nvd     # expose NVMe namespaces as disks, depends on nvme
+
+#
 # SafeNet crypto driver: can be moved to the MI NOTES as soon as
 # it's tested on a big-endian machine
 #

Modified: stable/9/sys/conf/files.amd64
==============================================================================
--- stable/9/sys/conf/files.amd64	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sys/conf/files.amd64	Tue Jun 25 23:52:39 2013	(r252222)
@@ -213,7 +213,16 @@ dev/kbd/kbd.c			optional	atkbd | sc | uk
 dev/lindev/full.c		optional	lindev
 dev/lindev/lindev.c		optional	lindev
 dev/nfe/if_nfe.c		optional	nfe pci
+dev/nvd/nvd.c			optional	nvd nvme
 dev/nve/if_nve.c		optional	nve pci
+dev/nvme/nvme.c			optional	nvme
+dev/nvme/nvme_ctrlr.c		optional	nvme
+dev/nvme/nvme_ctrlr_cmd.c	optional	nvme
+dev/nvme/nvme_ns.c		optional	nvme
+dev/nvme/nvme_ns_cmd.c		optional	nvme
+dev/nvme/nvme_qpair.c		optional	nvme
+dev/nvme/nvme_sysctl.c		optional	nvme
+dev/nvme/nvme_test.c		optional	nvme
 dev/nvram/nvram.c		optional	nvram isa
 dev/random/ivy.c		optional	random rdrand_rng
 dev/random/nehemiah.c		optional	random padlock_rng

Modified: stable/9/sys/conf/files.i386
==============================================================================
--- stable/9/sys/conf/files.i386	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sys/conf/files.i386	Tue Jun 25 23:52:39 2013	(r252222)
@@ -222,7 +222,16 @@ dev/lindev/lindev.c		optional lindev
 dev/mse/mse.c			optional mse
 dev/mse/mse_isa.c		optional mse isa
 dev/nfe/if_nfe.c		optional nfe pci
+dev/nvd/nvd.c			optional nvd nvme
 dev/nve/if_nve.c		optional nve pci
+dev/nvme/nvme.c			optional nvme
+dev/nvme/nvme_ctrlr.c		optional nvme
+dev/nvme/nvme_ctrlr_cmd.c	optional nvme
+dev/nvme/nvme_ns.c		optional nvme
+dev/nvme/nvme_ns_cmd.c		optional nvme
+dev/nvme/nvme_qpair.c		optional nvme
+dev/nvme/nvme_sysctl.c		optional nvme
+dev/nvme/nvme_test.c		optional nvme
 dev/nvram/nvram.c		optional nvram isa
 dev/pcf/pcf_isa.c		optional pcf
 dev/random/ivy.c		optional random rdrand_rng

Modified: stable/9/sys/dev/nvd/nvd.c
==============================================================================
--- stable/9/sys/dev/nvd/nvd.c	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sys/dev/nvd/nvd.c	Tue Jun 25 23:52:39 2013	(r252222)
@@ -45,9 +45,12 @@ struct nvd_disk;
 static disk_ioctl_t nvd_ioctl;
 static disk_strategy_t nvd_strategy;
 
-static void create_geom_disk(void *, struct nvme_namespace *ns);
+static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr);
 static void destroy_geom_disk(struct nvd_disk *ndisk);
 
+static void *nvd_new_controller(struct nvme_controller *ctrlr);
+static void nvd_controller_fail(void *ctrlr);
+
 static int nvd_load(void);
 static void nvd_unload(void);
 
@@ -67,10 +70,18 @@ struct nvd_disk {
 
 	uint32_t		cur_depth;
 
-	TAILQ_ENTRY(nvd_disk)	tailq;
+	TAILQ_ENTRY(nvd_disk)	global_tailq;
+	TAILQ_ENTRY(nvd_disk)	ctrlr_tailq;
+};
+
+struct nvd_controller {
+
+	TAILQ_ENTRY(nvd_controller)	tailq;
+	TAILQ_HEAD(, nvd_disk)		disk_head;
 };
 
-TAILQ_HEAD(, nvd_disk)	nvd_head;
+static TAILQ_HEAD(, nvd_controller)	ctrlr_head;
+static TAILQ_HEAD(disk_list, nvd_disk)	disk_head;
 
 static int nvd_modevent(module_t mod, int type, void *arg)
 {
@@ -104,8 +115,11 @@ static int
 nvd_load()
 {
 
-	TAILQ_INIT(&nvd_head);
-	consumer_handle = nvme_register_consumer(create_geom_disk, NULL);
+	TAILQ_INIT(&ctrlr_head);
+	TAILQ_INIT(&disk_head);
+
+	consumer_handle = nvme_register_consumer(nvd_new_disk,
+	    nvd_new_controller, NULL, nvd_controller_fail);
 
 	return (consumer_handle != NULL ? 0 : -1);
 }
@@ -113,13 +127,20 @@ nvd_load()
 static void
 nvd_unload()
 {
-	struct nvd_disk *nvd;
+	struct nvd_controller	*ctrlr;
+	struct nvd_disk		*disk;
+
+	while (!TAILQ_EMPTY(&ctrlr_head)) {
+		ctrlr = TAILQ_FIRST(&ctrlr_head);
+		TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
+		free(ctrlr, M_NVD);
+	}
 
-	while (!TAILQ_EMPTY(&nvd_head)) {
-		nvd = TAILQ_FIRST(&nvd_head);
-		TAILQ_REMOVE(&nvd_head, nvd, tailq);
-		destroy_geom_disk(nvd);
-		free(nvd, M_NVD);
+	while (!TAILQ_EMPTY(&disk_head)) {
+		disk = TAILQ_FIRST(&disk_head);
+		TAILQ_REMOVE(&disk_head, disk, global_tailq);
+		destroy_geom_disk(disk);
+		free(disk, M_NVD);
 	}
 
 	nvme_unregister_consumer(consumer_handle);
@@ -153,7 +174,7 @@ nvd_ioctl(struct disk *ndisk, u_long cmd
 }
 
 static void
-nvd_done(void *arg, const struct nvme_completion *status)
+nvd_done(void *arg, const struct nvme_completion *cpl)
 {
 	struct bio *bp;
 	struct nvd_disk *ndisk;
@@ -162,14 +183,13 @@ nvd_done(void *arg, const struct nvme_co
 
 	ndisk = bp->bio_disk->d_drv1;
 
-	if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == NVME_QD)
-		taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
+	atomic_add_int(&ndisk->cur_depth, -1);
 
 	/*
 	 * TODO: add more extensive translation of NVMe status codes
 	 *  to different bio error codes (i.e. EIO, EINVAL, etc.)
 	 */
-	if (status->sf_sc || status->sf_sct) {
+	if (nvme_completion_is_error(cpl)) {
 		bp->bio_error = EIO;
 		bp->bio_flags |= BIO_ERROR;
 		bp->bio_resid = bp->bio_bcount;
@@ -187,9 +207,6 @@ nvd_bioq_process(void *arg, int pending)
 	int err;
 
 	for (;;) {
-		if (atomic_load_acq_int(&ndisk->cur_depth) >= NVME_QD)
-			break;
-
 		mtx_lock(&ndisk->bioqlock);
 		bp = bioq_takefirst(&ndisk->bioq);
 		mtx_unlock(&ndisk->bioqlock);
@@ -210,13 +227,13 @@ nvd_bioq_process(void *arg, int pending)
 #endif
 
 		bp->bio_driver1 = NULL;
-		atomic_add_acq_int(&ndisk->cur_depth, 1);
+		atomic_add_int(&ndisk->cur_depth, 1);
 
 		err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
 
 		if (err) {
-			atomic_add_acq_int(&ndisk->cur_depth, -1);
-			bp->bio_error = EIO;
+			atomic_add_int(&ndisk->cur_depth, -1);
+			bp->bio_error = err;
 			bp->bio_flags |= BIO_ERROR;
 			bp->bio_resid = bp->bio_bcount;
 			biodone(bp);
@@ -237,13 +254,28 @@ nvd_bioq_process(void *arg, int pending)
 	}
 }
 
-static void
-create_geom_disk(void *arg, struct nvme_namespace *ns)
+static void *
+nvd_new_controller(struct nvme_controller *ctrlr)
 {
-	struct nvd_disk *ndisk;
-	struct disk *disk;
+	struct nvd_controller	*nvd_ctrlr;
+
+	nvd_ctrlr = malloc(sizeof(struct nvd_controller), M_NVD,
+	    M_ZERO | M_WAITOK);
 
-	ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_NOWAIT);
+	TAILQ_INIT(&nvd_ctrlr->disk_head);
+	TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq);
+
+	return (nvd_ctrlr);
+}
+
+static void *
+nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
+{
+	struct nvd_disk		*ndisk;
+	struct disk		*disk;
+	struct nvd_controller	*ctrlr = ctrlr_arg;
+
+	ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK);
 
 	disk = disk_alloc();
 	disk->d_strategy = nvd_strategy;
@@ -255,10 +287,11 @@ create_geom_disk(void *arg, struct nvme_
 	disk->d_sectorsize = nvme_ns_get_sector_size(ns);
 	disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
 
-	if (TAILQ_EMPTY(&nvd_head))
+	if (TAILQ_EMPTY(&disk_head))
 		disk->d_unit = 0;
 	else
-		disk->d_unit = TAILQ_FIRST(&nvd_head)->disk->d_unit + 1;
+		disk->d_unit =
+		    TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1;
 
 	disk->d_flags = 0;
 
@@ -268,6 +301,11 @@ create_geom_disk(void *arg, struct nvme_
 	if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED)
 		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
 
+/* ifdef used here to ease porting to stable branches at a later point. */
+#ifdef DISKFLAG_UNMAPPED_BIO
+	disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
+#endif
+
 	strlcpy(disk->d_ident, nvme_ns_get_serial_number(ns),
 	    sizeof(disk->d_ident));
 
@@ -290,7 +328,10 @@ create_geom_disk(void *arg, struct nvme_
 	    taskqueue_thread_enqueue, &ndisk->tq);
 	taskqueue_start_threads(&ndisk->tq, 1, PI_DISK, "nvd taskq");
 
-	TAILQ_INSERT_HEAD(&nvd_head, ndisk, tailq);
+	TAILQ_INSERT_TAIL(&disk_head, ndisk, global_tailq);
+	TAILQ_INSERT_TAIL(&ctrlr->disk_head, ndisk, ctrlr_tailq);
+
+	return (NULL);
 }
 
 static void
@@ -316,3 +357,22 @@ destroy_geom_disk(struct nvd_disk *ndisk
 
 	mtx_destroy(&ndisk->bioqlock);
 }
+
+static void
+nvd_controller_fail(void *ctrlr_arg)
+{
+	struct nvd_controller	*ctrlr = ctrlr_arg;
+	struct nvd_disk		*disk;
+
+	while (!TAILQ_EMPTY(&ctrlr->disk_head)) {
+		disk = TAILQ_FIRST(&ctrlr->disk_head);
+		TAILQ_REMOVE(&disk_head, disk, global_tailq);
+		TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq);
+		destroy_geom_disk(disk);
+		free(disk, M_NVD);
+	}
+
+	TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
+	free(ctrlr, M_NVD);
+}
+

Modified: stable/9/sys/dev/nvme/nvme.c
==============================================================================
--- stable/9/sys/dev/nvme/nvme.c	Tue Jun 25 23:30:48 2013	(r252221)
+++ stable/9/sys/dev/nvme/nvme.c	Tue Jun 25 23:52:39 2013	(r252222)
@@ -32,22 +32,33 @@ __FBSDID("$FreeBSD$");
 #include <sys/conf.h>
 #include <sys/module.h>
 
+#include <vm/uma.h>
+
+#include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include "nvme_private.h"
 
 struct nvme_consumer {
-	nvme_consumer_cb_fn_t		cb_fn;
-	void				*cb_arg;
+	uint32_t		id;
+	nvme_cons_ns_fn_t	ns_fn;
+	nvme_cons_ctrlr_fn_t	ctrlr_fn;
+	nvme_cons_async_fn_t	async_fn;
+	nvme_cons_fail_fn_t	fail_fn;
 };
 
 struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
+#define	INVALID_CONSUMER_ID	0xFFFF
+
+uma_zone_t	nvme_request_zone;
+int32_t		nvme_retry_count;
 
 MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
 
 static int    nvme_probe(device_t);
 static int    nvme_attach(device_t);
 static int    nvme_detach(device_t);
+static int    nvme_modevent(module_t mod, int type, void *arg);
 
 static devclass_t nvme_devclass;
 
@@ -65,7 +76,7 @@ static driver_t nvme_pci_driver = {
 	sizeof(struct nvme_controller),
 };
 
-DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, 0, 0);
+DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, nvme_modevent, 0);
 MODULE_VERSION(nvme, 1);
 
 static struct _pcsid
@@ -75,15 +86,19 @@ static struct _pcsid
 } pci_ids[] = {
 	{ 0x01118086,		"NVMe Controller"  },
 	{ CHATHAM_PCI_ID,	"Chatham Prototype NVMe Controller"  },
-	{ IDT_PCI_ID,		"IDT NVMe Controller"  },
+	{ IDT32_PCI_ID,		"IDT NVMe Controller (32 channel)"  },
+	{ IDT8_PCI_ID,		"IDT NVMe Controller (8 channel)" },
 	{ 0x00000000,		NULL  }
 };
 
 static int
 nvme_probe (device_t device)
 {
-	u_int32_t type = pci_get_devid(device);
-	struct _pcsid *ep = pci_ids;
+	struct _pcsid	*ep;
+	u_int32_t	type;
+
+	type = pci_get_devid(device);
+	ep = pci_ids;
 
 	while (ep->type && ep->type != type)
 		++ep;
@@ -91,11 +106,43 @@ nvme_probe (device_t device)
 	if (ep->desc) {
 		device_set_desc(device, ep->desc);
 		return (BUS_PROBE_DEFAULT);
-	} else
-		return (ENXIO);
+	}
+
+#if defined(PCIS_STORAGE_NVM)
+	if (pci_get_class(device)    == PCIC_STORAGE &&
+	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
+	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
+		device_set_desc(device, "Generic NVMe Device");
+		return (BUS_PROBE_GENERIC);
+	}
+#endif
+
+	return (ENXIO);
 }
 
 static void
+nvme_init(void)
+{
+	uint32_t	i;
+
+	nvme_request_zone = uma_zcreate("nvme_request",
+	    sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
+
+	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
+		nvme_consumer[i].id = INVALID_CONSUMER_ID;
+}
+
+SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
+
+static void
+nvme_uninit(void)
+{
+	uma_zdestroy(nvme_request_zone);
+}
+
+SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
+
+static void
 nvme_load(void)
 {
 }
@@ -160,24 +207,14 @@ nvme_modevent(module_t mod, int type, vo
 	return (0);
 }
 
-moduledata_t nvme_mod = {
-	"nvme",
-	(modeventhand_t)nvme_modevent,
-	0
-};
-
-DECLARE_MODULE(nvme, nvme_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
-
 void
 nvme_dump_command(struct nvme_command *cmd)
 {
-	printf("opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x "
-	    "mptr:%qx prp1:%qx prp2:%qx cdw:%x %x %x %x %x %x\n",
+	printf(
+"opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n",
 	    cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
 	    cmd->rsvd2, cmd->rsvd3,
-	    (long long unsigned int)cmd->mptr,
-	    (long long unsigned int)cmd->prp1,
-	    (long long unsigned int)cmd->prp2,
+	    (uintmax_t)cmd->mptr, (uintmax_t)cmd->prp1, (uintmax_t)cmd->prp2,
 	    cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
 	    cmd->cdw15);
 }
@@ -188,87 +225,8 @@ nvme_dump_completion(struct nvme_complet
 	printf("cdw0:%08x sqhd:%04x sqid:%04x "
 	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
 	    cpl->cdw0, cpl->sqhd, cpl->sqid,
-	    cpl->cid, cpl->p, cpl->sf_sc, cpl->sf_sct, cpl->sf_m,
-	    cpl->sf_dnr);
-}
-
-void
-nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
-{
-	struct nvme_tracker 	*tr;
-	struct nvme_qpair 	*qpair;
-	struct nvme_prp_list	*prp_list;
-	uint32_t		cur_nseg;
-
-	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
-
-	tr = (struct nvme_tracker *)arg;
-	qpair = tr->qpair;
-
-	/*
-	 * Note that we specified PAGE_SIZE for alignment and max
-	 *  segment size when creating the bus dma tags.  So here
-	 *  we can safely just transfer each segment to its
-	 *  associated PRP entry.
-	 */
-	tr->cmd.prp1 = seg[0].ds_addr;
-
-	if (nseg == 2) {
-		tr->cmd.prp2 = seg[1].ds_addr;
-	} else if (nseg > 2) {
-		KASSERT(tr->prp_list,
-		    ("prp_list needed but not attached to tracker\n"));
-		cur_nseg = 1;
-		prp_list = tr->prp_list;
-		tr->cmd.prp2 = (uint64_t)prp_list->bus_addr;
-		while (cur_nseg < nseg) {
-			prp_list->prp[cur_nseg-1] =
-			    (uint64_t)seg[cur_nseg].ds_addr;
-			cur_nseg++;
-		}
-	}
-
-	nvme_qpair_submit_cmd(qpair, tr);
-}
-
-struct nvme_tracker *
-nvme_allocate_tracker(struct nvme_controller *ctrlr, boolean_t is_admin,
-    nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t payload_size, void *payload)
-{
-	struct nvme_tracker 	*tr;
-	struct nvme_qpair	*qpair;
-	uint32_t 		modulo, offset, num_prps;
-	boolean_t		alloc_prp_list = FALSE;
-
-	if (is_admin) {
-		qpair = &ctrlr->adminq;
-	} else {
-		if (ctrlr->per_cpu_io_queues)
-			qpair = &ctrlr->ioq[curcpu];
-		else
-			qpair = &ctrlr->ioq[0];
-	}
-
-	num_prps = payload_size / PAGE_SIZE;
-	modulo = payload_size % PAGE_SIZE;
-	offset = (uint32_t)((uintptr_t)payload % PAGE_SIZE);
-
-	if (modulo || offset)
-		num_prps += 1 + (modulo + offset - 1) / PAGE_SIZE;
-
-	if (num_prps > 2)
-		alloc_prp_list = TRUE;
-
-	tr = nvme_qpair_allocate_tracker(qpair, alloc_prp_list);
-
-	memset(&tr->cmd, 0, sizeof(tr->cmd));
-
-	tr->qpair = qpair;
-	tr->cb_fn = cb_fn;
-	tr->cb_arg = cb_arg;
-	tr->payload_size = payload_size;
-
-	return (tr);
+	    cpl->cid, cpl->status.p, cpl->status.sc, cpl->status.sct,
+	    cpl->status.m, cpl->status.dnr);
 }
 
 static int
@@ -287,15 +245,17 @@ nvme_attach(device_t dev)
 	 *  to cc.en==0.  This is because we don't really know what status
 	 *  the controller was left in when boot handed off to OS.
 	 */
-	status = nvme_ctrlr_reset(ctrlr);
+	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0)
 		return (status);
 
-	status = nvme_ctrlr_reset(ctrlr);
+	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0)
 		return (status);
 
-	ctrlr->config_hook.ich_func = nvme_ctrlr_start;
+	nvme_sysctl_initialize_ctrlr(ctrlr);
+
+	ctrlr->config_hook.ich_func = nvme_ctrlr_start_config_hook;
 	ctrlr->config_hook.ich_arg = ctrlr;
 
 	config_intrhook_establish(&ctrlr->config_hook);
@@ -307,77 +267,75 @@ static int
 nvme_detach (device_t dev)
 {
 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
-	struct nvme_namespace	*ns;
-	int			i;
-
-	if (ctrlr->taskqueue) {
-		taskqueue_drain(ctrlr->taskqueue, &ctrlr->task);
-		taskqueue_free(ctrlr->taskqueue);
-	}
-
-	for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
-		ns = &ctrlr->ns[i];
-		if (ns->cdev)
-			destroy_dev(ns->cdev);
-	}
-
-	if (ctrlr->cdev)
-		destroy_dev(ctrlr->cdev);
-
-	for (i = 0; i < ctrlr->num_io_queues; i++) {
-		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
-	}
-
-	free(ctrlr->ioq, M_NVME);
-
-	nvme_admin_qpair_destroy(&ctrlr->adminq);
-
-	if (ctrlr->resource != NULL) {
-		bus_release_resource(dev, SYS_RES_MEMORY,
-		    ctrlr->resource_id, ctrlr->resource);
-	}
-
-#ifdef CHATHAM2
-	if (ctrlr->chatham_resource != NULL) {
-		bus_release_resource(dev, SYS_RES_MEMORY,
-		    ctrlr->chatham_resource_id, ctrlr->chatham_resource);
-	}
-#endif
-
-	if (ctrlr->tag)
-		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
-
-	if (ctrlr->res)
-		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
-		    rman_get_rid(ctrlr->res), ctrlr->res);
-
-	if (ctrlr->msix_enabled)
-		pci_release_msi(dev);
 
+	nvme_ctrlr_destruct(ctrlr, dev);
 	return (0);
 }
 
 static void
-nvme_notify_consumer(struct nvme_consumer *consumer)
+nvme_notify_consumer(struct nvme_consumer *cons)
 {
 	device_t		*devlist;
 	struct nvme_controller	*ctrlr;
-	int			dev, ns, devcount;
+	struct nvme_namespace	*ns;
+	void			*ctrlr_cookie;
+	int			dev_idx, ns_idx, devcount;
 
 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
 		return;
 
-	for (dev = 0; dev < devcount; dev++) {
-		ctrlr = DEVICE2SOFTC(devlist[dev]);
-		for (ns = 0; ns < ctrlr->cdata.nn; ns++)
-			(*consumer->cb_fn)(consumer->cb_arg, &ctrlr->ns[ns]);
+	for (dev_idx = 0; dev_idx < devcount; dev_idx++) {
+		ctrlr = DEVICE2SOFTC(devlist[dev_idx]);
+		if (cons->ctrlr_fn != NULL)
+			ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
+		else
+			ctrlr_cookie = NULL;
+		ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
+		for (ns_idx = 0; ns_idx < ctrlr->cdata.nn; ns_idx++) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201306252352.r5PNqecx003637>