Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 1 Mar 2019 02:16:37 +0000 (UTC)
From:      Ben Widawsky <bwidawsk@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r344680 - in stable/12/sys: dev/nvdimm modules/nvdimm
Message-ID:  <201903010216.x212GbqH058994@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bwidawsk
Date: Fri Mar  1 02:16:37 2019
New Revision: 344680
URL: https://svnweb.freebsd.org/changeset/base/344680

Log:
  MFC r344474-r344476
  
  r344474:
  nvdimm: split spa dev into a separate entity
  
  Separate code for exposing a device backed by a system physical
  address range away from the NVDIMM spa code. This will allow a
  future patch to add support for NVDIMM namespaces while using the
  same device code.
  
  Submitted by:	D Scott Phillips <d.scott.phillips@intel.com>
  Reviewed by:	bwidawsk
  Sponsored by:	Intel Corporation
  Differential Revision:	https://reviews.freebsd.org/D18736
  
  r344475:
  nvdimm: Read NVDIMM namespace labels
  
  When attaching to NVDIMM devices, read and verify the namespace
  labels from the special namespace label storage area. A later
  change will expose NVDIMM namespaces derived from this label data.
  
  Submitted by:	D Scott Phillips <d.scott.phillips@intel.com>
  Discussed with:	kib
  Sponsored by:	Intel Corporation
  Differential Revision:	https://reviews.freebsd.org/D18735
  
  r344476:
  nvdimm: Simple namespace support
  
  Add support for simple NVDIMM v1.2 namespaces from the UEFI
  version 2.7 specification. The combination of NVDIMM regions and
  labels can lead to a wide variety of namespace layouts. Here we
  support a simple subset of namespaces where each NVDIMM SPA range
  is composed of a single region per member dimm.
  
  Submitted by:	D Scott Phillips <d.scott.phillips@intel.com>
  Discussed with:	kib
  Sponsored by:	Intel Corporation
  Differential Revision:	https://reviews.freebsd.org/D18736

Added:
  stable/12/sys/dev/nvdimm/nvdimm_ns.c
     - copied unchanged from r344476, head/sys/dev/nvdimm/nvdimm_ns.c
Modified:
  stable/12/sys/dev/nvdimm/nvdimm.c
  stable/12/sys/dev/nvdimm/nvdimm_spa.c
  stable/12/sys/dev/nvdimm/nvdimm_var.h
  stable/12/sys/modules/nvdimm/Makefile
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/dev/nvdimm/nvdimm.c
==============================================================================
--- stable/12/sys/dev/nvdimm/nvdimm.c	Fri Mar  1 02:08:12 2019	(r344679)
+++ stable/12/sys/dev/nvdimm/nvdimm.c	Fri Mar  1 02:16:37 2019	(r344680)
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
+#include <sys/bitstring.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
@@ -51,10 +52,240 @@ __FBSDID("$FreeBSD$");
 #define _COMPONENT	ACPI_OEM
 ACPI_MODULE_NAME("NVDIMM")
 
+static struct uuid intel_nvdimm_dsm_uuid =
+    {0x4309AC30,0x0D11,0x11E4,0x91,0x91,{0x08,0x00,0x20,0x0C,0x9A,0x66}};
+#define INTEL_NVDIMM_DSM_REV 1
+#define INTEL_NVDIMM_DSM_GET_LABEL_SIZE 4
+#define INTEL_NVDIMM_DSM_GET_LABEL_DATA 5
+
 static devclass_t nvdimm_devclass;
 static devclass_t nvdimm_root_devclass;
 MALLOC_DEFINE(M_NVDIMM, "nvdimm", "NVDIMM driver memory");
 
+static int
+read_label_area_size(struct nvdimm_dev *nv)
+{
+	ACPI_OBJECT *result_buffer;
+	ACPI_HANDLE handle;
+	ACPI_STATUS status;
+	ACPI_BUFFER result;
+	uint32_t *out;
+	int error;
+
+	handle = nvdimm_root_get_acpi_handle(nv->nv_dev);
+	if (handle == NULL)
+		return (ENODEV);
+	result.Length = ACPI_ALLOCATE_BUFFER;
+	result.Pointer = NULL;
+	status = acpi_EvaluateDSM(handle, (uint8_t *)&intel_nvdimm_dsm_uuid,
+	    INTEL_NVDIMM_DSM_REV, INTEL_NVDIMM_DSM_GET_LABEL_SIZE, NULL,
+	    &result);
+	error = ENXIO;
+	if (ACPI_SUCCESS(status) && result.Pointer != NULL &&
+	    result.Length >= sizeof(ACPI_OBJECT)) {
+		result_buffer = result.Pointer;
+		if (result_buffer->Type == ACPI_TYPE_BUFFER &&
+		    result_buffer->Buffer.Length >= 12) {
+			out = (uint32_t *)result_buffer->Buffer.Pointer;
+			nv->label_area_size = out[1];
+			nv->max_label_xfer = out[2];
+			error = 0;
+		}
+	}
+	if (result.Pointer != NULL)
+		AcpiOsFree(result.Pointer);
+	return (error);
+}
+
+static int
+read_label_area(struct nvdimm_dev *nv, uint8_t *dest, off_t offset,
+    off_t length)
+{
+	ACPI_BUFFER result;
+	ACPI_HANDLE handle;
+	ACPI_OBJECT params_pkg, params_buf, *result_buf;
+	ACPI_STATUS status;
+	uint32_t params[2];
+	off_t to_read;
+	int error;
+
+	error = 0;
+	handle = nvdimm_root_get_acpi_handle(nv->nv_dev);
+	if (offset < 0 || length <= 0 ||
+	    offset + length > nv->label_area_size ||
+	    handle == NULL)
+		return (ENODEV);
+	params_pkg.Type = ACPI_TYPE_PACKAGE;
+	params_pkg.Package.Count = 1;
+	params_pkg.Package.Elements = &params_buf;
+	params_buf.Type = ACPI_TYPE_BUFFER;
+	params_buf.Buffer.Length = sizeof(params);
+	params_buf.Buffer.Pointer = (UINT8 *)params;
+	while (length > 0) {
+		to_read = MIN(length, nv->max_label_xfer);
+		params[0] = offset;
+		params[1] = to_read;
+		result.Length = ACPI_ALLOCATE_BUFFER;
+		result.Pointer = NULL;
+		status = acpi_EvaluateDSM(handle,
+		    (uint8_t *)&intel_nvdimm_dsm_uuid, INTEL_NVDIMM_DSM_REV,
+		    INTEL_NVDIMM_DSM_GET_LABEL_DATA, &params_pkg, &result);
+		if (ACPI_FAILURE(status) ||
+		    result.Length < sizeof(ACPI_OBJECT) ||
+		    result.Pointer == NULL) {
+			error = ENXIO;
+			break;
+		}
+		result_buf = (ACPI_OBJECT *)result.Pointer;
+		if (result_buf->Type != ACPI_TYPE_BUFFER ||
+		    result_buf->Buffer.Pointer == NULL ||
+		    result_buf->Buffer.Length != 4 + to_read ||
+		    ((uint16_t *)result_buf->Buffer.Pointer)[0] != 0) {
+			error = ENXIO;
+			break;
+		}
+		bcopy(result_buf->Buffer.Pointer + 4, dest, to_read);
+		dest += to_read;
+		offset += to_read;
+		length -= to_read;
+		if (result.Pointer != NULL) {
+			AcpiOsFree(result.Pointer);
+			result.Pointer = NULL;
+		}
+	}
+	if (result.Pointer != NULL)
+		AcpiOsFree(result.Pointer);
+	return (error);
+}
+
+static uint64_t
+fletcher64(const void *data, size_t length)
+{
+	size_t i;
+	uint32_t a, b;
+	const uint32_t *d;
+
+	a = 0;
+	b = 0;
+	d = (const uint32_t *)data;
+	length = length / sizeof(uint32_t);
+	for (i = 0; i < length; i++) {
+		a += d[i];
+		b += a;
+	}
+	return ((uint64_t)b << 32 | a);
+}
+
+static bool
+label_index_is_valid(struct nvdimm_label_index *index, uint32_t max_labels,
+    size_t size, size_t offset)
+{
+	uint64_t checksum;
+
+	index = (struct nvdimm_label_index *)((uint8_t *)index + offset);
+	if (strcmp(index->signature, NVDIMM_INDEX_BLOCK_SIGNATURE) != 0)
+		return false;
+	checksum = index->checksum;
+	index->checksum = 0;
+	if (checksum != fletcher64(index, size) ||
+	    index->this_offset != size * offset || index->this_size != size ||
+	    index->other_offset != size * (offset == 0 ? 1 : 0) ||
+	    index->seq == 0 || index->seq > 3 || index->slot_cnt > max_labels ||
+	    index->label_size != 1)
+		return false;
+	return true;
+}
+
+static int
+read_label(struct nvdimm_dev *nv, int num)
+{
+	struct nvdimm_label_entry *entry, *i, *next;
+	uint64_t checksum;
+	off_t offset;
+	int error;
+
+	offset = nv->label_index->label_offset +
+	    num * (128 << nv->label_index->label_size);
+	entry = malloc(sizeof(*entry), M_NVDIMM, M_WAITOK);
+	error = read_label_area(nv, (uint8_t *)&entry->label, offset,
+	    sizeof(struct nvdimm_label));
+	if (error != 0) {
+		free(entry, M_NVDIMM);
+		return (error);
+	}
+	checksum = entry->label.checksum;
+	entry->label.checksum = 0;
+	if (checksum != fletcher64(&entry->label, sizeof(entry->label)) ||
+	    entry->label.slot != num) {
+		free(entry, M_NVDIMM);
+		return (ENXIO);
+	}
+
+	/* Insertion ordered by dimm_phys_addr */
+	if (SLIST_EMPTY(&nv->labels) ||
+	    entry->label.dimm_phys_addr <=
+	    SLIST_FIRST(&nv->labels)->label.dimm_phys_addr) {
+		SLIST_INSERT_HEAD(&nv->labels, entry, link);
+		return (0);
+	}
+	SLIST_FOREACH_SAFE(i, &nv->labels, link, next) {
+		if (next == NULL ||
+		    entry->label.dimm_phys_addr <= next->label.dimm_phys_addr) {
+			SLIST_INSERT_AFTER(i, entry, link);
+			return (0);
+		}
+	}
+	__unreachable();
+}
+
+static int
+read_labels(struct nvdimm_dev *nv)
+{
+	struct nvdimm_label_index *indices;
+	size_t bitfield_size, index_size, num_labels;
+	int error, n;
+	bool index_0_valid, index_1_valid;
+
+	for (index_size = 256; ; index_size += 256) {
+		num_labels = 8 * (index_size -
+		    sizeof(struct nvdimm_label_index));
+		if (index_size + num_labels * sizeof(struct nvdimm_label) >=
+		    nv->label_area_size)
+			break;
+	}
+	num_labels = (nv->label_area_size - index_size) /
+	    sizeof(struct nvdimm_label);
+	bitfield_size = roundup2(num_labels, 8) / 8;
+	indices = malloc(2 * index_size, M_NVDIMM, M_WAITOK);
+	error = read_label_area(nv, (void *)indices, 0, 2 * index_size);
+	if (error != 0) {
+		free(indices, M_NVDIMM);
+		return (error);
+	}
+	index_0_valid = label_index_is_valid(indices, num_labels, index_size,
+	    0);
+	index_1_valid = label_index_is_valid(indices, num_labels, index_size,
+	    1);
+	if (!index_0_valid && !index_1_valid) {
+		free(indices, M_NVDIMM);
+		return (ENXIO);
+	}
+	if (index_0_valid && index_1_valid &&
+	    (indices[1].seq > indices[0].seq ||
+	    (indices[1].seq == 1 && indices[0].seq == 3)))
+		index_0_valid = false;
+	nv->label_index = malloc(index_size, M_NVDIMM, M_WAITOK);
+	bcopy(indices + (index_0_valid ? 0 : 1), nv->label_index, index_size);
+	free(indices, M_NVDIMM);
+	for (bit_ffc_at((bitstr_t *)nv->label_index->free, 0, num_labels, &n);
+	     n >= 0;
+	     bit_ffc_at((bitstr_t *)nv->label_index->free, n + 1, num_labels,
+	     &n)) {
+		read_label(nv, n);
+	}
+	return (0);
+}
+
 struct nvdimm_dev *
 nvdimm_find_by_handle(nfit_handle_t nv_handle)
 {
@@ -90,6 +321,7 @@ nvdimm_attach(device_t dev)
 	ACPI_TABLE_NFIT *nfitbl;
 	ACPI_HANDLE handle;
 	ACPI_STATUS status;
+	int error;
 
 	nv = device_get_softc(dev);
 	handle = nvdimm_root_get_acpi_handle(dev);
@@ -107,6 +339,14 @@ nvdimm_attach(device_t dev)
 	acpi_nfit_get_flush_addrs(nfitbl, nv->nv_handle, &nv->nv_flush_addr,
 	    &nv->nv_flush_addr_cnt);
 	AcpiPutTable(&nfitbl->Header);
+	error = read_label_area_size(nv);
+	if (error == 0) {
+		/*
+		 * Ignoring errors reading labels. Not all NVDIMMs
+		 * support labels and namespaces.
+		 */
+		read_labels(nv);
+	}
 	return (0);
 }
 
@@ -114,9 +354,15 @@ static int
 nvdimm_detach(device_t dev)
 {
 	struct nvdimm_dev *nv;
+	struct nvdimm_label_entry *label, *next;
 
 	nv = device_get_softc(dev);
 	free(nv->nv_flush_addr, M_NVDIMM);
+	free(nv->label_index, M_NVDIMM);
+	SLIST_FOREACH_SAFE(label, &nv->labels, link, next) {
+		SLIST_REMOVE_HEAD(&nv->labels, link);
+		free(label, M_NVDIMM);
+	}
 	return (0);
 }
 
@@ -216,6 +462,7 @@ nvdimm_root_create_spas(struct nvdimm_root_dev *dev, A
 			free(spa, M_NVDIMM);
 			break;
 		}
+		nvdimm_create_namespaces(spa_mapping, nfitbl);
 		SLIST_INSERT_HEAD(&dev->spas, spa_mapping, link);
 	}
 	free(spas, M_NVDIMM);
@@ -273,6 +520,7 @@ nvdimm_root_detach(device_t dev)
 
 	root = device_get_softc(dev);
 	SLIST_FOREACH_SAFE(spa, &root->spas, link, next) {
+		nvdimm_destroy_namespaces(spa);
 		nvdimm_spa_fini(spa);
 		SLIST_REMOVE_HEAD(&root->spas, link);
 		free(spa, M_NVDIMM);

Copied: stable/12/sys/dev/nvdimm/nvdimm_ns.c (from r344476, head/sys/dev/nvdimm/nvdimm_ns.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/12/sys/dev/nvdimm/nvdimm_ns.c	Fri Mar  1 02:16:37 2019	(r344680, copy of r344476, head/sys/dev/nvdimm/nvdimm_ns.c)
@@ -0,0 +1,97 @@
+/*-
+ * Copyright (c) 2018 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/uuid.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/nvdimm/nvdimm_var.h>
+
+int
+nvdimm_create_namespaces(struct SPA_mapping *spa, ACPI_TABLE_NFIT *nfitbl)
+{
+	ACPI_NFIT_MEMORY_MAP **regions;
+	struct nvdimm_dev *nv;
+	struct nvdimm_label_entry *e;
+	struct nvdimm_namespace *ns;
+	nfit_handle_t dimm_handle;
+	char *name;
+	int i, error, num_regions;
+
+	acpi_nfit_get_region_mappings_by_spa_range(nfitbl, spa->spa_nfit_idx,
+	    &regions, &num_regions);
+	if (num_regions == 0 || num_regions != regions[0]->InterleaveWays) {
+		free(regions, M_NVDIMM);
+		return (ENXIO);
+	}
+	dimm_handle = regions[0]->DeviceHandle;
+	nv = nvdimm_find_by_handle(dimm_handle);
+	if (nv == NULL) {
+		free(regions, M_NVDIMM);
+		return (ENXIO);
+	}
+	i = 0;
+	error = 0;
+	SLIST_FOREACH(e, &nv->labels, link) {
+		ns = malloc(sizeof(struct nvdimm_namespace), M_NVDIMM,
+		    M_WAITOK | M_ZERO);
+		ns->dev.spa_domain = spa->dev.spa_domain;
+		ns->dev.spa_phys_base = spa->dev.spa_phys_base +
+		    regions[0]->RegionOffset +
+		    num_regions *
+		    (e->label.dimm_phys_addr - regions[0]->Address);
+		ns->dev.spa_len = num_regions * e->label.raw_size;
+		ns->dev.spa_efi_mem_flags = spa->dev.spa_efi_mem_flags;
+		asprintf(&name, M_NVDIMM, "spa%dns%d", spa->spa_nfit_idx, i);
+		error = nvdimm_spa_dev_init(&ns->dev, name);
+		free(name, M_NVDIMM);
+		if (error != 0)
+			break;
+		SLIST_INSERT_HEAD(&spa->namespaces, ns, link);
+		i++;
+	}
+	free(regions, M_NVDIMM);
+	return (error);
+}
+
+void
+nvdimm_destroy_namespaces(struct SPA_mapping *spa)
+{
+	struct nvdimm_namespace *ns, *next;
+
+	SLIST_FOREACH_SAFE(ns, &spa->namespaces, link, next) {
+		SLIST_REMOVE_HEAD(&spa->namespaces, link);
+		nvdimm_spa_dev_fini(&ns->dev);
+		free(ns, M_NVDIMM);
+	}
+}

Modified: stable/12/sys/dev/nvdimm/nvdimm_spa.c
==============================================================================
--- stable/12/sys/dev/nvdimm/nvdimm_spa.c	Fri Mar  1 02:08:12 2019	(r344679)
+++ stable/12/sys/dev/nvdimm/nvdimm_spa.c	Fri Mar  1 02:16:37 2019	(r344680)
@@ -143,45 +143,45 @@ nvdimm_spa_type_from_uuid(struct uuid *uuid)
 }
 
 static vm_memattr_t
-nvdimm_spa_memattr(struct SPA_mapping *spa)
+nvdimm_spa_memattr(struct nvdimm_spa_dev *dev)
 {
 	vm_memattr_t mode;
 
-	if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WB) != 0)
+	if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WB) != 0)
 		mode = VM_MEMATTR_WRITE_BACK;
-	else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WT) != 0)
+	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WT) != 0)
 		mode = VM_MEMATTR_WRITE_THROUGH;
-	else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WC) != 0)
+	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WC) != 0)
 		mode = VM_MEMATTR_WRITE_COMBINING;
-	else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WP) != 0)
+	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WP) != 0)
 		mode = VM_MEMATTR_WRITE_PROTECTED;
-	else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_UC) != 0)
+	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_UC) != 0)
 		mode = VM_MEMATTR_UNCACHEABLE;
 	else {
 		if (bootverbose)
-			printf("SPA%d mapping attr unsupported\n",
-			    spa->spa_nfit_idx);
+			printf("SPA mapping attr %#lx unsupported\n",
+			    dev->spa_efi_mem_flags);
 		mode = VM_MEMATTR_UNCACHEABLE;
 	}
 	return (mode);
 }
 
 static int
-nvdimm_spa_uio(struct SPA_mapping *spa, struct uio *uio)
+nvdimm_spa_uio(struct nvdimm_spa_dev *dev, struct uio *uio)
 {
 	struct vm_page m, *ma;
 	off_t off;
 	vm_memattr_t mattr;
 	int error, n;
 
-	if (spa->spa_kva == NULL) {
-		mattr = nvdimm_spa_memattr(spa);
+	if (dev->spa_kva == NULL) {
+		mattr = nvdimm_spa_memattr(dev);
 		vm_page_initfake(&m, 0, mattr);
 		ma = &m;
 		while (uio->uio_resid > 0) {
-			if (uio->uio_offset >= spa->spa_len)
+			if (uio->uio_offset >= dev->spa_len)
 				break;
-			off = spa->spa_phys_base + uio->uio_offset;
+			off = dev->spa_phys_base + uio->uio_offset;
 			vm_page_updatefake(&m, trunc_page(off), mattr);
 			n = PAGE_SIZE;
 			if (n > uio->uio_resid)
@@ -192,14 +192,14 @@ nvdimm_spa_uio(struct SPA_mapping *spa, struct uio *ui
 		}
 	} else {
 		while (uio->uio_resid > 0) {
-			if (uio->uio_offset >= spa->spa_len)
+			if (uio->uio_offset >= dev->spa_len)
 				break;
 			n = INT_MAX;
 			if (n > uio->uio_resid)
 				n = uio->uio_resid;
-			if (uio->uio_offset + n > spa->spa_len)
-				n = spa->spa_len - uio->uio_offset;
-			error = uiomove((char *)spa->spa_kva + uio->uio_offset,
+			if (uio->uio_offset + n > dev->spa_len)
+				n = dev->spa_len - uio->uio_offset;
+			error = uiomove((char *)dev->spa_kva + uio->uio_offset,
 			    n, uio);
 			if (error != 0)
 				break;
@@ -216,20 +216,20 @@ nvdimm_spa_rw(struct cdev *dev, struct uio *uio, int i
 }
 
 static int
-nvdimm_spa_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
+nvdimm_spa_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
     struct thread *td)
 {
-	struct SPA_mapping *spa;
+	struct nvdimm_spa_dev *dev;
 	int error;
 
-	spa = dev->si_drv1;
+	dev = cdev->si_drv1;
 	error = 0;
 	switch (cmd) {
 	case DIOCGSECTORSIZE:
 		*(u_int *)data = DEV_BSIZE;
 		break;
 	case DIOCGMEDIASIZE:
-		*(off_t *)data = spa->spa_len;
+		*(off_t *)data = dev->spa_len;
 		break;
 	default:
 		error = ENOTTY;
@@ -239,19 +239,19 @@ nvdimm_spa_ioctl(struct cdev *dev, u_long cmd, caddr_t
 }
 
 static int
-nvdimm_spa_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
+nvdimm_spa_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
     vm_object_t *objp, int nprot)
 {
-	struct SPA_mapping *spa;
+	struct nvdimm_spa_dev *dev;
 
-	spa = dev->si_drv1;
-	if (spa->spa_obj == NULL)
+	dev = cdev->si_drv1;
+	if (dev->spa_obj == NULL)
 		return (ENXIO);
-	if (*offset >= spa->spa_len || *offset + size < *offset ||
-	    *offset + size > spa->spa_len)
+	if (*offset >= dev->spa_len || *offset + size < *offset ||
+	    *offset + size > dev->spa_len)
 		return (EINVAL);
-	vm_object_reference(spa->spa_obj);
-	*objp = spa->spa_obj;
+	vm_object_reference(dev->spa_obj);
+	*objp = dev->spa_obj;
 	return (0);
 }
 
@@ -266,18 +266,17 @@ static struct cdevsw spa_cdevsw = {
 };
 
 static void
-nvdimm_spa_g_all_unmapped(struct SPA_mapping *spa, struct bio *bp,
-    int rw)
+nvdimm_spa_g_all_unmapped(struct nvdimm_spa_dev *dev, struct bio *bp, int rw)
 {
 	struct vm_page maa[bp->bio_ma_n];
 	vm_page_t ma[bp->bio_ma_n];
 	vm_memattr_t mattr;
 	int i;
 
-	mattr = nvdimm_spa_memattr(spa);
+	mattr = nvdimm_spa_memattr(dev);
 	for (i = 0; i < nitems(ma); i++) {
 		maa[i].flags = 0;
-		vm_page_initfake(&maa[i], spa->spa_phys_base +
+		vm_page_initfake(&maa[i], dev->spa_phys_base +
 		    trunc_page(bp->bio_offset) + PAGE_SIZE * i, mattr);
 		ma[i] = &maa[i];
 	}
@@ -292,30 +291,30 @@ nvdimm_spa_g_all_unmapped(struct SPA_mapping *spa, str
 static void
 nvdimm_spa_g_thread(void *arg)
 {
-	struct SPA_mapping *spa;
+	struct g_spa *sc;
 	struct bio *bp;
 	struct uio auio;
 	struct iovec aiovec;
 	int error;
 
-	spa = arg;
+	sc = arg;
 	for (;;) {
-		mtx_lock(&spa->spa_g_mtx);
+		mtx_lock(&sc->spa_g_mtx);
 		for (;;) {
-			bp = bioq_takefirst(&spa->spa_g_queue);
+			bp = bioq_takefirst(&sc->spa_g_queue);
 			if (bp != NULL)
 				break;
-			msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO,
+			msleep(&sc->spa_g_queue, &sc->spa_g_mtx, PRIBIO,
 			    "spa_g", 0);
-			if (!spa->spa_g_proc_run) {
-				spa->spa_g_proc_exiting = true;
-				wakeup(&spa->spa_g_queue);
-				mtx_unlock(&spa->spa_g_mtx);
+			if (!sc->spa_g_proc_run) {
+				sc->spa_g_proc_exiting = true;
+				wakeup(&sc->spa_g_queue);
+				mtx_unlock(&sc->spa_g_mtx);
 				kproc_exit(0);
 			}
 			continue;
 		}
-		mtx_unlock(&spa->spa_g_mtx);
+		mtx_unlock(&sc->spa_g_mtx);
 		if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
 		    bp->bio_cmd != BIO_FLUSH) {
 			error = EOPNOTSUPP;
@@ -324,13 +323,15 @@ nvdimm_spa_g_thread(void *arg)
 
 		error = 0;
 		if (bp->bio_cmd == BIO_FLUSH) {
-			if (spa->spa_kva != NULL) {
-				pmap_large_map_wb(spa->spa_kva, spa->spa_len);
+			if (sc->dev->spa_kva != NULL) {
+				pmap_large_map_wb(sc->dev->spa_kva,
+				    sc->dev->spa_len);
 			} else {
 				pmap_flush_cache_phys_range(
-				    (vm_paddr_t)spa->spa_phys_base,
-				    (vm_paddr_t)spa->spa_phys_base +
-				    spa->spa_len, nvdimm_spa_memattr(spa));
+				    (vm_paddr_t)sc->dev->spa_phys_base,
+				    (vm_paddr_t)sc->dev->spa_phys_base +
+				    sc->dev->spa_len,
+				    nvdimm_spa_memattr(sc->dev));
 			}
 			/*
 			 * XXX flush IMC
@@ -339,8 +340,8 @@ nvdimm_spa_g_thread(void *arg)
 		}
 		
 		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
-			if (spa->spa_kva != NULL) {
-				aiovec.iov_base = (char *)spa->spa_kva +
+			if (sc->dev->spa_kva != NULL) {
+				aiovec.iov_base = (char *)sc->dev->spa_kva +
 				    bp->bio_offset;
 				aiovec.iov_len = bp->bio_length;
 				auio.uio_iov = &aiovec;
@@ -355,7 +356,8 @@ nvdimm_spa_g_thread(void *arg)
 				    bp->bio_ma_offset, bp->bio_length, &auio);
 				bp->bio_resid = auio.uio_resid;
 			} else {
-				nvdimm_spa_g_all_unmapped(spa, bp, bp->bio_cmd);
+				nvdimm_spa_g_all_unmapped(sc->dev, bp,
+				    bp->bio_cmd);
 				bp->bio_resid = bp->bio_length;
 				error = 0;
 			}
@@ -370,11 +372,11 @@ nvdimm_spa_g_thread(void *arg)
 			auio.uio_rw = bp->bio_cmd == BIO_READ ? UIO_READ :
 			    UIO_WRITE;
 			auio.uio_td = curthread;
-			error = nvdimm_spa_uio(spa, &auio);
+			error = nvdimm_spa_uio(sc->dev, &auio);
 			bp->bio_resid = auio.uio_resid;
 		}
 		bp->bio_bcount = bp->bio_length;
-		devstat_end_transaction_bio(spa->spa_g_devstat, bp);
+		devstat_end_transaction_bio(sc->spa_g_devstat, bp);
 completed:
 		bp->bio_completed = bp->bio_length;
 		g_io_deliver(bp, error);
@@ -384,18 +386,18 @@ completed:
 static void
 nvdimm_spa_g_start(struct bio *bp)
 {
-	struct SPA_mapping *spa;
+	struct g_spa *sc;
 
-	spa = bp->bio_to->geom->softc;
+	sc = bp->bio_to->geom->softc;
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
-		mtx_lock(&spa->spa_g_stat_mtx);
-		devstat_start_transaction_bio(spa->spa_g_devstat, bp);
-		mtx_unlock(&spa->spa_g_stat_mtx);
+		mtx_lock(&sc->spa_g_stat_mtx);
+		devstat_start_transaction_bio(sc->spa_g_devstat, bp);
+		mtx_unlock(&sc->spa_g_stat_mtx);
 	}
-	mtx_lock(&spa->spa_g_mtx);
-	bioq_disksort(&spa->spa_g_queue, bp);
-	wakeup(&spa->spa_g_queue);
-	mtx_unlock(&spa->spa_g_mtx);
+	mtx_lock(&sc->spa_g_mtx);
+	bioq_disksort(&sc->spa_g_queue, bp);
+	wakeup(&sc->spa_g_queue);
+	mtx_unlock(&sc->spa_g_mtx);
 }
 
 static int
@@ -405,11 +407,16 @@ nvdimm_spa_g_access(struct g_provider *pp, int r, int 
 	return (0);
 }
 
+static struct g_geom * nvdimm_spa_g_create(struct nvdimm_spa_dev *dev,
+    const char *name);
+static g_ctl_destroy_geom_t nvdimm_spa_g_destroy_geom;
+
 struct g_class nvdimm_spa_g_class = {
 	.name =		"SPA",
 	.version =	G_VERSION,
 	.start =	nvdimm_spa_g_start,
 	.access =	nvdimm_spa_g_access,
+	.destroy_geom =	nvdimm_spa_g_destroy_geom,
 };
 DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa);
 
@@ -417,49 +424,63 @@ int
 nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr,
     enum SPA_mapping_type spa_type)
 {
-	struct make_dev_args mda;
-	struct sglist *spa_sg;
-	int error, error1;
+	char *name;
+	int error;
 
 	spa->spa_type = spa_type;
-	spa->spa_domain = ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ?
-	    nfitaddr->ProximityDomain : -1;
 	spa->spa_nfit_idx = nfitaddr->RangeIndex;
-	spa->spa_phys_base = nfitaddr->Address;
-	spa->spa_len = nfitaddr->Length;
-	spa->spa_efi_mem_flags = nfitaddr->MemoryMapping;
+	spa->dev.spa_domain =
+	    ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ?
+	    nfitaddr->ProximityDomain : -1;
+	spa->dev.spa_phys_base = nfitaddr->Address;
+	spa->dev.spa_len = nfitaddr->Length;
+	spa->dev.spa_efi_mem_flags = nfitaddr->MemoryMapping;
 	if (bootverbose) {
 		printf("NVDIMM SPA%d base %#016jx len %#016jx %s fl %#jx\n",
 		    spa->spa_nfit_idx,
-		    (uintmax_t)spa->spa_phys_base, (uintmax_t)spa->spa_len,
+		    (uintmax_t)spa->dev.spa_phys_base,
+		    (uintmax_t)spa->dev.spa_len,
 		    nvdimm_SPA_uuid_list[spa_type].u_name,
-		    spa->spa_efi_mem_flags);
+		    spa->dev.spa_efi_mem_flags);
 	}
 	if (!nvdimm_SPA_uuid_list[spa_type].u_usr_acc)
 		return (0);
 
-	error1 = pmap_large_map(spa->spa_phys_base, spa->spa_len,
-	    &spa->spa_kva, nvdimm_spa_memattr(spa));
+	asprintf(&name, M_NVDIMM, "spa%d", spa->spa_nfit_idx);
+	error = nvdimm_spa_dev_init(&spa->dev, name);
+	free(name, M_NVDIMM);
+	return (error);
+}
+
+int
+nvdimm_spa_dev_init(struct nvdimm_spa_dev *dev, const char *name)
+{
+	struct make_dev_args mda;
+	struct sglist *spa_sg;
+	char *devname;
+	int error, error1;
+
+	error1 = pmap_large_map(dev->spa_phys_base, dev->spa_len,
+	    &dev->spa_kva, nvdimm_spa_memattr(dev));
 	if (error1 != 0) {
-		printf("NVDIMM SPA%d cannot map into KVA, error %d\n",
-		    spa->spa_nfit_idx, error1);
-		spa->spa_kva = NULL;
+		printf("NVDIMM %s cannot map into KVA, error %d\n", name,
+		    error1);
+		dev->spa_kva = NULL;
 	}
 
 	spa_sg = sglist_alloc(1, M_WAITOK);
-	error = sglist_append_phys(spa_sg, spa->spa_phys_base,
-	    spa->spa_len);
+	error = sglist_append_phys(spa_sg, dev->spa_phys_base,
+	    dev->spa_len);
 	if (error == 0) {
-		spa->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, spa->spa_len,
+		dev->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, dev->spa_len,
 		    VM_PROT_ALL, 0, NULL);
-		if (spa->spa_obj == NULL) {
-			printf("NVDIMM SPA%d failed to alloc vm object",
-			    spa->spa_nfit_idx);
+		if (dev->spa_obj == NULL) {
+			printf("NVDIMM %s failed to alloc vm object", name);
 			sglist_free(spa_sg);
 		}
 	} else {
-		printf("NVDIMM SPA%d failed to init sglist, error %d",
-		    spa->spa_nfit_idx, error);
+		printf("NVDIMM %s failed to init sglist, error %d", name,
+		    error);
 		sglist_free(spa_sg);
 	}
 
@@ -470,78 +491,112 @@ nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYS
 	mda.mda_uid = UID_ROOT;
 	mda.mda_gid = GID_OPERATOR;
 	mda.mda_mode = 0660;
-	mda.mda_si_drv1 = spa;
-	error = make_dev_s(&mda, &spa->spa_dev, "nvdimm_spa%d",
-	    spa->spa_nfit_idx);
+	mda.mda_si_drv1 = dev;
+	asprintf(&devname, M_NVDIMM, "nvdimm_%s", name);
+	error = make_dev_s(&mda, &dev->spa_dev, "%s", devname);
+	free(devname, M_NVDIMM);
 	if (error != 0) {
-		printf("NVDIMM SPA%d cannot create devfs node, error %d\n",
-		    spa->spa_nfit_idx, error);
+		printf("NVDIMM %s cannot create devfs node, error %d\n", name,
+		    error);
 		if (error1 == 0)
 			error1 = error;
 	}
+	dev->spa_g = nvdimm_spa_g_create(dev, name);
+	if (dev->spa_g == NULL && error1 == 0)
+		error1 = ENXIO;
+	return (error1);
+}
 
-	bioq_init(&spa->spa_g_queue);
-	mtx_init(&spa->spa_g_mtx, "spag", NULL, MTX_DEF);
-	mtx_init(&spa->spa_g_stat_mtx, "spagst", NULL, MTX_DEF);
-	spa->spa_g_proc_run = true;
-	spa->spa_g_proc_exiting = false;
-	error = kproc_create(nvdimm_spa_g_thread, spa, &spa->spa_g_proc, 0, 0,
-	    "g_spa%d", spa->spa_nfit_idx);
+static struct g_geom *
+nvdimm_spa_g_create(struct nvdimm_spa_dev *dev, const char *name)
+{
+	struct g_geom *gp;
+	struct g_spa *sc;
+	int error;
+
+	gp = NULL;
+	sc = malloc(sizeof(struct g_spa), M_NVDIMM, M_WAITOK | M_ZERO);
+	sc->dev = dev;
+	bioq_init(&sc->spa_g_queue);
+	mtx_init(&sc->spa_g_mtx, "spag", NULL, MTX_DEF);
+	mtx_init(&sc->spa_g_stat_mtx, "spagst", NULL, MTX_DEF);
+	sc->spa_g_proc_run = true;
+	sc->spa_g_proc_exiting = false;
+	error = kproc_create(nvdimm_spa_g_thread, sc, &sc->spa_g_proc, 0, 0,
+	    "g_spa");
 	if (error != 0) {
-		printf("NVDIMM SPA%d cannot create geom worker, error %d\n",
-		    spa->spa_nfit_idx, error);
-		if (error1 == 0)
-			error1 = error;
+		mtx_destroy(&sc->spa_g_mtx);
+		mtx_destroy(&sc->spa_g_stat_mtx);
+		free(sc, M_NVDIMM);
+		printf("NVDIMM %s cannot create geom worker, error %d\n", name,
+		    error);
 	} else {
 		g_topology_lock();
-		spa->spa_g = g_new_geomf(&nvdimm_spa_g_class, "spa%d",
-		    spa->spa_nfit_idx);
-		spa->spa_g->softc = spa;
-		spa->spa_p = g_new_providerf(spa->spa_g, "spa%d",
-		    spa->spa_nfit_idx);
-		spa->spa_p->mediasize = spa->spa_len;
-		spa->spa_p->sectorsize = DEV_BSIZE;
-		spa->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
+		gp = g_new_geomf(&nvdimm_spa_g_class, "%s", name);
+		gp->softc = sc;
+		sc->spa_p = g_new_providerf(gp, "%s", name);
+		sc->spa_p->mediasize = dev->spa_len;
+		sc->spa_p->sectorsize = DEV_BSIZE;
+		sc->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
 		    G_PF_ACCEPT_UNMAPPED;
-		g_error_provider(spa->spa_p, 0);
-		spa->spa_g_devstat = devstat_new_entry("spa", spa->spa_nfit_idx,
-		    DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT,
+		g_error_provider(sc->spa_p, 0);
+		sc->spa_g_devstat = devstat_new_entry("spa", -1, DEV_BSIZE,
+		    DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT,
 		    DEVSTAT_PRIORITY_MAX);
 		g_topology_unlock();
 	}
-	return (error1);
+	return (gp);
 }
 
 void
 nvdimm_spa_fini(struct SPA_mapping *spa)
 {
 
-	mtx_lock(&spa->spa_g_mtx);
-	spa->spa_g_proc_run = false;
-	wakeup(&spa->spa_g_queue);
-	while (!spa->spa_g_proc_exiting)
-		msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO, "spa_e", 0);
-	mtx_unlock(&spa->spa_g_mtx);
-	if (spa->spa_g != NULL) {
+	nvdimm_spa_dev_fini(&spa->dev);
+}
+
+void
+nvdimm_spa_dev_fini(struct nvdimm_spa_dev *dev)
+{
+
+	if (dev->spa_g != NULL) {
 		g_topology_lock();
-		g_wither_geom(spa->spa_g, ENXIO);
+		nvdimm_spa_g_destroy_geom(NULL, dev->spa_g->class, dev->spa_g);
 		g_topology_unlock();
-		spa->spa_g = NULL;
-		spa->spa_p = NULL;
 	}
-	if (spa->spa_g_devstat != NULL) {
-		devstat_remove_entry(spa->spa_g_devstat);
-		spa->spa_g_devstat = NULL;
+	if (dev->spa_dev != NULL) {
+		destroy_dev(dev->spa_dev);
+		dev->spa_dev = NULL;
 	}
-	if (spa->spa_dev != NULL) {
-		destroy_dev(spa->spa_dev);
-		spa->spa_dev = NULL;
+	vm_object_deallocate(dev->spa_obj);
+	if (dev->spa_kva != NULL) {
+		pmap_large_unmap(dev->spa_kva, dev->spa_len);
+		dev->spa_kva = NULL;
 	}
-	vm_object_deallocate(spa->spa_obj);
-	if (spa->spa_kva != NULL) {
-		pmap_large_unmap(spa->spa_kva, spa->spa_len);
-		spa->spa_kva = NULL;
+}
+
+static int
+nvdimm_spa_g_destroy_geom(struct gctl_req *req, struct g_class *cp,
+    struct g_geom *gp)
+{
+	struct g_spa *sc;
+
+	sc = gp->softc;
+	mtx_lock(&sc->spa_g_mtx);
+	sc->spa_g_proc_run = false;
+	wakeup(&sc->spa_g_queue);
+	while (!sc->spa_g_proc_exiting)
+		msleep(&sc->spa_g_queue, &sc->spa_g_mtx, PRIBIO, "spa_e", 0);
+	mtx_unlock(&sc->spa_g_mtx);
+	g_topology_assert();
+	g_wither_geom(gp, ENXIO);
+	sc->spa_p = NULL;
+	if (sc->spa_g_devstat != NULL) {
+		devstat_remove_entry(sc->spa_g_devstat);
+		sc->spa_g_devstat = NULL;
 	}
-	mtx_destroy(&spa->spa_g_mtx);
-	mtx_destroy(&spa->spa_g_stat_mtx);
+	mtx_destroy(&sc->spa_g_mtx);
+	mtx_destroy(&sc->spa_g_stat_mtx);
+	free(sc, M_NVDIMM);
+	return (0);
 }

Modified: stable/12/sys/dev/nvdimm/nvdimm_var.h
==============================================================================
--- stable/12/sys/dev/nvdimm/nvdimm_var.h	Fri Mar  1 02:08:12 2019	(r344679)
+++ stable/12/sys/dev/nvdimm/nvdimm_var.h	Fri Mar  1 02:16:37 2019	(r344680)
@@ -33,6 +33,51 @@
 #ifndef __DEV_NVDIMM_VAR_H__
 #define	__DEV_NVDIMM_VAR_H__
 
+#define NVDIMM_INDEX_BLOCK_SIGNATURE "NAMESPACE_INDEX"
+
+struct nvdimm_label_index {
+	char		signature[16];
+	uint8_t		flags[3];
+	uint8_t		label_size;
+	uint32_t	seq;
+	uint64_t	this_offset;
+	uint64_t	this_size;
+	uint64_t	other_offset;
+	uint64_t	label_offset;
+	uint32_t	slot_cnt;
+	uint16_t	rev_major;
+	uint16_t	rev_minor;
+	uint64_t	checksum;
+	uint8_t		free[0];
+};
+
+struct nvdimm_label {
+	struct uuid	uuid;
+	char		name[64];
+	uint32_t	flags;
+	uint16_t	nlabel;
+	uint16_t	position;
+	uint64_t	set_cookie;
+	uint64_t	lba_size;
+	uint64_t	dimm_phys_addr;
+	uint64_t	raw_size;
+	uint32_t	slot;
+	uint8_t		alignment;
+	uint8_t		reserved[3];
+	struct uuid	type_guid;
+	struct uuid	address_abstraction_guid;
+	uint8_t		reserved1[88];
+	uint64_t	checksum;
+};
+
+struct nvdimm_label_entry {
+	SLIST_ENTRY(nvdimm_label_entry) link;
+	struct nvdimm_label	label;
+};

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201903010216.x212GbqH058994>