Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 16 Oct 2018 20:12:36 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r339391 - in head/sys: dev/nvdimm modules modules/nvdimm
Message-ID:  <201810162012.w9GKCaWb016859@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Tue Oct 16 20:12:35 2018
New Revision: 339391
URL: https://svnweb.freebsd.org/changeset/base/339391

Log:
  Add initial driver for ACPI NFIT-enumerated NVDIMMs.
  
  The driver enumerates NVDIMMs.  In addition, for each System Physical
  Address (SPA) range found, a spaN geom provider is created, which allows
  formatting and mounting the region as a normal volume.  Also, a
  /dev/nvdimm_spaN node is created, which can be read, written, and mapped
  by userspace; the mapping is zero-copy.
  
  Block access methods are not supported, and labels are not
  parsed.  No management interfaces are provided.
  
  Tested by:	Intel, NetApp
  Sponsored by:	The FreeBSD Foundation
  Approved by:	re (gjb)
  MFC after:	2 weeks

Added:
  head/sys/dev/nvdimm/
  head/sys/dev/nvdimm/nvdimm.c   (contents, props changed)
  head/sys/dev/nvdimm/nvdimm_spa.c   (contents, props changed)
  head/sys/dev/nvdimm/nvdimm_var.h   (contents, props changed)
  head/sys/modules/nvdimm/
  head/sys/modules/nvdimm/Makefile   (contents, props changed)
Modified:
  head/sys/modules/Makefile

Added: head/sys/dev/nvdimm/nvdimm.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/nvdimm/nvdimm.c	Tue Oct 16 20:12:35 2018	(r339391)
@@ -0,0 +1,423 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/uuid.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <contrib/dev/acpica/include/acuuid.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/nvdimm/nvdimm_var.h>
+
+#define _COMPONENT	ACPI_OEM
+ACPI_MODULE_NAME("NVDIMM")
+
+/* Device class passed to DRIVER_MODULE() for this driver. */
+static devclass_t nvdimm_devclass;
+/* Array of child devices filled by nvdimm_create_dev(); detach NULLs a slot. */
+static device_t *nvdimm_devs;
+/* Number of devices counted by nvdimm_init(); bounds scans of nvdimm_devs. */
+static int nvdimm_devcnt;
+/* malloc(9) type used for this driver's allocations. */
+MALLOC_DEFINE(M_NVDIMM, "nvdimm", "NVDIMM driver memory");
+
+/*
+ * Return the softc of the NVDIMM whose nv_handle equals nv_handle.
+ *
+ * Scans nvdimm_devs, skipping NULL slots, and returns the first match
+ * or NULL when no device carries the requested handle.
+ */
+struct nvdimm_dev *
+nvdimm_find_by_handle(nfit_handle_t nv_handle)
+{
+	struct nvdimm_dev *sc;
+	int slot;
+
+	for (slot = 0; slot < nvdimm_devcnt; slot++) {
+		if (nvdimm_devs[slot] == NULL)
+			continue;
+		sc = device_get_softc(nvdimm_devs[slot]);
+		if (sc->nv_handle == nv_handle)
+			return (sc);
+	}
+	return (NULL);
+}
+
+/*
+ * nvdimm_iterate_nfit() callback for flush address subtables.
+ *
+ * When the subtable's DeviceHandle matches the NVDIMM passed in arg,
+ * copy its HintAddress entries into a freshly allocated
+ * nv_flush_addr array and record the count.  Subtables belonging to
+ * other devices are ignored.  Always returns 0 so iteration continues.
+ */
+static int
+nvdimm_parse_flush_addr(void *nfitsubtbl, void *arg)
+{
+	ACPI_NFIT_FLUSH_ADDRESS *flush;
+	struct nvdimm_dev *sc;
+	int hint;
+
+	flush = nfitsubtbl;
+	sc = arg;
+	if (flush->DeviceHandle == sc->nv_handle) {
+		/* Only one matching subtable per device is expected. */
+		MPASS(sc->nv_flush_addr == NULL &&
+		    sc->nv_flush_addr_cnt == 0);
+		sc->nv_flush_addr = malloc(
+		    flush->HintCount * sizeof(uint64_t *), M_NVDIMM,
+		    M_WAITOK);
+		for (hint = 0; hint < flush->HintCount; hint++) {
+			sc->nv_flush_addr[hint] =
+			    (uint64_t *)flush->HintAddress[hint];
+		}
+		sc->nv_flush_addr_cnt = flush->HintCount;
+	}
+	return (0);
+}
+
+/*
+ * Walk the subtables of an NFIT and invoke cb on each one of the
+ * requested type.
+ *
+ * nfitbl: the NFIT; subtables start right after the fixed table header
+ *         and extend to nfitbl->Header.Length.
+ * type:   subtable type to visit; other subtables are skipped.
+ * cb:     called as cb(subtable, arg); a non-zero return stops the walk.
+ * arg:    opaque pointer handed to cb.
+ *
+ * Returns 0, or the first non-zero value returned by cb.
+ *
+ * Each subtable header is validated before use: a subtable shorter
+ * than its own header (in particular Length == 0, which would never
+ * advance the cursor) or one extending past the end of the table stops
+ * the walk.  Subtables visited before the malformed one are unaffected.
+ */
+int
+nvdimm_iterate_nfit(ACPI_TABLE_NFIT *nfitbl, enum AcpiNfitType type,
+    int (*cb)(void *, void *), void *arg)
+{
+	ACPI_NFIT_HEADER *nfithdr;
+	ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr;
+	ACPI_NFIT_MEMORY_MAP *nfitmap;
+	ACPI_NFIT_INTERLEAVE *nfitintrl;
+	ACPI_NFIT_SMBIOS *nfitsmbios;
+	ACPI_NFIT_CONTROL_REGION *nfitctlreg;
+	ACPI_NFIT_DATA_REGION *nfitdtreg;
+	ACPI_NFIT_FLUSH_ADDRESS *nfitflshaddr;
+	char *end, *ptr;
+	size_t remain;
+	int error;
+
+	error = 0;
+	end = (char *)nfitbl + nfitbl->Header.Length;
+	for (ptr = (char *)(nfitbl + 1); ptr < end; ptr += nfithdr->Length) {
+		remain = (size_t)(end - ptr);
+		/* Not enough room left for even a subtable header. */
+		if (remain < sizeof(*nfithdr)) {
+			if (bootverbose)
+				printf("NFIT: truncated subtable header\n");
+			break;
+		}
+		nfithdr = (ACPI_NFIT_HEADER *)ptr;
+		/*
+		 * A length below the header size would stall or corrupt
+		 * the walk; a length above the remaining space would let
+		 * the callback read past the table.
+		 */
+		if (nfithdr->Length < sizeof(*nfithdr) ||
+		    nfithdr->Length > remain) {
+			if (bootverbose)
+				printf("NFIT: subtable type %d has invalid "
+				    "length %u\n", nfithdr->Type,
+				    (unsigned)nfithdr->Length);
+			break;
+		}
+		if (nfithdr->Type != type)
+			continue;
+		switch (nfithdr->Type) {
+		case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
+			nfitaddr = __containerof(nfithdr,
+			    ACPI_NFIT_SYSTEM_ADDRESS, Header);
+			error = cb(nfitaddr, arg);
+			break;
+		case ACPI_NFIT_TYPE_MEMORY_MAP:
+			nfitmap = __containerof(nfithdr,
+			    ACPI_NFIT_MEMORY_MAP, Header);
+			error = cb(nfitmap, arg);
+			break;
+		case ACPI_NFIT_TYPE_INTERLEAVE:
+			nfitintrl = __containerof(nfithdr,
+			    ACPI_NFIT_INTERLEAVE, Header);
+			error = cb(nfitintrl, arg);
+			break;
+		case ACPI_NFIT_TYPE_SMBIOS:
+			nfitsmbios = __containerof(nfithdr,
+			    ACPI_NFIT_SMBIOS, Header);
+			error = cb(nfitsmbios, arg);
+			break;
+		case ACPI_NFIT_TYPE_CONTROL_REGION:
+			nfitctlreg = __containerof(nfithdr,
+			    ACPI_NFIT_CONTROL_REGION, Header);
+			error = cb(nfitctlreg, arg);
+			break;
+		case ACPI_NFIT_TYPE_DATA_REGION:
+			nfitdtreg = __containerof(nfithdr,
+			    ACPI_NFIT_DATA_REGION, Header);
+			error = cb(nfitdtreg, arg);
+			break;
+		case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
+			nfitflshaddr = __containerof(nfithdr,
+			    ACPI_NFIT_FLUSH_ADDRESS, Header);
+			error = cb(nfitflshaddr, arg);
+			break;
+		case ACPI_NFIT_TYPE_RESERVED:
+		default:
+			if (bootverbose)
+				printf("NFIT subtype %d unknown\n",
+				    nfithdr->Type);
+			error = 0;
+			break;
+		}
+		if (error != 0)
+			break;
+	}
+	return (error);
+}
+
+/*
+ * AcpiWalkNamespace() callback used below the NVDIMM root device.
+ * Forwards each visited handle to the function stored in the
+ * struct nvdimm_ns_walk_ctx passed as ctx and returns its status.
+ * level and st are unused.
+ */
+static ACPI_STATUS
+nvdimm_walk_dev(ACPI_HANDLE handle, UINT32 level, void *ctx, void **st)
+{
+	ACPI_STATUS status;
+	struct nvdimm_ns_walk_ctx *wctx;
+
+	wctx = ctx;
+	status = wctx->func(handle, wctx->arg);
+	return_ACPI_STATUS(status);
+}
+
+/*
+ * AcpiWalkNamespace() callback applied to every device under the ACPI
+ * root.  Devices whose HID is not "ACPI0012" are skipped with AE_OK.
+ * For a matching device, its descendants are walked with
+ * nvdimm_walk_dev(); a failure of that walk is returned as is,
+ * otherwise AE_CTRL_TERMINATE is returned to stop the outer walk.
+ */
+static ACPI_STATUS
+nvdimm_walk_root(ACPI_HANDLE handle, UINT32 level, void *ctx, void **st)
+{
+	ACPI_STATUS status;
+
+	if (!acpi_MatchHid(handle, "ACPI0012"))
+		return_ACPI_STATUS(AE_OK);
+	status = AcpiWalkNamespace(ACPI_TYPE_DEVICE, handle, 100,
+	    nvdimm_walk_dev, NULL, ctx, NULL);
+	if (ACPI_FAILURE(status))
+		return_ACPI_STATUS(status);
+	return_ACPI_STATUS(AE_CTRL_TERMINATE);
+}
+
+/*
+ * Call func(handle, arg) for the ACPI devices found below the
+ * "ACPI0012" device.  The function and its argument are packed into a
+ * walk context and the namespace is walked from ACPI_ROOT_OBJECT with
+ * nvdimm_walk_root().  Returns the status of that walk.
+ */
+static ACPI_STATUS
+nvdimm_foreach_acpi(ACPI_STATUS (*func)(ACPI_HANDLE, void *), void *arg)
+{
+	struct nvdimm_ns_walk_ctx wctx;
+	ACPI_STATUS status;
+
+	wctx.func = func;
+	wctx.arg = arg;
+	status = AcpiWalkNamespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, 100,
+	    nvdimm_walk_root, NULL, &wctx, NULL);
+	return_ACPI_STATUS(status);
+}
+
+/*
+ * nvdimm_foreach_acpi() callback that counts enumerated devices.
+ *
+ * handle: ACPI handle of the visited device; only used to print its
+ *         full path name when bootverbose is set.
+ * arg:    pointer to the int counter to increment.
+ *
+ * Always returns AE_OK.  Failing to obtain the path name only affects
+ * the diagnostic message: the device has already been counted, so
+ * aborting the walk here would leave the caller with a partial count.
+ */
+static ACPI_STATUS
+nvdimm_count_devs(ACPI_HANDLE handle, void *arg)
+{
+	ACPI_BUFFER name;
+	ACPI_STATUS status;
+	int *cnt;
+
+	cnt = arg;
+	(*cnt)++;
+
+	if (bootverbose) {
+		name.Length = ACPI_ALLOCATE_BUFFER;
+		name.Pointer = NULL;
+		status = AcpiGetName(handle, ACPI_FULL_PATHNAME, &name);
+		if (ACPI_FAILURE(status)) {
+			printf("nvdimm: enumerated device with unknown name\n");
+		} else {
+			printf("nvdimm: enumerated %s\n",
+			    (char *)name.Pointer);
+			AcpiOsFree(name.Pointer);
+		}
+	}
+
+	return_ACPI_STATUS(AE_OK);
+}
+
+/* Argument bundle handed to nvdimm_create_dev() through the ACPI walk. */
+struct nvdimm_create_dev_arg {
+	device_t acpi0;		/* parent bus device the children are added to */
+	int *cnt;		/* next unit/index; incremented per created child */
+};
+
+/*
+ * nvdimm_foreach_acpi() callback that creates (or reuses) the
+ * "nvdimm" child device for one enumerated ACPI handle.
+ *
+ * handle: ACPI handle to associate with the child.
+ * arg:    struct nvdimm_create_dev_arg with the parent device and the
+ *         running index; the index is advanced on success.
+ *
+ * Returns AE_OK on success and AE_ERROR when the child cannot be
+ * created or when the index does not fit in nvdimm_devs.  The array is
+ * sized by an earlier, separate namespace walk, so the index is
+ * checked before it is used as a subscript.
+ */
+static ACPI_STATUS
+nvdimm_create_dev(ACPI_HANDLE handle, void *arg)
+{
+	struct nvdimm_create_dev_arg *narg;
+	device_t child;
+	int idx;
+
+	narg = arg;
+	idx = *(narg->cnt);
+	if (nvdimm_devs == NULL || idx < 0 || idx >= nvdimm_devcnt) {
+		if (bootverbose)
+			device_printf(narg->acpi0,
+			    "nvdimm%d exceeds the counted %d devices\n",
+			    idx, nvdimm_devcnt);
+		return_ACPI_STATUS(AE_ERROR);
+	}
+	child = device_find_child(narg->acpi0, "nvdimm", idx);
+	if (child == NULL)
+		child = BUS_ADD_CHILD(narg->acpi0, 1, "nvdimm", idx);
+	if (child == NULL) {
+		if (bootverbose)
+			device_printf(narg->acpi0,
+			    "failed to create nvdimm%d\n", idx);
+		return_ACPI_STATUS(AE_ERROR);
+	}
+	acpi_set_handle(child, handle);
+	KASSERT(nvdimm_devs[idx] == NULL, ("nvdimm_devs[%d] not NULL", idx));
+	nvdimm_devs[idx] = child;
+
+	(*(narg->cnt))++;
+	return_ACPI_STATUS(AE_OK);
+}
+
+/*
+ * One-time setup of the device array.
+ *
+ * Returns true when nvdimm_devs is usable: either it was set up by an
+ * earlier call (nvdimm_devcnt != 0) or the devices were counted and
+ * the array allocated now.  Returns false when the driver is disabled
+ * through acpi_disabled("nvdimm") or the counting walk fails.
+ *
+ * The count is accumulated in a local and published only together
+ * with the array, so a failed walk cannot leave a non-zero
+ * nvdimm_devcnt behind with nvdimm_devs still NULL.
+ */
+static bool
+nvdimm_init(void)
+{
+	ACPI_STATUS status;
+	int cnt;
+
+	if (nvdimm_devcnt != 0)
+		return (true);
+	if (acpi_disabled("nvdimm"))
+		return (false);
+	cnt = 0;
+	status = nvdimm_foreach_acpi(nvdimm_count_devs, &cnt);
+	if (ACPI_FAILURE(status)) {
+		if (bootverbose)
+			printf("nvdimm_init: count failed\n");
+		return (false);
+	}
+	nvdimm_devs = malloc(cnt * sizeof(*nvdimm_devs), M_NVDIMM,
+	    M_WAITOK | M_ZERO);
+	nvdimm_devcnt = cnt;
+	return (true);
+}
+
+/*
+ * Identify method: after nvdimm_init() succeeds, walk the ACPI
+ * namespace and create one "nvdimm" child of parent per enumerated
+ * device.  A failed walk is only reported when bootverbose is set.
+ */
+static void
+nvdimm_identify(driver_t *driver, device_t parent)
+{
+	struct nvdimm_create_dev_arg narg;
+	ACPI_STATUS status;
+	int nextidx;
+
+	if (!nvdimm_init())
+		return;
+
+	/* The callback advances nextidx once per created child. */
+	nextidx = 0;
+	narg.cnt = &nextidx;
+	narg.acpi0 = parent;
+	status = nvdimm_foreach_acpi(nvdimm_create_dev, &narg);
+	if (bootverbose && ACPI_FAILURE(status))
+		printf("nvdimm_identify: create failed\n");
+}
+
+/*
+ * Probe method: unconditionally returns BUS_PROBE_NOWILDCARD; the
+ * children are created by name in nvdimm_create_dev().
+ */
+static int
+nvdimm_probe(device_t dev)
+{
+
+	return (BUS_PROBE_NOWILDCARD);
+}
+
+/*
+ * Attach method for one NVDIMM child.
+ *
+ * Records the device and its slot in nvdimm_devs in the softc, reads
+ * the NFIT device handle from the ACPI "_ADR" object, and collects the
+ * flush hint addresses for that handle from the NFIT.
+ *
+ * Returns 0 on success, EINVAL when the device has no ACPI handle, and
+ * ENXIO when "_ADR" cannot be evaluated or the NFIT is unavailable.
+ */
+static int
+nvdimm_attach(device_t dev)
+{
+	struct nvdimm_dev *nv;
+	ACPI_TABLE_NFIT *nfitbl;
+	ACPI_HANDLE handle;
+	ACPI_STATUS status;
+	int i;
+
+	nv = device_get_softc(dev);
+	handle = acpi_get_handle(dev);
+	if (handle == NULL)
+		return (EINVAL);
+	nv->nv_dev = dev;
+	/* Remember our slot so that detach can clear it. */
+	for (i = 0; i < nvdimm_devcnt; i++) {
+		if (nvdimm_devs[i] == dev) {
+			nv->nv_devs_idx = i;
+			break;
+		}
+	}
+	/* The device is expected to have been registered at identify time. */
+	MPASS(i < nvdimm_devcnt);
+	if (ACPI_FAILURE(acpi_GetInteger(handle, "_ADR", &nv->nv_handle))) {
+		device_printf(dev, "cannot get handle\n");
+		return (ENXIO);
+	}
+
+	status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl);
+	if (ACPI_FAILURE(status)) {
+		if (bootverbose)
+			device_printf(dev, "cannot get NFIT\n");
+		return (ENXIO);
+	}
+	/* The iteration result is ignored; the callback always returns 0. */
+	nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_FLUSH_ADDRESS,
+	    nvdimm_parse_flush_addr, nv);
+	AcpiPutTable(&nfitbl->Header);
+	return (0);
+}
+
+/*
+ * Detach method: clear the device's slot in nvdimm_devs and release
+ * the flush hint address array.  The softc fields are reset as well so
+ * that no dangling pointer or stale count remains and the state
+ * asserted by nvdimm_parse_flush_addr() holds again.  Returns 0.
+ */
+static int
+nvdimm_detach(device_t dev)
+{
+	struct nvdimm_dev *nv;
+
+	nv = device_get_softc(dev);
+	nvdimm_devs[nv->nv_devs_idx] = NULL;
+	free(nv->nv_flush_addr, M_NVDIMM);
+	nv->nv_flush_addr = NULL;
+	nv->nv_flush_addr_cnt = 0;
+	return (0);
+}
+
+/* Suspend method: nothing to do, always succeeds. */
+static int
+nvdimm_suspend(device_t dev)
+{
+
+	return (0);
+}
+
+/* Resume method: nothing to do, always succeeds. */
+static int
+nvdimm_resume(device_t dev)
+{
+
+	return (0);
+}
+
+/* Device interface methods implemented by the nvdimm driver. */
+static device_method_t nvdimm_methods[] = {
+	DEVMETHOD(device_identify, nvdimm_identify),
+	DEVMETHOD(device_probe, nvdimm_probe),
+	DEVMETHOD(device_attach, nvdimm_attach),
+	DEVMETHOD(device_detach, nvdimm_detach),
+	DEVMETHOD(device_suspend, nvdimm_suspend),
+	DEVMETHOD(device_resume, nvdimm_resume),
+	DEVMETHOD_END
+};
+
+/* Driver description: name, method table, and per-device softc size. */
+static driver_t	nvdimm_driver = {
+	"nvdimm",
+	nvdimm_methods,
+	sizeof(struct nvdimm_dev),
+};
+
+/*
+ * Release the device array and reset the count to zero, so that a
+ * later nvdimm_init() starts from scratch.
+ */
+static void
+nvdimm_fini(void)
+{
+
+	free(nvdimm_devs, M_NVDIMM);
+	nvdimm_devs = NULL;
+	nvdimm_devcnt = 0;
+}
+
+/*
+ * Module event handler.  MOD_LOAD and MOD_QUIESCE need no work;
+ * MOD_UNLOAD releases the device array through nvdimm_fini().  Any
+ * other event is rejected with EOPNOTSUPP.
+ */
+static int
+nvdimm_modev(struct module *mod, int what, void *arg)
+{
+
+	switch (what) {
+	case MOD_UNLOAD:
+		nvdimm_fini();
+		/* FALLTHROUGH */
+	case MOD_LOAD:
+	case MOD_QUIESCE:
+		return (0);
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+DRIVER_MODULE(nvdimm, acpi, nvdimm_driver, nvdimm_devclass, nvdimm_modev, NULL);
+MODULE_DEPEND(nvdimm, acpi, 1, 1, 1);

Added: head/sys/dev/nvdimm/nvdimm_spa.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/nvdimm/nvdimm_spa.c	Tue Oct 16 20:12:35 2018	(r339391)
@@ -0,0 +1,632 @@
+/*-
+ * Copyright (c) 2017, 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/devicestat.h>
+#include <sys/disk.h>
+#include <sys/efi.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+#include <sys/sglist.h>
+#include <sys/uio.h>
+#include <sys/uuid.h>
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+#include <machine/vmparam.h>
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <contrib/dev/acpica/include/acuuid.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/nvdimm/nvdimm_var.h>
+
+/* Array of SPA range descriptors; indexed by nvdimm_spa_parse(). */
+struct SPA_mapping *spa_mappings;
+/* NOTE(review): presumably the number of entries in spa_mappings - confirm. */
+int spa_mappings_cnt;
+
+/*
+ * NFIT iteration callback that counts subtables: bumps the int that
+ * arg points to and returns 0.  The subtable itself is not inspected.
+ */
+static int
+nvdimm_spa_count(void *nfitsubtbl __unused, void *arg)
+{
+	int *counter = arg;
+
+	*counter += 1;
+	return (0);
+}
+
+/*
+ * Table of known SPA range types, indexed by the SPA_TYPE_* constants.
+ * nvdimm_spa_parse() compares a range's GUID against u_id to find the
+ * type; nvdimm_spa_init_one() prints u_name and consults u_usr_acc.
+ */
+static struct nvdimm_SPA_uuid_list_elm {
+	const char		*u_name;	/* short label for boot messages */
+	const char		*u_id_str;	/* UUID in string form */
+	struct uuid		u_id;		/* parsed form of u_id_str */
+	const bool		u_usr_acc;	/* false: log only, no mapping/devices */
+} nvdimm_SPA_uuid_list[] = {
+	[SPA_TYPE_VOLATILE_MEMORY] = {
+		.u_name =	"VOLA MEM ",
+		.u_id_str =	UUID_VOLATILE_MEMORY,
+		.u_usr_acc =	true,
+	},
+	[SPA_TYPE_PERSISTENT_MEMORY] = {
+		.u_name =	"PERS MEM",
+		.u_id_str =	UUID_PERSISTENT_MEMORY,
+		.u_usr_acc =	true,
+	},
+	[SPA_TYPE_CONTROL_REGION] = {
+		.u_name =	"CTRL RG ",
+		.u_id_str =	UUID_CONTROL_REGION,
+		.u_usr_acc =	false,
+	},
+	[SPA_TYPE_DATA_REGION] = {
+		.u_name =	"DATA RG ",
+		.u_id_str =	UUID_DATA_REGION,
+		.u_usr_acc =	true,
+	},
+	[SPA_TYPE_VOLATILE_VIRTUAL_DISK] = {
+		.u_name =	"VIRT DSK",
+		.u_id_str =	UUID_VOLATILE_VIRTUAL_DISK,
+		.u_usr_acc =	true,
+	},
+	[SPA_TYPE_VOLATILE_VIRTUAL_CD] = {
+		.u_name =	"VIRT CD ",
+		.u_id_str =	UUID_VOLATILE_VIRTUAL_CD,
+		.u_usr_acc =	true,
+	},
+	[SPA_TYPE_PERSISTENT_VIRTUAL_DISK] = {
+		.u_name =	"PV DSK  ",
+		.u_id_str =	UUID_PERSISTENT_VIRTUAL_DISK,
+		.u_usr_acc =	true,
+	},
+	[SPA_TYPE_PERSISTENT_VIRTUAL_CD] = {
+		.u_name =	"PV CD   ",
+		.u_id_str =	UUID_PERSISTENT_VIRTUAL_CD,
+		.u_usr_acc =	true,
+	},
+};
+
+/*
+ * Translate the EFI memory attribute flags of an SPA range into a VM
+ * memory attribute.  The flags are tested in the order WB, WT, WC, WP,
+ * UC and the first one set wins.  When none is set the range is
+ * treated as uncacheable, with a message if bootverbose is enabled.
+ */
+static vm_memattr_t
+nvdimm_spa_memattr(struct SPA_mapping *spa)
+{
+
+	if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WB) != 0)
+		return (VM_MEMATTR_WRITE_BACK);
+	if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WT) != 0)
+		return (VM_MEMATTR_WRITE_THROUGH);
+	if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WC) != 0)
+		return (VM_MEMATTR_WRITE_COMBINING);
+	if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WP) != 0)
+		return (VM_MEMATTR_WRITE_PROTECTED);
+	if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_UC) != 0)
+		return (VM_MEMATTR_UNCACHEABLE);
+
+	/* No recognized attribute: fall back to the safest mode. */
+	if (bootverbose)
+		printf("SPA%d mapping attr unsupported\n",
+		    spa->spa_nfit_idx);
+	return (VM_MEMATTR_UNCACHEABLE);
+}
+
+/*
+ * Copy data between a uio and an SPA range, starting at
+ * uio->uio_offset within the range.
+ *
+ * When the range is mapped into KVA (spa_kva != NULL) the copy goes
+ * through that mapping with uiomove().  Otherwise a fake vm_page is
+ * pointed at each physical page in turn and the copy is done with
+ * uiomove_fromphys().  In both cases the transfer stops at the end of
+ * the range (spa_len).
+ *
+ * Returns 0 on success, including when nothing was transferred, or
+ * the error returned by the copy routine.
+ */
+static int
+nvdimm_spa_uio(struct SPA_mapping *spa, struct uio *uio)
+{
+	struct vm_page m, *ma;
+	off_t off;
+	vm_memattr_t mattr;
+	int error, n;
+
+	/* Neither loop may run at all (empty request or offset at EOF). */
+	error = 0;
+	if (spa->spa_kva == NULL) {
+		mattr = nvdimm_spa_memattr(spa);
+		/* Start from a clean page, as the unmapped bio path does. */
+		bzero(&m, sizeof(m));
+		vm_page_initfake(&m, 0, mattr);
+		ma = &m;
+		while (uio->uio_resid > 0) {
+			if (uio->uio_offset >= spa->spa_len)
+				break;
+			off = spa->spa_phys_base + uio->uio_offset;
+			vm_page_updatefake(&m, trunc_page(off), mattr);
+			/*
+			 * Only one page is described by ma, so a chunk
+			 * must not cross into the following page.
+			 */
+			n = PAGE_SIZE - (off & PAGE_MASK);
+			if (n > uio->uio_resid)
+				n = uio->uio_resid;
+			if (uio->uio_offset + n > spa->spa_len)
+				n = spa->spa_len - uio->uio_offset;
+			error = uiomove_fromphys(&ma, off & PAGE_MASK, n, uio);
+			if (error != 0)
+				break;
+		}
+	} else {
+		while (uio->uio_resid > 0) {
+			if (uio->uio_offset >= spa->spa_len)
+				break;
+			n = INT_MAX;
+			if (n > uio->uio_resid)
+				n = uio->uio_resid;
+			if (uio->uio_offset + n > spa->spa_len)
+				n = spa->spa_len - uio->uio_offset;
+			error = uiomove((char *)spa->spa_kva + uio->uio_offset,
+			    n, uio);
+			if (error != 0)
+				break;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Character device read/write entry point (installed as both d_read
+ * and d_write): hands the uio to nvdimm_spa_uio() for the SPA range
+ * stored in si_drv1.  ioflag is unused.
+ */
+static int
+nvdimm_spa_rw(struct cdev *dev, struct uio *uio, int ioflag)
+{
+
+	return (nvdimm_spa_uio(dev->si_drv1, uio));
+}
+
+/*
+ * Character device ioctl entry point.  Supports DIOCGSECTORSIZE
+ * (reports DEV_BSIZE) and DIOCGMEDIASIZE (reports the length of the
+ * SPA range); every other command fails with ENOTTY.
+ */
+static int
+nvdimm_spa_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
+    struct thread *td)
+{
+	struct SPA_mapping *range;
+
+	range = dev->si_drv1;
+	if (cmd == DIOCGSECTORSIZE) {
+		*(u_int *)data = DEV_BSIZE;
+		return (0);
+	}
+	if (cmd == DIOCGMEDIASIZE) {
+		*(off_t *)data = range->spa_len;
+		return (0);
+	}
+	return (ENOTTY);
+}
+
+/*
+ * Character device d_mmap_single entry point.
+ *
+ * Fails with ENXIO when the range has no VM object, and with EINVAL
+ * when [*offset, *offset + size) wraps around or does not lie inside
+ * the range.  Otherwise takes a reference on the VM object backing
+ * the range and returns it through objp.
+ */
+static int
+nvdimm_spa_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
+    vm_object_t *objp, int nprot)
+{
+	struct SPA_mapping *range;
+
+	range = dev->si_drv1;
+	if (range->spa_obj == NULL)
+		return (ENXIO);
+
+	/* Start must lie inside the range. */
+	if (*offset >= range->spa_len)
+		return (EINVAL);
+	/* The end must neither wrap around nor exceed the range. */
+	if (*offset + size < *offset)
+		return (EINVAL);
+	if (*offset + size > range->spa_len)
+		return (EINVAL);
+
+	vm_object_reference(range->spa_obj);
+	*objp = range->spa_obj;
+	return (0);
+}
+
+/* Entry points of the /dev/nvdimm_spaN character devices. */
+static struct cdevsw spa_cdevsw = {
+	.d_version =	D_VERSION,
+	.d_flags =	D_DISK,
+	.d_name =	"nvdimm_spa",
+	.d_read =	nvdimm_spa_rw,
+	.d_write =	nvdimm_spa_rw,
+	.d_ioctl =	nvdimm_spa_ioctl,
+	.d_mmap_single = nvdimm_spa_mmap_single,
+};
+
+/*
+ * Service an unmapped bio against an SPA range that has no KVA
+ * mapping.  One fake vm_page per bio page is set up on the stack to
+ * describe the physical pages of the range starting at the page that
+ * contains bio_offset; the data is then moved with pmap_copy_pages().
+ *
+ * rw is the bio command: for BIO_READ data is copied from the range
+ * into the bio's pages, otherwise from the bio's pages into the range.
+ */
+static void
+nvdimm_spa_g_all_unmapped(struct SPA_mapping *spa, struct bio *bp,
+    int rw)
+{
+	/* Both arrays are sized by the number of pages in the bio. */
+	struct vm_page maa[bp->bio_ma_n];
+	vm_page_t ma[bp->bio_ma_n];
+	vm_memattr_t mattr;
+	int i;
+
+	mattr = nvdimm_spa_memattr(spa);
+	for (i = 0; i < nitems(ma); i++) {
+		/* Clear flags before vm_page_initfake() sees the stack page. */
+		maa[i].flags = 0;
+		vm_page_initfake(&maa[i], spa->spa_phys_base +
+		    trunc_page(bp->bio_offset) + PAGE_SIZE * i, mattr);
+		ma[i] = &maa[i];
+	}
+	if (rw == BIO_READ)
+		pmap_copy_pages(ma, bp->bio_offset & PAGE_MASK, bp->bio_ma,
+		    bp->bio_ma_offset, bp->bio_length);
+	else
+		pmap_copy_pages(bp->bio_ma, bp->bio_ma_offset, ma,
+		    bp->bio_offset & PAGE_MASK, bp->bio_length);
+}
+
+/*
+ * GEOM worker thread of one SPA range; arg is the struct SPA_mapping.
+ *
+ * Takes bios off spa_g_queue and services them:
+ *  - anything other than BIO_READ, BIO_WRITE and BIO_FLUSH fails with
+ *    EOPNOTSUPP;
+ *  - BIO_FLUSH writes back the caches for the whole range;
+ *  - unmapped reads/writes are copied between the bio's pages and the
+ *    range, through the KVA mapping when there is one;
+ *  - mapped reads/writes go through nvdimm_spa_uio().
+ *
+ * The thread exits once spa_g_proc_run is false and the queue is
+ * empty; it then sets spa_g_proc_exiting and wakes up the waiter.
+ */
+static void
+nvdimm_spa_g_thread(void *arg)
+{
+	struct SPA_mapping *spa;
+	struct bio *bp;
+	struct uio auio;
+	struct iovec aiovec;
+	int error;
+
+	spa = arg;
+	for (;;) {
+		mtx_lock(&spa->spa_g_mtx);
+		for (;;) {
+			bp = bioq_takefirst(&spa->spa_g_queue);
+			if (bp != NULL)
+				break;
+			/*
+			 * Test the stop flag before going to sleep: the
+			 * stop request and its wakeup may have been
+			 * issued while a bio was being processed without
+			 * the mutex held, in which case sleeping first
+			 * would miss that wakeup.
+			 */
+			if (!spa->spa_g_proc_run) {
+				spa->spa_g_proc_exiting = true;
+				wakeup(&spa->spa_g_queue);
+				mtx_unlock(&spa->spa_g_mtx);
+				kproc_exit(0);
+			}
+			msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO,
+			    "spa_g", 0);
+		}
+		mtx_unlock(&spa->spa_g_mtx);
+		if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
+		    bp->bio_cmd != BIO_FLUSH) {
+			error = EOPNOTSUPP;
+			goto completed;
+		}
+
+		error = 0;
+		if (bp->bio_cmd == BIO_FLUSH) {
+			if (spa->spa_kva != NULL) {
+				pmap_large_map_wb(spa->spa_kva, spa->spa_len);
+			} else {
+				pmap_flush_cache_phys_range(
+				    (vm_paddr_t)spa->spa_phys_base,
+				    (vm_paddr_t)spa->spa_phys_base +
+				    spa->spa_len, nvdimm_spa_memattr(spa));
+			}
+			/*
+			 * XXX flush IMC
+			 */
+			goto completed;
+		}
+
+		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+			if (spa->spa_kva != NULL) {
+				/*
+				 * The uio describes the SPA side while the
+				 * bio's pages are the "physical" side, so
+				 * the uio direction is the reverse of the
+				 * bio command.
+				 */
+				aiovec.iov_base = (char *)spa->spa_kva +
+				    bp->bio_offset;
+				aiovec.iov_len = bp->bio_length;
+				auio.uio_iov = &aiovec;
+				auio.uio_iovcnt = 1;
+				auio.uio_resid = bp->bio_length;
+				auio.uio_offset = bp->bio_offset;
+				auio.uio_segflg = UIO_SYSSPACE;
+				auio.uio_rw = bp->bio_cmd == BIO_READ ?
+				    UIO_WRITE : UIO_READ;
+				auio.uio_td = curthread;
+				error = uiomove_fromphys(bp->bio_ma,
+				    bp->bio_ma_offset, bp->bio_length, &auio);
+			} else {
+				nvdimm_spa_g_all_unmapped(spa, bp, bp->bio_cmd);
+				error = 0;
+			}
+		} else {
+			aiovec.iov_base = bp->bio_data;
+			aiovec.iov_len = bp->bio_length;
+			auio.uio_iov = &aiovec;
+			auio.uio_iovcnt = 1;
+			auio.uio_resid = bp->bio_length;
+			auio.uio_offset = bp->bio_offset;
+			auio.uio_segflg = UIO_SYSSPACE;
+			auio.uio_rw = bp->bio_cmd == BIO_READ ? UIO_READ :
+			    UIO_WRITE;
+			auio.uio_td = curthread;
+			error = nvdimm_spa_uio(spa, &auio);
+		}
+		/*
+		 * Statistics are updated under the same mutex that
+		 * nvdimm_spa_g_start() holds when opening the transaction.
+		 */
+		mtx_lock(&spa->spa_g_stat_mtx);
+		devstat_end_transaction_bio(spa->spa_g_devstat, bp);
+		mtx_unlock(&spa->spa_g_stat_mtx);
+completed:
+		bp->bio_completed = bp->bio_length;
+		g_io_deliver(bp, error);
+	}
+}
+
+/*
+ * GEOM start method: queue the bio for the worker thread of the SPA
+ * range behind the target provider and wake the worker up.  For reads
+ * and writes a devstat transaction is opened first, under
+ * spa_g_stat_mtx.
+ */
+static void
+nvdimm_spa_g_start(struct bio *bp)
+{
+	struct SPA_mapping *spa;
+
+	spa = bp->bio_to->geom->softc;
+	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
+		mtx_lock(&spa->spa_g_stat_mtx);
+		devstat_start_transaction_bio(spa->spa_g_devstat, bp);
+		mtx_unlock(&spa->spa_g_stat_mtx);
+	}
+	mtx_lock(&spa->spa_g_mtx);
+	bioq_disksort(&spa->spa_g_queue, bp);
+	wakeup(&spa->spa_g_queue);
+	mtx_unlock(&spa->spa_g_mtx);
+}
+
+/* GEOM access method: every access count change is accepted. */
+static int
+nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e)
+{
+
+	return (0);
+}
+
+/* Class init/fini hooks; their definitions are not in this part of the file. */
+static g_init_t nvdimm_spa_g_init;
+static g_fini_t nvdimm_spa_g_fini;
+
+/* GEOM class "SPA" under which the spaN geoms and providers are created. */
+struct g_class nvdimm_spa_g_class = {
+	.name =		"SPA",
+	.version =	G_VERSION,
+	.start =	nvdimm_spa_g_start,
+	.access =	nvdimm_spa_g_access,
+	.init =		nvdimm_spa_g_init,
+	.fini =		nvdimm_spa_g_fini,
+};
+DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa);
+
+/*
+ * Set up one SPA range from its NFIT System Physical Address subtable.
+ *
+ * spa:      descriptor to fill in.
+ * nfitaddr: the NFIT subtable describing the range.
+ * spa_type: index into nvdimm_SPA_uuid_list of the matched range type.
+ *
+ * The descriptor fields are copied from the subtable.  For range types
+ * without user access nothing else is done.  Otherwise the range is
+ * mapped into KVA, a scatter/gather VM object is created for mmap, the
+ * /dev/nvdimm_spaN node is created, and the GEOM worker thread, geom,
+ * provider and devstat entry are set up.
+ *
+ * Returns 0 on success, or the first error met while mapping the range
+ * into KVA, creating the devfs node, or starting the worker.  The
+ * remaining steps are still attempted after such an error.  A failure
+ * to create the VM object is only logged.
+ */
+static int
+nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr,
+    int spa_type)
+{
+	struct make_dev_args mda;
+	struct sglist *spa_sg;
+	int error, error1;
+
+	spa->spa_type = spa_type;
+	spa->spa_domain = ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ?
+	    nfitaddr->ProximityDomain : -1;
+	spa->spa_nfit_idx = nfitaddr->RangeIndex;
+	spa->spa_phys_base = nfitaddr->Address;
+	spa->spa_len = nfitaddr->Length;
+	spa->spa_efi_mem_flags = nfitaddr->MemoryMapping;
+	if (bootverbose) {
+		printf("NVDIMM SPA%d base %#016jx len %#016jx %s fl %#jx\n",
+		    spa->spa_nfit_idx,
+		    (uintmax_t)spa->spa_phys_base, (uintmax_t)spa->spa_len,
+		    nvdimm_SPA_uuid_list[spa_type].u_name,
+		    (uintmax_t)spa->spa_efi_mem_flags);
+	}
+	if (!nvdimm_SPA_uuid_list[spa_type].u_usr_acc)
+		return (0);
+
+	/* Without a KVA mapping the I/O paths fall back to physical copies. */
+	error1 = pmap_large_map(spa->spa_phys_base, spa->spa_len,
+	    &spa->spa_kva, nvdimm_spa_memattr(spa));
+	if (error1 != 0) {
+		printf("NVDIMM SPA%d cannot map into KVA, error %d\n",
+		    spa->spa_nfit_idx, error1);
+		spa->spa_kva = NULL;
+	}
+
+	spa_sg = sglist_alloc(1, M_WAITOK);
+	error = sglist_append_phys(spa_sg, spa->spa_phys_base,
+	    spa->spa_len);
+	if (error == 0) {
+		spa->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, spa->spa_len,
+		    VM_PROT_ALL, 0, NULL);
+		if (spa->spa_obj == NULL) {
+			printf("NVDIMM SPA%d failed to alloc vm object\n",
+			    spa->spa_nfit_idx);
+		}
+	} else {
+		printf("NVDIMM SPA%d failed to init sglist, error %d\n",
+		    spa->spa_nfit_idx, error);
+	}
+	/*
+	 * Drop our reference in every case.  A successfully created
+	 * OBJT_SG object holds its own reference on the list, so the list
+	 * lives until the object is deallocated; otherwise this frees it.
+	 */
+	sglist_free(spa_sg);
+
+	make_dev_args_init(&mda);
+	mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
+	mda.mda_devsw = &spa_cdevsw;
+	mda.mda_cr = NULL;
+	mda.mda_uid = UID_ROOT;
+	mda.mda_gid = GID_OPERATOR;
+	mda.mda_mode = 0660;
+	mda.mda_si_drv1 = spa;
+	error = make_dev_s(&mda, &spa->spa_dev, "nvdimm_spa%d",
+	    spa->spa_nfit_idx);
+	if (error != 0) {
+		printf("NVDIMM SPA%d cannot create devfs node, error %d\n",
+		    spa->spa_nfit_idx, error);
+		if (error1 == 0)
+			error1 = error;
+	}
+
+	bioq_init(&spa->spa_g_queue);
+	mtx_init(&spa->spa_g_mtx, "spag", NULL, MTX_DEF);
+	mtx_init(&spa->spa_g_stat_mtx, "spagst", NULL, MTX_DEF);
+	spa->spa_g_proc_run = true;
+	spa->spa_g_proc_exiting = false;
+	error = kproc_create(nvdimm_spa_g_thread, spa, &spa->spa_g_proc, 0, 0,
+	    "g_spa%d", spa->spa_nfit_idx);
+	if (error != 0) {
+		printf("NVDIMM SPA%d cannot create geom worker, error %d\n",
+		    spa->spa_nfit_idx, error);
+		/*
+		 * No worker exists, so nothing will ever report its
+		 * exit.  Mark it as already exited so that the teardown
+		 * in nvdimm_spa_fini_one() does not wait for it forever.
+		 */
+		spa->spa_g_proc_run = false;
+		spa->spa_g_proc_exiting = true;
+		if (error1 == 0)
+			error1 = error;
+	} else {
+		g_topology_assert();
+		spa->spa_g = g_new_geomf(&nvdimm_spa_g_class, "spa%d",
+		    spa->spa_nfit_idx);
+		spa->spa_g->softc = spa;
+		spa->spa_p = g_new_providerf(spa->spa_g, "spa%d",
+		    spa->spa_nfit_idx);
+		spa->spa_p->mediasize = spa->spa_len;
+		spa->spa_p->sectorsize = DEV_BSIZE;
+		spa->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
+		    G_PF_ACCEPT_UNMAPPED;
+		g_error_provider(spa->spa_p, 0);
+		spa->spa_g_devstat = devstat_new_entry("spa", spa->spa_nfit_idx,
+		    DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT,
+		    DEVSTAT_PRIORITY_MAX);
+	}
+	return (error1);
+}
+
+/*
+ * Tear down one SPA range: stop the worker thread, wither the geom,
+ * remove the devstat entry and the devfs node, drop the VM object
+ * reference, unmap the KVA mapping and destroy the mutexes.
+ *
+ * NOTE(review): the wait loop below relies on a worker thread setting
+ * spa_g_proc_exiting; if the worker was never created the loop
+ * presumably does not terminate - confirm against the init path.
+ */
+static void
+nvdimm_spa_fini_one(struct SPA_mapping *spa)
+{
+
+	/* Ask the worker to stop and wait until it acknowledges. */
+	mtx_lock(&spa->spa_g_mtx);
+	spa->spa_g_proc_run = false;
+	wakeup(&spa->spa_g_queue);
+	while (!spa->spa_g_proc_exiting)
+		msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO, "spa_e", 0);
+	mtx_unlock(&spa->spa_g_mtx);
+	if (spa->spa_g != NULL) {
+		g_topology_lock();
+		g_wither_geom(spa->spa_g, ENXIO);
+		g_topology_unlock();
+		spa->spa_g = NULL;
+		spa->spa_p = NULL;
+	}
+	if (spa->spa_g_devstat != NULL) {
+		devstat_remove_entry(spa->spa_g_devstat);
+		spa->spa_g_devstat = NULL;
+	}
+	if (spa->spa_dev != NULL) {
+		destroy_dev(spa->spa_dev);
+		spa->spa_dev = NULL;
+	}
+	vm_object_deallocate(spa->spa_obj);
+	if (spa->spa_kva != NULL) {
+		pmap_large_unmap(spa->spa_kva, spa->spa_len);
+		spa->spa_kva = NULL;
+	}
+	mtx_destroy(&spa->spa_g_mtx);
+	mtx_destroy(&spa->spa_g_stat_mtx);
+}
+
+/*
+ * NFIT iteration callback for System Physical Address subtables.
+ *
+ * arg points to the index of the next free slot in spa_mappings.  The
+ * subtable's range GUID is looked up in nvdimm_SPA_uuid_list; on a
+ * match the slot is initialized, and torn down again if that fails.
+ * An unknown GUID is reported when bootverbose is set.  The index is
+ * advanced in every case and 0 is always returned.
+ */
+static int
+nvdimm_spa_parse(void *nfitsubtbl, void *arg)
+{
+	ACPI_NFIT_SYSTEM_ADDRESS *range;
+	struct SPA_mapping *slot;
+	int *nextp, type;
+
+	nextp = arg;
+	slot = &spa_mappings[*nextp];
+	range = nfitsubtbl;
+
+	/* XXXKIB: is ACPI UUID representation compatible ? */
+	for (type = 0; type < nitems(nvdimm_SPA_uuid_list); type++) {
+		if (uuidcmp((struct uuid *)&range->RangeGuid,
+		    &nvdimm_SPA_uuid_list[type].u_id) == 0)
+			break;
+	}
+
+	if (type < nitems(nvdimm_SPA_uuid_list)) {
+		if (nvdimm_spa_init_one(slot, range, type) != 0)
+			nvdimm_spa_fini_one(slot);
+	} else if (bootverbose) {
+		printf("Unknown SPA UUID %d ", range->RangeIndex);
+		printf_uuid((struct uuid *)&range->RangeGuid);
+		printf("\n");
+	}
+	(*nextp)++;
+	return (0);
+}
+
+static int
+nvdimm_spa_init1(ACPI_TABLE_NFIT *nfitbl)
+{
+	struct nvdimm_SPA_uuid_list_elm *sle;
+	int error, i;
+
+	for (i = 0; i < nitems(nvdimm_SPA_uuid_list); i++) {
+		sle = &nvdimm_SPA_uuid_list[i];
+		error = parse_uuid(sle->u_id_str, &sle->u_id);
+		if (error != 0) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201810162012.w9GKCaWb016859>