From owner-svn-src-stable@FreeBSD.ORG Sun Nov 3 20:50:49 2013 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTP id CD70DBB6; Sun, 3 Nov 2013 20:50:49 +0000 (UTC) (envelope-from jimharris@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id B8AC521DB; Sun, 3 Nov 2013 20:50:49 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id rA3Kon7j034347; Sun, 3 Nov 2013 20:50:49 GMT (envelope-from jimharris@svn.freebsd.org) Received: (from jimharris@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id rA3KomWb034340; Sun, 3 Nov 2013 20:50:48 GMT (envelope-from jimharris@svn.freebsd.org) Message-Id: <201311032050.rA3KomWb034340@svn.freebsd.org> From: Jim Harris Date: Sun, 3 Nov 2013 20:50:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r257587 - in stable/9/sys/dev: nvd nvme X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 03 Nov 2013 20:50:49 -0000 Author: jimharris Date: Sun Nov 3 20:50:48 2013 New Revision: 257587 URL: http://svnweb.freebsd.org/changeset/base/257587 Log: MFC r256151: Add driver-assisted striping for upcoming Intel NVMe controllers that can benefit from it. Sponsored by: Intel Modified: stable/9/sys/dev/nvd/nvd.c stable/9/sys/dev/nvme/nvme.h stable/9/sys/dev/nvme/nvme_ns.c stable/9/sys/dev/nvme/nvme_private.h Directory Properties: stable/9/sys/ (props changed) stable/9/sys/dev/ (props changed) Modified: stable/9/sys/dev/nvd/nvd.c ============================================================================== --- stable/9/sys/dev/nvd/nvd.c Sun Nov 3 20:38:51 2013 (r257586) +++ stable/9/sys/dev/nvd/nvd.c Sun Nov 3 20:50:48 2013 (r257587) @@ -187,17 +187,6 @@ nvd_done(void *arg, const struct nvme_co atomic_add_int(&ndisk->cur_depth, -1); - /* - * TODO: add more extensive translation of NVMe status codes - * to different bio error codes (i.e. EIO, EINVAL, etc.) - */ - if (nvme_completion_is_error(cpl)) { - bp->bio_error = EIO; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - } else - bp->bio_resid = 0; - biodone(bp); } Modified: stable/9/sys/dev/nvme/nvme.h ============================================================================== --- stable/9/sys/dev/nvme/nvme.h Sun Nov 3 20:38:51 2013 (r257586) +++ stable/9/sys/dev/nvme/nvme.h Sun Nov 3 20:50:48 2013 (r257587) @@ -532,7 +532,7 @@ struct nvme_controller_data { uint8_t reserved6[1024]; /* bytes 3072-4095: vendor specific */ - uint8_t reserved7[1024]; + uint8_t vs[1024]; } __packed __aligned(4); struct nvme_namespace_data { Modified: stable/9/sys/dev/nvme/nvme_ns.c ============================================================================== --- stable/9/sys/dev/nvme/nvme_ns.c Sun Nov 3 20:38:51 2013 (r257586) +++ stable/9/sys/dev/nvme/nvme_ns.c Sun Nov 3 20:50:48 2013 (r257587) @@ -34,13 +34,31 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include +#include + #include "nvme_private.h" +static void nvme_bio_child_inbed(struct bio *parent, int bio_error); +static void nvme_bio_child_done(void *arg, + const struct nvme_completion *cpl); +static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size, + uint32_t alignment); +static void nvme_free_child_bios(int num_bios, + struct bio **child_bios); +static struct bio ** nvme_allocate_child_bios(int num_bios); +static struct bio ** nvme_construct_child_bios(struct bio *bp, + uint32_t alignment, + int *num_bios); +static int nvme_ns_split_bio(struct nvme_namespace *ns, + struct bio *bp, + uint32_t alignment); + static int nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, struct thread *td) @@ -202,18 +220,218 @@ nvme_ns_bio_done(void *arg, const struct if (bp->bio_driver2) free(bp->bio_driver2, M_NVME); + if (nvme_completion_is_error(status)) { + bp->bio_flags |= BIO_ERROR; + if (bp->bio_error == 0) + bp->bio_error = EIO; + } + + if ((bp->bio_flags & BIO_ERROR) == 0) + bp->bio_resid = 0; + else + bp->bio_resid = bp->bio_bcount; + bp_cb_fn(bp, status); } +static void +nvme_bio_child_inbed(struct bio *parent, int bio_error) +{ + struct nvme_completion parent_cpl; + int inbed; + + if (bio_error != 0) { + parent->bio_flags |= BIO_ERROR; + parent->bio_error = bio_error; + } + + /* + * atomic_fetchadd will return value before adding 1, so we still + * must add 1 to get the updated inbed number. + */ + inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1; + if (inbed == parent->bio_children) { + bzero(&parent_cpl, sizeof(parent_cpl)); + if (parent->bio_flags & BIO_ERROR) + parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR; + nvme_ns_bio_done(parent, &parent_cpl); + } +} + +static void +nvme_bio_child_done(void *arg, const struct nvme_completion *cpl) +{ + struct bio *child = arg; + struct bio *parent; + int bio_error; + + parent = child->bio_parent; + g_destroy_bio(child); + bio_error = nvme_completion_is_error(cpl) ? EIO : 0; + nvme_bio_child_inbed(parent, bio_error); +} + +static uint32_t +nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align) +{ + uint32_t num_segs, offset, remainder; + + if (align == 0) + return (1); + + KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n")); + + num_segs = size / align; + remainder = size & (align - 1); + offset = addr & (align - 1); + if (remainder > 0 || offset > 0) + num_segs += 1 + (remainder + offset - 1) / align; + return (num_segs); +} + +static void +nvme_free_child_bios(int num_bios, struct bio **child_bios) +{ + int i; + + for (i = 0; i < num_bios; i++) { + if (child_bios[i] != NULL) + g_destroy_bio(child_bios[i]); + } + + free(child_bios, M_NVME); +} + +static struct bio ** +nvme_allocate_child_bios(int num_bios) +{ + struct bio **child_bios; + int err = 0, i; + + child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT); + if (child_bios == NULL) + return (NULL); + + for (i = 0; i < num_bios; i++) { + child_bios[i] = g_new_bio(); + if (child_bios[i] == NULL) + err = ENOMEM; + } + + if (err == ENOMEM) { + nvme_free_child_bios(num_bios, child_bios); + return (NULL); + } + + return (child_bios); +} + +static struct bio ** +nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios) +{ + struct bio **child_bios; + struct bio *child; + uint64_t cur_offset; + caddr_t data; + uint32_t rem_bcount; + int i; +#ifdef NVME_UNMAPPED_BIO_SUPPORT + struct vm_page **ma; + uint32_t ma_offset; +#endif + + *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount, + alignment); + child_bios = nvme_allocate_child_bios(*num_bios); + if (child_bios == NULL) + return (NULL); + + bp->bio_children = *num_bios; + bp->bio_inbed = 0; + cur_offset = bp->bio_offset; + rem_bcount = bp->bio_bcount; + data = bp->bio_data; +#ifdef NVME_UNMAPPED_BIO_SUPPORT + ma_offset = bp->bio_ma_offset; + ma = bp->bio_ma; +#endif + + for (i = 0; i < *num_bios; i++) { + child = child_bios[i]; + child->bio_parent = bp; + child->bio_cmd = bp->bio_cmd; + child->bio_offset = cur_offset; + child->bio_bcount = min(rem_bcount, + alignment - (cur_offset & (alignment - 1))); + child->bio_flags = bp->bio_flags; +#ifdef NVME_UNMAPPED_BIO_SUPPORT + if (bp->bio_flags & BIO_UNMAPPED) { + child->bio_ma_offset = ma_offset; + child->bio_ma = ma; + child->bio_ma_n = + nvme_get_num_segments(child->bio_ma_offset, + child->bio_bcount, PAGE_SIZE); + ma_offset = (ma_offset + child->bio_bcount) & + PAGE_MASK; + ma += child->bio_ma_n; + if (ma_offset != 0) + ma -= 1; + } else +#endif + { + child->bio_data = data; + data += child->bio_bcount; + } + cur_offset += child->bio_bcount; + rem_bcount -= child->bio_bcount; + } + + return (child_bios); +} + +static int +nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp, + uint32_t alignment) +{ + struct bio *child; + struct bio **child_bios; + int err, i, num_bios; + + child_bios = nvme_construct_child_bios(bp, alignment, &num_bios); + if (child_bios == NULL) + return (ENOMEM); + + for (i = 0; i < num_bios; i++) { + child = child_bios[i]; + err = nvme_ns_bio_process(ns, child, nvme_bio_child_done); + if (err != 0) { + nvme_bio_child_inbed(bp, err); + g_destroy_bio(child); + } + } + + free(child_bios, M_NVME); + return (0); +} + int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn) { struct nvme_dsm_range *dsm_range; + uint32_t num_bios; int err; bp->bio_driver1 = cb_fn; + if (ns->stripesize > 0 && + (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { + num_bios = nvme_get_num_segments(bp->bio_offset, + bp->bio_bcount, ns->stripesize); + if (num_bios > 1) + return (nvme_ns_split_bio(ns, bp, ns->stripesize)); + } + switch (bp->bio_cmd) { case BIO_READ: err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp); @@ -276,6 +494,11 @@ nvme_ns_construct(struct nvme_namespace ns->ctrlr = ctrlr; ns->id = id; + ns->stripesize = 0; + + if (pci_get_devid(ctrlr->dev) == 0x09538086 && ctrlr->cdata.vs[3] != 0) + ns->stripesize = + (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size; /* * Namespaces are reconstructed after a controller reset, so check Modified: stable/9/sys/dev/nvme/nvme_private.h ============================================================================== --- stable/9/sys/dev/nvme/nvme_private.h Sun Nov 3 20:38:51 2013 (r257586) +++ stable/9/sys/dev/nvme/nvme_private.h Sun Nov 3 20:50:48 2013 (r257587) @@ -238,6 +238,7 @@ struct nvme_namespace { uint16_t flags; struct cdev *cdev; void *cons_cookie[NVME_MAX_CONSUMERS]; + uint32_t stripesize; struct mtx lock; };