From: Bryan Venteicher <bryanv@FreeBSD.org>
Date: Thu, 1 Jan 2015 01:43:01 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject: svn commit: r276487 - stable/10/sys/dev/virtio/block

Author: bryanv
Date: Thu Jan  1 01:43:00 2015
New Revision: 276487
URL: https://svnweb.freebsd.org/changeset/base/276487

Log:
  MFC r275335:

  Cleanup and performance improvement of the virtio_blk driver

  - Add support for GEOM direct completion. Depending on the benchmark,
    this tends to give a ~30% improvement in IOPS and bandwidth.
  - Remove an INVARIANTS check in the strategy routine; the assertion
    is caught later on by an existing panic.
  - Rename and re-sort various related functions to make more sense.
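In outline, GEOM direct completion (the first item above) means the disk sets
DISKFLAG_DIRECT_COMPLETION, and the driver must then call biodone() with no
driver locks held, because GEOM may run the bio's completion inline on the
calling thread. A minimal sketch of the pattern, distilled from the
vtblk_vq_intr() and vtblk_bio_done() changes in the diff below (illustrative
only, not the committed code; the function name here is hypothetical):

    static void
    vtblk_intr_sketch(void *xsc)
    {
            struct vtblk_softc *sc = xsc;
            struct bio_queue queue;         /* local list of finished bios */

            TAILQ_INIT(&queue);
            VTBLK_LOCK(sc);
            vtblk_queue_completed(sc, &queue);  /* dequeue under the lock */
            vtblk_startio(sc);                  /* issue more queued bios */
            VTBLK_UNLOCK(sc);
            vtblk_done_completed(sc, &queue);   /* biodone() unlocked */
    }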
Modified:
  stable/10/sys/dev/virtio/block/virtio_blk.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/virtio/block/virtio_blk.c
==============================================================================
--- stable/10/sys/dev/virtio/block/virtio_blk.c        Wed Dec 31 23:25:37 2014        (r276486)
+++ stable/10/sys/dev/virtio/block/virtio_blk.c        Thu Jan  1 01:43:00 2015        (r276487)
@@ -58,7 +58,6 @@ struct vtblk_request {
         struct virtio_blk_outhdr         vbr_hdr;
         struct bio                      *vbr_bp;
         uint8_t                          vbr_ack;
-
         TAILQ_ENTRY(vtblk_request)       vbr_link;
 };
 
@@ -132,53 +131,60 @@ static int     vtblk_dump(void *, void *, vm
 static void     vtblk_strategy(struct bio *);
 
 static void     vtblk_negotiate_features(struct vtblk_softc *);
+static void     vtblk_setup_features(struct vtblk_softc *);
 static int      vtblk_maximum_segments(struct vtblk_softc *,
                     struct virtio_blk_config *);
 static int      vtblk_alloc_virtqueue(struct vtblk_softc *);
 static void     vtblk_resize_disk(struct vtblk_softc *, uint64_t);
-static void     vtblk_set_write_cache(struct vtblk_softc *, int);
-static int      vtblk_write_cache_enabled(struct vtblk_softc *sc,
-                    struct virtio_blk_config *);
-static int      vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
 static void     vtblk_alloc_disk(struct vtblk_softc *,
                     struct virtio_blk_config *);
 static void     vtblk_create_disk(struct vtblk_softc *);
-static int      vtblk_quiesce(struct vtblk_softc *);
-static void     vtblk_startio(struct vtblk_softc *);
-static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
-static int      vtblk_execute_request(struct vtblk_softc *,
+static int      vtblk_request_prealloc(struct vtblk_softc *);
+static void     vtblk_request_free(struct vtblk_softc *);
+static struct vtblk_request *
+                vtblk_request_dequeue(struct vtblk_softc *);
+static void     vtblk_request_enqueue(struct vtblk_softc *,
                     struct vtblk_request *);
+static struct vtblk_request *
+                vtblk_request_next_ready(struct vtblk_softc *);
+static void     vtblk_request_requeue_ready(struct vtblk_softc *,
+                    struct vtblk_request *);
+static struct vtblk_request *
+                vtblk_request_next(struct vtblk_softc *);
+static struct vtblk_request *
+                vtblk_request_bio(struct vtblk_softc *);
+static int      vtblk_request_execute(struct vtblk_softc *,
+                    struct vtblk_request *);
+static int      vtblk_request_error(struct vtblk_request *);
 
-static void     vtblk_vq_intr(void *);
+static void     vtblk_queue_completed(struct vtblk_softc *,
+                    struct bio_queue *);
+static void     vtblk_done_completed(struct vtblk_softc *,
+                    struct bio_queue *);
+static void     vtblk_drain_vq(struct vtblk_softc *, int);
+static void     vtblk_drain(struct vtblk_softc *);
 
-static void     vtblk_stop(struct vtblk_softc *);
+static void     vtblk_startio(struct vtblk_softc *);
+static void     vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
 
 static void     vtblk_read_config(struct vtblk_softc *,
                     struct virtio_blk_config *);
-static void     vtblk_get_ident(struct vtblk_softc *);
-static void     vtblk_prepare_dump(struct vtblk_softc *);
-static int      vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
-static int      vtblk_flush_dump(struct vtblk_softc *);
+static void     vtblk_ident(struct vtblk_softc *);
 static int      vtblk_poll_request(struct vtblk_softc *,
                     struct vtblk_request *);
+static int      vtblk_quiesce(struct vtblk_softc *);
+static void     vtblk_vq_intr(void *);
+static void     vtblk_stop(struct vtblk_softc *);
 
-static void     vtblk_finish_completed(struct vtblk_softc *);
-static void     vtblk_drain_vq(struct vtblk_softc *, int);
-static void     vtblk_drain(struct vtblk_softc *);
-
-static int      vtblk_alloc_requests(struct vtblk_softc *);
-static void     vtblk_free_requests(struct vtblk_softc *);
-static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
-static void     vtblk_enqueue_request(struct vtblk_softc *,
-                    struct vtblk_request *);
-
-static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
-static void     vtblk_enqueue_ready(struct vtblk_softc *,
-                    struct vtblk_request *);
+static void     vtblk_dump_prepare(struct vtblk_softc *);
+static int      vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
+static int      vtblk_dump_flush(struct vtblk_softc *);
 
-static int      vtblk_request_error(struct vtblk_request *);
-static void     vtblk_finish_bio(struct bio *, int);
+static void     vtblk_set_write_cache(struct vtblk_softc *, int);
+static int      vtblk_write_cache_enabled(struct vtblk_softc *sc,
+                    struct virtio_blk_config *);
+static int      vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
 
 static void     vtblk_setup_sysctl(struct vtblk_softc *);
 static int      vtblk_tunable_int(struct vtblk_softc *, const char *, int);
 
@@ -288,30 +294,18 @@ vtblk_attach(device_t dev)
         struct virtio_blk_config blkcfg;
         int error;
 
+        virtio_set_feature_desc(dev, vtblk_feature_desc);
+
         sc = device_get_softc(dev);
         sc->vtblk_dev = dev;
-
         VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
-
         bioq_init(&sc->vtblk_bioq);
         TAILQ_INIT(&sc->vtblk_req_free);
         TAILQ_INIT(&sc->vtblk_req_ready);
 
-        virtio_set_feature_desc(dev, vtblk_feature_desc);
-        vtblk_negotiate_features(sc);
-
-        if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
-                sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
-        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
-                sc->vtblk_flags |= VTBLK_FLAG_READONLY;
-        if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
-                sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
-        if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
-                sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
-
         vtblk_setup_sysctl(sc);
+        vtblk_setup_features(sc);
 
-        /* Get local copy of config. */
         vtblk_read_config(sc, &blkcfg);
 
         /*
@@ -350,7 +344,7 @@ vtblk_attach(device_t dev)
                 goto fail;
         }
 
-        error = vtblk_alloc_requests(sc);
+        error = vtblk_request_prealloc(sc);
         if (error) {
                 device_printf(dev, "cannot preallocate requests\n");
                 goto fail;
@@ -517,14 +511,14 @@ vtblk_dump(void *arg, void *virtual, vm_
         VTBLK_LOCK(sc);
 
         if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
-                vtblk_prepare_dump(sc);
+                vtblk_dump_prepare(sc);
                 sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
         }
 
         if (length > 0)
-                error = vtblk_write_dump(sc, virtual, offset, length);
+                error = vtblk_dump_write(sc, virtual, offset, length);
         else if (virtual == NULL && offset == 0)
-                error = vtblk_flush_dump(sc);
+                error = vtblk_dump_flush(sc);
         else {
                 error = EINVAL;
                 sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
@@ -541,7 +535,7 @@ vtblk_strategy(struct bio *bp)
         struct vtblk_softc *sc;
 
         if ((sc = bp->bio_disk->d_drv1) == NULL) {
-                vtblk_finish_bio(bp, EINVAL);
+                vtblk_bio_done(NULL, bp, EINVAL);
                 return;
         }
 
@@ -551,37 +545,21 @@ vtblk_strategy(struct bio *bp)
          */
         if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
             (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
-                vtblk_finish_bio(bp, EROFS);
+                vtblk_bio_done(sc, bp, EROFS);
                 return;
         }
 
-#ifdef INVARIANTS
-        /*
-         * Prevent read/write buffers spanning too many segments from
-         * getting into the queue. This should only trip if d_maxsize
-         * was incorrectly set.
-         */
-        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
-                int nsegs, max_nsegs;
-
-                nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
-                max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;
+        VTBLK_LOCK(sc);
 
-                KASSERT(nsegs <= max_nsegs,
-                    ("%s: bio %p spanned too many segments: %d, max: %d",
-                    __func__, bp, nsegs, max_nsegs));
+        if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
+                VTBLK_UNLOCK(sc);
+                vtblk_bio_done(sc, bp, ENXIO);
+                return;
         }
-#endif
 
-        VTBLK_LOCK(sc);
+        bioq_insert_tail(&sc->vtblk_bioq, bp);
+        vtblk_startio(sc);
 
-        if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
-                vtblk_finish_bio(bp, ENXIO);
-        else {
-                bioq_insert_tail(&sc->vtblk_bioq, bp);
-
-                if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
-                        vtblk_startio(sc);
-        }
         VTBLK_UNLOCK(sc);
 }
 
@@ -597,6 +575,25 @@ vtblk_negotiate_features(struct vtblk_so
         sc->vtblk_features = virtio_negotiate_features(dev, features);
 }
 
+static void
+vtblk_setup_features(struct vtblk_softc *sc)
+{
+        device_t dev;
+
+        dev = sc->vtblk_dev;
+
+        vtblk_negotiate_features(sc);
+
+        if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
+                sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
+        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
+                sc->vtblk_flags |= VTBLK_FLAG_READONLY;
+        if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
+                sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
+        if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
+                sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
+}
+
 static int
 vtblk_maximum_segments(struct vtblk_softc *sc,
     struct virtio_blk_config *blkcfg)
@@ -658,59 +655,6 @@ vtblk_resize_disk(struct vtblk_softc *sc
 }
 
 static void
-vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
-{
-
-        /* Set either writeback (1) or writethrough (0) mode. */
-        virtio_write_dev_config_1(sc->vtblk_dev,
-            offsetof(struct virtio_blk_config, writeback), wc);
-}
-
-static int
-vtblk_write_cache_enabled(struct vtblk_softc *sc,
-    struct virtio_blk_config *blkcfg)
-{
-        int wc;
-
-        if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
-                wc = vtblk_tunable_int(sc, "writecache_mode",
-                    vtblk_writecache_mode);
-                if (wc >= 0 && wc < VTBLK_CACHE_MAX)
-                        vtblk_set_write_cache(sc, wc);
-                else
-                        wc = blkcfg->writeback;
-        } else
-                wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);
-
-        return (wc);
-}
-
-static int
-vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
-{
-        struct vtblk_softc *sc;
-        int wc, error;
-
-        sc = oidp->oid_arg1;
-        wc = sc->vtblk_write_cache;
-
-        error = sysctl_handle_int(oidp, &wc, 0, req);
-        if (error || req->newptr == NULL)
-                return (error);
-        if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
-                return (EPERM);
-        if (wc < 0 || wc >= VTBLK_CACHE_MAX)
-                return (EINVAL);
-
-        VTBLK_LOCK(sc);
-        sc->vtblk_write_cache = wc;
-        vtblk_set_write_cache(sc, sc->vtblk_write_cache);
-        VTBLK_UNLOCK(sc);
-
-        return (0);
-}
-
-static void
 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
 {
         device_t dev;
@@ -726,7 +670,8 @@ vtblk_alloc_disk(struct vtblk_softc *sc,
         dp->d_name = VTBLK_DISK_NAME;
         dp->d_unit = device_get_unit(dev);
         dp->d_drv1 = sc;
-        dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO;
+        dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO |
+            DISKFLAG_DIRECT_COMPLETION;
         dp->d_hba_vendor = virtio_get_vendor(dev);
         dp->d_hba_device = virtio_get_device(dev);
         dp->d_hba_subvendor = virtio_get_subvendor(dev);
@@ -787,11 +732,7 @@ vtblk_create_disk(struct vtblk_softc *sc
 
         dp = sc->vtblk_disk;
 
-        /*
-         * Retrieving the identification string must be done after
-         * the virtqueue interrupt is setup otherwise it will hang.
-         */
-        vtblk_get_ident(sc);
+        vtblk_ident(sc);
 
         device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
             (uintmax_t) dp->d_mediasize >> 20,
@@ -802,57 +743,107 @@ vtblk_create_disk(struct vtblk_softc *sc
 }
 
 static int
-vtblk_quiesce(struct vtblk_softc *sc)
+vtblk_request_prealloc(struct vtblk_softc *sc)
 {
-        int error;
+        struct vtblk_request *req;
+        int i, nreqs;
 
-        error = 0;
+        nreqs = virtqueue_size(sc->vtblk_vq);
 
-        VTBLK_LOCK_ASSERT(sc);
+        /*
+         * Preallocate sufficient requests to keep the virtqueue full. Each
+         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
+         * the number allocated when indirect descriptors are not available.
+         */
+        if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
+                nreqs /= VTBLK_MIN_SEGMENTS;
 
-        while (!virtqueue_empty(sc->vtblk_vq)) {
-                if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
-                    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
-                        error = EBUSY;
-                        break;
-                }
+        for (i = 0; i < nreqs; i++) {
+                req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
+                if (req == NULL)
+                        return (ENOMEM);
+
+                MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
+                MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
+
+                sc->vtblk_request_count++;
+                vtblk_request_enqueue(sc, req);
         }
 
-        return (error);
+        return (0);
 }
 
 static void
-vtblk_startio(struct vtblk_softc *sc)
+vtblk_request_free(struct vtblk_softc *sc)
 {
-        struct virtqueue *vq;
         struct vtblk_request *req;
-        int enq;
 
-        vq = sc->vtblk_vq;
-        enq = 0;
+        MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
 
-        VTBLK_LOCK_ASSERT(sc);
+        while ((req = vtblk_request_dequeue(sc)) != NULL) {
+                sc->vtblk_request_count--;
+                free(req, M_DEVBUF);
+        }
 
-        while (!virtqueue_full(vq)) {
-                if ((req = vtblk_dequeue_ready(sc)) == NULL)
-                        req = vtblk_bio_request(sc);
-                if (req == NULL)
-                        break;
+        KASSERT(sc->vtblk_request_count == 0,
+            ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
+}
 
-                if (vtblk_execute_request(sc, req) != 0) {
-                        vtblk_enqueue_ready(sc, req);
-                        break;
-                }
+static struct vtblk_request *
+vtblk_request_dequeue(struct vtblk_softc *sc)
+{
+        struct vtblk_request *req;
 
-                enq++;
+        req = TAILQ_FIRST(&sc->vtblk_req_free);
+        if (req != NULL) {
+                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
+                bzero(req, sizeof(struct vtblk_request));
         }
 
-        if (enq > 0)
-                virtqueue_notify(vq);
+        return (req);
+}
+
+static void
+vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
+{
+
+        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
+}
+
+static struct vtblk_request *
+vtblk_request_next_ready(struct vtblk_softc *sc)
+{
+        struct vtblk_request *req;
+
+        req = TAILQ_FIRST(&sc->vtblk_req_ready);
+        if (req != NULL)
+                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
+
+        return (req);
+}
+
+static void
+vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
+{
+
+        /* NOTE: Currently, there will be at most one request in the queue. */
+        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
+}
+
+static struct vtblk_request *
+vtblk_request_next(struct vtblk_softc *sc)
+{
+        struct vtblk_request *req;
+
+        req = vtblk_request_next_ready(sc);
+        if (req != NULL)
+                return (req);
+
+        return (vtblk_request_bio(sc));
 }
 
 static struct vtblk_request *
-vtblk_bio_request(struct vtblk_softc *sc)
+vtblk_request_bio(struct vtblk_softc *sc)
 {
         struct bio_queue_head *bioq;
         struct vtblk_request *req;
@@ -863,7 +854,7 @@ vtblk_bio_request(struct vtblk_softc *sc
         if (bioq_first(bioq) == NULL)
                 return (NULL);
 
-        req = vtblk_dequeue_request(sc);
+        req = vtblk_request_dequeue(sc);
         if (req == NULL)
                 return (NULL);
 
@@ -888,11 +879,14 @@ vtblk_bio_request(struct vtblk_softc *sc
                 panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
         }
 
+        if (bp->bio_flags & BIO_ORDERED)
+                req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
+
         return (req);
 }
 
 static int
-vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
+vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
 {
         struct virtqueue *vq;
         struct sglist *sg;
@@ -905,26 +899,20 @@ vtblk_execute_request(struct vtblk_softc
         ordered = 0;
         writable = 0;
 
-        VTBLK_LOCK_ASSERT(sc);
-
         /*
-         * Wait until the ordered request completes before
-         * executing subsequent requests.
+         * Some hosts (such as bhyve) do not implement the barrier feature,
+         * so we emulate it in the driver by allowing the barrier request
+         * to be the only one in flight.
          */
-        if (sc->vtblk_req_ordered != NULL)
-                return (EBUSY);
-
-        if (bp->bio_flags & BIO_ORDERED) {
-                if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
-                        /*
-                         * This request will be executed once all
-                         * the in-flight requests are completed.
-                         */
+        if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
+                if (sc->vtblk_req_ordered != NULL)
+                        return (EBUSY);
+                if (bp->bio_flags & BIO_ORDERED) {
                         if (!virtqueue_empty(vq))
                                 return (EBUSY);
                         ordered = 1;
-                } else
-                        req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
+                        req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER;
+                }
         }
 
         sglist_reset(sg);
@@ -933,7 +921,7 @@ vtblk_execute_request(struct vtblk_softc
         if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                 error = sglist_append_bio(sg, bp);
                 if (error || sg->sg_nseg == sg->sg_maxseg) {
-                        panic("%s: data buffer too big bio:%p error:%d",
+                        panic("%s: bio %p data buffer too big %d",
                             __func__, bp, error);
                 }
 
@@ -953,44 +941,156 @@ vtblk_execute_request(struct vtblk_softc
         return (error);
 }
 
-static void
-vtblk_vq_intr(void *xsc)
+static int
+vtblk_request_error(struct vtblk_request *req)
 {
-        struct vtblk_softc *sc;
-        struct virtqueue *vq;
-
-        sc = xsc;
-        vq = sc->vtblk_vq;
+        int error;
 
-again:
-        VTBLK_LOCK(sc);
-        if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
-                VTBLK_UNLOCK(sc);
-                return;
+        switch (req->vbr_ack) {
+        case VIRTIO_BLK_S_OK:
+                error = 0;
+                break;
+        case VIRTIO_BLK_S_UNSUPP:
+                error = ENOTSUP;
+                break;
+        default:
+                error = EIO;
+                break;
         }
 
-        vtblk_finish_completed(sc);
+        return (error);
+}
 
-        if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
-                vtblk_startio(sc);
-        else
-                wakeup(&sc->vtblk_vq);
+static void
+vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
+{
+        struct vtblk_request *req;
+        struct bio *bp;
 
-        if (virtqueue_enable_intr(vq) != 0) {
-                virtqueue_disable_intr(vq);
-                VTBLK_UNLOCK(sc);
-                goto again;
+        while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
+                if (sc->vtblk_req_ordered != NULL) {
+                        MPASS(sc->vtblk_req_ordered == req);
+                        sc->vtblk_req_ordered = NULL;
+                }
+
+                bp = req->vbr_bp;
+                bp->bio_error = vtblk_request_error(req);
+                TAILQ_INSERT_TAIL(queue, bp, bio_queue);
+
+                vtblk_request_enqueue(sc, req);
         }
+}
 
-        VTBLK_UNLOCK(sc);
+static void
+vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
+{
+        struct bio *bp, *tmp;
+
+        TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
+                if (bp->bio_error != 0)
+                        disk_err(bp, "hard error", -1, 1);
+                vtblk_bio_done(sc, bp, bp->bio_error);
+        }
 }
 
 static void
-vtblk_stop(struct vtblk_softc *sc)
+vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
 {
+        struct virtqueue *vq;
+        struct vtblk_request *req;
+        int last;
 
-        virtqueue_disable_intr(sc->vtblk_vq);
-        virtio_stop(sc->vtblk_dev);
+        vq = sc->vtblk_vq;
+        last = 0;
+
+        while ((req = virtqueue_drain(vq, &last)) != NULL) {
+                if (!skip_done)
+                        vtblk_bio_done(sc, req->vbr_bp, ENXIO);
+
+                vtblk_request_enqueue(sc, req);
+        }
+
+        sc->vtblk_req_ordered = NULL;
+        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
+}
+
+static void
+vtblk_drain(struct vtblk_softc *sc)
+{
+        struct bio_queue queue;
+        struct bio_queue_head *bioq;
+        struct vtblk_request *req;
+        struct bio *bp;
+
+        bioq = &sc->vtblk_bioq;
+        TAILQ_INIT(&queue);
+
+        if (sc->vtblk_vq != NULL) {
+                vtblk_queue_completed(sc, &queue);
+                vtblk_done_completed(sc, &queue);
+
+                vtblk_drain_vq(sc, 0);
+        }
+
+        while ((req = vtblk_request_next_ready(sc)) != NULL) {
+                vtblk_bio_done(sc, req->vbr_bp, ENXIO);
+                vtblk_request_enqueue(sc, req);
+        }
+
+        while (bioq_first(bioq) != NULL) {
+                bp = bioq_takefirst(bioq);
+                vtblk_bio_done(sc, bp, ENXIO);
+        }
+
+        vtblk_request_free(sc);
+}
+
+static void
+vtblk_startio(struct vtblk_softc *sc)
+{
+        struct virtqueue *vq;
+        struct vtblk_request *req;
+        int enq;
+
+        VTBLK_LOCK_ASSERT(sc);
+        vq = sc->vtblk_vq;
+        enq = 0;
+
+        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
+                return;
+
+        while (!virtqueue_full(vq)) {
+                req = vtblk_request_next(sc);
+                if (req == NULL)
+                        break;
+
+                if (vtblk_request_execute(sc, req) != 0) {
+                        vtblk_request_requeue_ready(sc, req);
+                        break;
+                }
+
+                enq++;
+        }
+
+        if (enq > 0)
+                virtqueue_notify(vq);
+}
+
+static void
+vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
+{
+
+        /* Because of GEOM direct dispatch, we cannot hold any locks. */
+        if (sc != NULL)
+                VTBLK_LOCK_ASSERT_NOTOWNED(sc);
+
+        if (error) {
+                bp->bio_resid = bp->bio_bcount;
+                bp->bio_error = error;
+                bp->bio_flags |= BIO_ERROR;
+        }
+
+        biodone(bp);
 }
 
 #define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)                  \
@@ -1025,7 +1125,7 @@ vtblk_read_config(struct vtblk_softc *sc
 #undef VTBLK_GET_CONFIG
 
 static void
-vtblk_get_ident(struct vtblk_softc *sc)
+vtblk_ident(struct vtblk_softc *sc)
 {
         struct bio buf;
         struct disk *dp;
@@ -1038,7 +1138,7 @@ vtblk_get_ident(struct vtblk_softc *sc)
         if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
                 return;
 
-        req = vtblk_dequeue_request(sc);
+        req = vtblk_request_dequeue(sc);
         if (req == NULL)
                 return;
 
@@ -1058,7 +1158,7 @@ vtblk_get_ident(struct vtblk_softc *sc)
         error = vtblk_poll_request(sc, req);
         VTBLK_UNLOCK(sc);
 
-        vtblk_enqueue_request(sc, req);
+        vtblk_request_enqueue(sc, req);
 
         if (error) {
                 device_printf(sc->vtblk_dev,
@@ -1066,77 +1166,6 @@ vtblk_get_ident(struct vtblk_softc *sc)
         }
 }
 
-static void
-vtblk_prepare_dump(struct vtblk_softc *sc)
-{
-        device_t dev;
-        struct virtqueue *vq;
-
-        dev = sc->vtblk_dev;
-        vq = sc->vtblk_vq;
-
-        vtblk_stop(sc);
-
-        /*
-         * Drain all requests caught in-flight in the virtqueue,
-         * skipping biodone(). When dumping, only one request is
-         * outstanding at a time, and we just poll the virtqueue
-         * for the response.
-         */
-        vtblk_drain_vq(sc, 1);
-
-        if (virtio_reinit(dev, sc->vtblk_features) != 0) {
-                panic("%s: cannot reinit VirtIO block device during dump",
-                    device_get_nameunit(dev));
-        }
-
-        virtqueue_disable_intr(vq);
-        virtio_reinit_complete(dev);
-}
-
-static int
-vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
-    size_t length)
-{
-        struct bio buf;
-        struct vtblk_request *req;
-
-        req = &sc->vtblk_dump_request;
-        req->vbr_ack = -1;
-        req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
-        req->vbr_hdr.ioprio = 1;
-        req->vbr_hdr.sector = offset / 512;
-
-        req->vbr_bp = &buf;
-        bzero(&buf, sizeof(struct bio));
-
-        buf.bio_cmd = BIO_WRITE;
-        buf.bio_data = virtual;
-        buf.bio_bcount = length;
-
-        return (vtblk_poll_request(sc, req));
-}
-
-static int
-vtblk_flush_dump(struct vtblk_softc *sc)
-{
-        struct bio buf;
-        struct vtblk_request *req;
-
-        req = &sc->vtblk_dump_request;
-        req->vbr_ack = -1;
-        req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
-        req->vbr_hdr.ioprio = 1;
-        req->vbr_hdr.sector = 0;
-
-        req->vbr_bp = &buf;
-        bzero(&buf, sizeof(struct bio));
-
-        buf.bio_cmd = BIO_FLUSH;
-
-        return (vtblk_poll_request(sc, req));
-}
-
 static int
 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
 {
@@ -1148,7 +1177,7 @@ vtblk_poll_request(struct vtblk_softc *s
         if (!virtqueue_empty(vq))
                 return (EBUSY);
 
-        error = vtblk_execute_request(sc, req);
+        error = vtblk_request_execute(sc, req);
         if (error)
                 return (error);
 
@@ -1164,212 +1193,188 @@ vtblk_poll_request(struct vtblk_softc *s
         return (error);
 }
 
-static void
-vtblk_finish_completed(struct vtblk_softc *sc)
+static int
+vtblk_quiesce(struct vtblk_softc *sc)
 {
-        struct vtblk_request *req;
-        struct bio *bp;
         int error;
 
-        while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
-                bp = req->vbr_bp;
+        VTBLK_LOCK_ASSERT(sc);
+        error = 0;
 
-                if (sc->vtblk_req_ordered != NULL) {
-                        /* This should be the only outstanding request. */
-                        MPASS(sc->vtblk_req_ordered == req);
-                        sc->vtblk_req_ordered = NULL;
+        while (!virtqueue_empty(sc->vtblk_vq)) {
+                if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
+                    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
+                        error = EBUSY;
+                        break;
                 }
-
-                error = vtblk_request_error(req);
-                if (error)
-                        disk_err(bp, "hard error", -1, 1);
-
-                vtblk_finish_bio(bp, error);
-                vtblk_enqueue_request(sc, req);
         }
+
+        return (error);
 }
 
 static void
-vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
+vtblk_vq_intr(void *xsc)
 {
+        struct vtblk_softc *sc;
         struct virtqueue *vq;
-        struct vtblk_request *req;
-        int last;
+        struct bio_queue queue;
 
+        sc = xsc;
         vq = sc->vtblk_vq;
-        last = 0;
-
-        while ((req = virtqueue_drain(vq, &last)) != NULL) {
-                if (!skip_done)
-                        vtblk_finish_bio(req->vbr_bp, ENXIO);
+        TAILQ_INIT(&queue);
 
-                vtblk_enqueue_request(sc, req);
-        }
-
-        sc->vtblk_req_ordered = NULL;
-        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
-}
-
-static void
-vtblk_drain(struct vtblk_softc *sc)
-{
-        struct bio_queue_head *bioq;
-        struct vtblk_request *req;
-        struct bio *bp;
+        VTBLK_LOCK(sc);
 
-        bioq = &sc->vtblk_bioq;
+again:
+        if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
+                goto out;
 
-        if (sc->vtblk_vq != NULL) {
-                vtblk_finish_completed(sc);
-                vtblk_drain_vq(sc, 0);
-        }
+        vtblk_queue_completed(sc, &queue);
        vtblk_startio(sc);
 
-        while ((req = vtblk_dequeue_ready(sc)) != NULL) {
-                vtblk_finish_bio(req->vbr_bp, ENXIO);
-                vtblk_enqueue_request(sc, req);
+        if (virtqueue_enable_intr(vq) != 0) {
+                virtqueue_disable_intr(vq);
                goto again;
         }
 
-        while (bioq_first(bioq) != NULL) {
-                bp = bioq_takefirst(bioq);
-                vtblk_finish_bio(bp, ENXIO);
-        }
+        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
+                wakeup(&sc->vtblk_vq);
 
-        vtblk_free_requests(sc);
+out:
+        VTBLK_UNLOCK(sc);
+        vtblk_done_completed(sc, &queue);
 }
 
-#ifdef INVARIANTS
 static void
-vtblk_request_invariants(struct vtblk_request *req)
+vtblk_stop(struct vtblk_softc *sc)
 {
-        int hdr_nsegs, ack_nsegs;
-
-        hdr_nsegs = sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr));
-        ack_nsegs = sglist_count(&req->vbr_ack, sizeof(req->vbr_ack));
 
-        KASSERT(hdr_nsegs == 1, ("request header crossed page boundary"));
-        KASSERT(ack_nsegs == 1, ("request ack crossed page boundary"));
+        virtqueue_disable_intr(sc->vtblk_vq);
+        virtio_stop(sc->vtblk_dev);
 }
-#endif
 
-static int
-vtblk_alloc_requests(struct vtblk_softc *sc)
+static void
+vtblk_dump_prepare(struct vtblk_softc *sc)
 {
-        struct vtblk_request *req;
-        int i, nreqs;
+        device_t dev;
        struct virtqueue *vq;
 
-        nreqs = virtqueue_size(sc->vtblk_vq);
+        dev = sc->vtblk_dev;
+        vq = sc->vtblk_vq;
+
+        vtblk_stop(sc);
 
         /*
-         * Preallocate sufficient requests to keep the virtqueue full. Each
-         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
-         * the number allocated when indirect descriptors are not available.
+         * Drain all requests caught in-flight in the virtqueue,
+         * skipping biodone(). When dumping, only one request is
+         * outstanding at a time, and we just poll the virtqueue
+         * for the response.
          */
-        if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
-                nreqs /= VTBLK_MIN_SEGMENTS;
-
-        for (i = 0; i < nreqs; i++) {
-                req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
-                if (req == NULL)
-                        return (ENOMEM);
-
-#ifdef INVARIANTS
-                vtblk_request_invariants(req);
-#endif
+        vtblk_drain_vq(sc, 1);
 
-                sc->vtblk_request_count++;
-                vtblk_enqueue_request(sc, req);
+        if (virtio_reinit(dev, sc->vtblk_features) != 0) {
+                panic("%s: cannot reinit VirtIO block device during dump",
+                    device_get_nameunit(dev));
         }
 
-        return (0);
+        virtqueue_disable_intr(vq);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***