From owner-svn-src-head@FreeBSD.ORG Mon Nov 30 04:32:35 2009 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 48F921065670; Mon, 30 Nov 2009 04:32:35 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 36CBE8FC13; Mon, 30 Nov 2009 04:32:35 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id nAU4WYti018516; Mon, 30 Nov 2009 04:32:34 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id nAU4WYeA018513; Mon, 30 Nov 2009 04:32:34 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <200911300432.nAU4WYeA018513@svn.freebsd.org> From: Kip Macy Date: Mon, 30 Nov 2009 04:32:34 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r199960 - head/sys/dev/xen/blkfront X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 30 Nov 2009 04:32:35 -0000 Author: kmacy Date: Mon Nov 30 04:32:34 2009 New Revision: 199960 URL: http://svn.freebsd.org/changeset/base/199960 Log: Merge Scott Long's latest blkfront now that the licensing issues are resolved Modified: head/sys/dev/xen/blkfront/blkfront.c head/sys/dev/xen/blkfront/block.h Modified: head/sys/dev/xen/blkfront/blkfront.c ============================================================================== --- head/sys/dev/xen/blkfront/blkfront.c Mon Nov 30 04:20:43 2009 (r199959) +++ head/sys/dev/xen/blkfront/blkfront.c Mon Nov 30 04:32:34 2009 (r199960) @@ -1,6 +1,7 @@ /* * XenBSD block device driver * + * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy @@ -46,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -63,27 +65,21 @@ __FBSDID("$FreeBSD$"); #include "xenbus_if.h" -#define ASSERT(S) KASSERT(S, (#S)) /* prototypes */ -struct xb_softc; +static void xb_free_command(struct xb_command *cm); static void xb_startio(struct xb_softc *sc); -static void connect(device_t, struct blkfront_info *); +static void connect(struct xb_softc *); static void blkfront_closing(device_t); static int blkfront_detach(device_t); -static int talk_to_backend(device_t, struct blkfront_info *); -static int setup_blkring(device_t, struct blkfront_info *); +static int talk_to_backend(struct xb_softc *); +static int setup_blkring(struct xb_softc *); static void blkif_int(void *); -#if 0 -static void blkif_restart_queue(void *arg); -#endif -static void blkif_recover(struct blkfront_info *); -static void blkif_completion(struct blk_shadow *); -static void blkif_free(struct blkfront_info *, int); +static void blkif_recover(struct xb_softc *); +static void blkif_completion(struct xb_command *); +static void blkif_free(struct xb_softc *, int); +static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int); #define GRANT_INVALID_REF 0 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) - -LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; /* Control whether runtime update of vbds is enabled. */ #define ENABLE_VBD_UPDATE 0 @@ -92,7 +88,6 @@ LIST_HEAD(xb_softc_list_head, xb_softc) static void vbd_update(void); #endif - #define BLKIF_STATE_DISCONNECTED 0 #define BLKIF_STATE_CONNECTED 1 #define BLKIF_STATE_SUSPENDED 2 @@ -111,44 +106,34 @@ static char * blkif_status_name[] = { [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", }; #endif -#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) + #if 0 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) #endif -static grant_ref_t gref_head; #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) -static void kick_pending_request_queues(struct blkfront_info *); +#define BLKIF_MAXIO (32 * 1024) + static int blkif_open(struct disk *dp); static int blkif_close(struct disk *dp); static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); -static int blkif_queue_request(struct bio *bp); +static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm); static void xb_strategy(struct bio *bp); // In order to quiesce the device during kernel dumps, outstanding requests to // DOM0 for disk reads/writes need to be accounted for. -static int blkif_queued_requests; static int xb_dump(void *, void *, vm_offset_t, off_t, size_t); - /* XXX move to xb_vbd.c when VBD update support is added */ #define MAX_VBDS 64 #define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ #define XBD_SECTOR_SHFT 9 -static struct mtx blkif_io_lock; - -static vm_paddr_t -pfn_to_mfn(vm_paddr_t pfn) -{ - return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); -} - /* * Translate Linux major/minor to an appropriate name and unit * number. For HVM guests, this allows us to use the same drive names @@ -217,23 +202,18 @@ blkfront_vdevice_to_unit(int vdevice, in } int -xlvbd_add(device_t dev, blkif_sector_t capacity, - int vdevice, uint16_t vdisk_info, uint16_t sector_size, - struct blkfront_info *info) +xlvbd_add(struct xb_softc *sc, blkif_sector_t capacity, + int vdevice, uint16_t vdisk_info, uint16_t sector_size) { - struct xb_softc *sc; int unit, error = 0; const char *name; blkfront_vdevice_to_unit(vdevice, &unit, &name); - sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); sc->xb_unit = unit; - sc->xb_info = info; - info->sc = sc; if (strcmp(name, "xbd")) - device_printf(dev, "attaching as %s%d\n", name, unit); + device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit); memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); sc->xb_disk = disk_alloc(); @@ -247,31 +227,18 @@ xlvbd_add(device_t dev, blkif_sector_t c sc->xb_disk->d_drv1 = sc; sc->xb_disk->d_sectorsize = sector_size; - /* XXX */ sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; -#if 0 - sc->xb_disk->d_maxsize = DFLTPHYS; -#else /* XXX: xen can't handle large single i/o requests */ - sc->xb_disk->d_maxsize = 4096; -#endif -#ifdef notyet - XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", - xb_diskinfo[sc->xb_unit].device, sc->xb_unit, - sc->xb_disk->d_mediasize); -#endif + sc->xb_disk->d_maxsize = BLKIF_MAXIO; sc->xb_disk->d_flags = 0; disk_create(sc->xb_disk, DISK_VERSION_00); - bioq_init(&sc->xb_bioq); return error; } void -xlvbd_del(struct blkfront_info *info) +xlvbd_del(struct xb_softc *sc) { - struct xb_softc *sc; - sc = info->sc; disk_destroy(sc->xb_disk); } /************************ end VBD support *****************/ @@ -289,102 +256,147 @@ xb_strategy(struct bio *bp) if (sc == NULL) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; - goto bad; + bp->bio_resid = bp->bio_bcount; + biodone(bp); + return; } - DPRINTK(""); - /* * Place it in the queue of disk activities for this disk */ - mtx_lock(&blkif_io_lock); + mtx_lock(&sc->xb_io_lock); - bioq_disksort(&sc->xb_bioq, bp); + xb_enqueue_bio(sc, bp); xb_startio(sc); - mtx_unlock(&blkif_io_lock); + mtx_unlock(&sc->xb_io_lock); return; +} - bad: - /* - * Correctly set the bio to indicate a failed tranfer. - */ - bp->bio_resid = bp->bio_bcount; +static void +xb_bio_complete(struct xb_softc *sc, struct xb_command *cm) +{ + struct bio *bp; + + bp = cm->bp; + + if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) { + disk_err(bp, "disk error" , -1, 0); + printf(" status: %x\n", cm->status); + bp->bio_flags |= BIO_ERROR; + } + + if (bp->bio_flags & BIO_ERROR) + bp->bio_error = EIO; + else + bp->bio_resid = 0; + + xb_free_command(cm); biodone(bp); - return; } -static void xb_quiesce(struct blkfront_info *info); // Quiesce the disk writes for a dump file before allowing the next buffer. static void -xb_quiesce(struct blkfront_info *info) +xb_quiesce(struct xb_softc *sc) { int mtd; // While there are outstanding requests - while (blkif_queued_requests) { - RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, mtd); + while (!TAILQ_EMPTY(&sc->cm_busy)) { + RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd); if (mtd) { - // Recieved request completions, update queue. - blkif_int(info); + /* Recieved request completions, update queue. */ + blkif_int(sc); } - if (blkif_queued_requests) { - // Still pending requests, wait for the disk i/o to complete + if (!TAILQ_EMPTY(&sc->cm_busy)) { + /* + * Still pending requests, wait for the disk i/o + * to complete. + */ HYPERVISOR_yield(); } } } -// Some bio structures for dumping core -#define DUMP_BIO_NO 16 // 16 * 4KB = 64KB dump block -static struct bio xb_dump_bp[DUMP_BIO_NO]; +/* Kernel dump function for a paravirtualized disk device */ +static void +xb_dump_complete(struct xb_command *cm) +{ + + xb_enqueue_complete(cm); +} -// Kernel dump function for a paravirtualized disk device static int xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { - int sbp; - int mbp; - size_t chunk; - struct disk *dp = arg; - struct xb_softc *sc = (struct xb_softc *) dp->d_drv1; - int rc = 0; - - xb_quiesce(sc->xb_info); // All quiet on the western front. - if (length > 0) { - // If this lock is held, then this module is failing, and a successful - // kernel dump is highly unlikely anyway. - mtx_lock(&blkif_io_lock); - // Split the 64KB block into 16 4KB blocks - for (sbp=0; length>0 && sbp PAGE_SIZE ? PAGE_SIZE : length; - xb_dump_bp[sbp].bio_disk = dp; - xb_dump_bp[sbp].bio_pblkno = offset / dp->d_sectorsize; - xb_dump_bp[sbp].bio_bcount = chunk; - xb_dump_bp[sbp].bio_resid = chunk; - xb_dump_bp[sbp].bio_data = virtual; - xb_dump_bp[sbp].bio_cmd = BIO_WRITE; - xb_dump_bp[sbp].bio_done = NULL; - - bioq_disksort(&sc->xb_bioq, &xb_dump_bp[sbp]); - - length -= chunk; - offset += chunk; - virtual = (char *) virtual + chunk; + struct disk *dp = arg; + struct xb_softc *sc = (struct xb_softc *) dp->d_drv1; + struct xb_command *cm; + size_t chunk; + int sbp; + int rc = 0; + + if (length <= 0) + return (rc); + + xb_quiesce(sc); /* All quiet on the western front. */ + + /* + * If this lock is held, then this module is failing, and a + * successful kernel dump is highly unlikely anyway. + */ + mtx_lock(&sc->xb_io_lock); + + /* Split the 64KB block as needed */ + for (sbp=0; length > 0; sbp++) { + cm = xb_dequeue_free(sc); + if (cm == NULL) { + mtx_unlock(&sc->xb_io_lock); + device_printf(sc->xb_dev, "dump: no more commands?\n"); + return (EBUSY); } - // Tell DOM0 to do the I/O - xb_startio(sc); - mtx_unlock(&blkif_io_lock); - - // Must wait for the completion: the dump routine reuses the same - // 16 x 4KB buffer space. - xb_quiesce(sc->xb_info); // All quite on the eastern front - // If there were any errors, bail out... - for (mbp=0; mbpgref_head) < 0) { + xb_free_command(cm); + mtx_unlock(&sc->xb_io_lock); + device_printf(sc->xb_dev, "no more grant allocs?\n"); + return (EBUSY); } + + chunk = length > BLKIF_MAXIO ? BLKIF_MAXIO : length; + cm->data = virtual; + cm->datalen = chunk; + cm->operation = BLKIF_OP_WRITE; + cm->sector_number = offset / dp->d_sectorsize; + cm->cm_complete = xb_dump_complete; + + xb_enqueue_ready(cm); + + length -= chunk; + offset += chunk; + virtual = (char *) virtual + chunk; } + + /* Tell DOM0 to do the I/O */ + xb_startio(sc); + mtx_unlock(&sc->xb_io_lock); + + /* Poll for the completion. */ + xb_quiesce(sc); /* All quite on the eastern front */ + + /* If there were any errors, bail out... */ + while ((cm = xb_dequeue_complete(sc)) != NULL) { + if (cm->status != BLKIF_RSP_OKAY) { + device_printf(sc->xb_dev, + "Dump I/O failed at sector %jd\n", + cm->sector_number); + rc = EIO; + } + xb_free_command(cm); + } + return (rc); } @@ -410,9 +422,10 @@ blkfront_probe(device_t dev) static int blkfront_attach(device_t dev) { - int error, vdevice, i, unit; - struct blkfront_info *info; + struct xb_softc *sc; + struct xb_command *cm; const char *name; + int error, vdevice, i, unit; /* FIXME: Use dynamic device id if this is not set. */ error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), @@ -427,29 +440,56 @@ blkfront_attach(device_t dev) if (!strcmp(name, "xbd")) device_set_unit(dev, unit); - info = device_get_softc(dev); - - /* - * XXX debug only - */ - for (i = 0; i < sizeof(*info); i++) - if (((uint8_t *)info)[i] != 0) - panic("non-null memory"); - - info->shadow_free = 0; - info->xbdev = dev; - info->vdevice = vdevice; - info->connected = BLKIF_STATE_DISCONNECTED; + sc = device_get_softc(dev); + mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF); + xb_initq_free(sc); + xb_initq_busy(sc); + xb_initq_ready(sc); + xb_initq_complete(sc); + xb_initq_bio(sc); + + /* Allocate parent DMA tag */ + if (bus_dma_tag_create( NULL, /* parent */ + 4096, 0, /* algnmnt, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + BLKIF_MAXIO, /* maxsize */ + BLKIF_MAX_SEGMENTS_PER_REQUEST, /* nsegments */ + PAGE_SIZE, /* maxsegsize */ + BUS_DMA_ALLOCNOW, /* flags */ + busdma_lock_mutex, /* lockfunc */ + &sc->xb_io_lock, /* lockarg */ + &sc->xb_io_dmat)) { + device_printf(dev, "Cannot allocate parent DMA tag\n"); + return (ENOMEM); + } +#ifdef notyet + if (bus_dma_tag_set(sc->xb_io_dmat, BUS_DMA_SET_MINSEGSZ, + XBD_SECTOR_SIZE)) { + device_printf(dev, "Cannot set sector size\n"); + return (EINVAL); + } +#endif + + sc->xb_dev = dev; + sc->vdevice = vdevice; + sc->connected = BLKIF_STATE_DISCONNECTED; /* work queue needed ? */ - for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + for (i = 0; i < BLK_RING_SIZE; i++) { + cm = &sc->shadow[i]; + cm->req.id = i; + cm->cm_sc = sc; + if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0) + break; + xb_free_command(cm); + } /* Front end dir is a number, which is used as the id. */ - info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); + sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); - error = talk_to_backend(dev, info); + error = talk_to_backend(sc); if (error) return (error); @@ -459,12 +499,12 @@ blkfront_attach(device_t dev) static int blkfront_suspend(device_t dev) { - struct blkfront_info *info = device_get_softc(dev); + struct xb_softc *sc = device_get_softc(dev); /* Prevent new requests being issued until we fix things up. */ - mtx_lock(&blkif_io_lock); - info->connected = BLKIF_STATE_SUSPENDED; - mtx_unlock(&blkif_io_lock); + mtx_lock(&sc->xb_io_lock); + sc->connected = BLKIF_STATE_SUSPENDED; + mtx_unlock(&sc->xb_io_lock); return (0); } @@ -472,29 +512,31 @@ blkfront_suspend(device_t dev) static int blkfront_resume(device_t dev) { - struct blkfront_info *info = device_get_softc(dev); + struct xb_softc *sc = device_get_softc(dev); int err; DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev)); - blkif_free(info, 1); - err = talk_to_backend(dev, info); - if (info->connected == BLKIF_STATE_SUSPENDED && !err) - blkif_recover(info); + blkif_free(sc, 1); + err = talk_to_backend(sc); + if (sc->connected == BLKIF_STATE_SUSPENDED && !err) + blkif_recover(sc); return (err); } /* Common code used when first setting up, and when resuming. */ static int -talk_to_backend(device_t dev, struct blkfront_info *info) +talk_to_backend(struct xb_softc *sc) { - const char *message = NULL; + device_t dev; struct xenbus_transaction xbt; + const char *message = NULL; int err; /* Create shared ring, alloc event channel. */ - err = setup_blkring(dev, info); + dev = sc->xb_dev; + err = setup_blkring(sc); if (err) goto out; @@ -506,13 +548,13 @@ talk_to_backend(device_t dev, struct blk } err = xenbus_printf(xbt, xenbus_get_node(dev), - "ring-ref","%u", info->ring_ref); + "ring-ref","%u", sc->ring_ref); if (err) { message = "writing ring-ref"; goto abort_transaction; } err = xenbus_printf(xbt, xenbus_get_node(dev), - "event-channel", "%u", irq_to_evtchn_port(info->irq)); + "event-channel", "%u", irq_to_evtchn_port(sc->irq)); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -540,47 +582,47 @@ talk_to_backend(device_t dev, struct blk if (message) xenbus_dev_fatal(dev, err, "%s", message); destroy_blkring: - blkif_free(info, 0); + blkif_free(sc, 0); out: return err; } static int -setup_blkring(device_t dev, struct blkfront_info *info) +setup_blkring(struct xb_softc *sc) { blkif_sring_t *sring; int error; - info->ring_ref = GRANT_INVALID_REF; + sc->ring_ref = GRANT_INVALID_REF; sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (sring == NULL) { - xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); + xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring"); return ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&sc->ring, sring, PAGE_SIZE); - error = xenbus_grant_ring(dev, - (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref); + error = xenbus_grant_ring(sc->xb_dev, + (vtomach(sc->ring.sring) >> PAGE_SHIFT), &sc->ring_ref); if (error) { free(sring, M_DEVBUF); - info->ring.sring = NULL; + sc->ring.sring = NULL; goto fail; } - error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), - "xbd", (driver_intr_t *)blkif_int, info, - INTR_TYPE_BIO | INTR_MPSAFE, &info->irq); + error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(sc->xb_dev), + "xbd", (driver_intr_t *)blkif_int, sc, + INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq); if (error) { - xenbus_dev_fatal(dev, error, + xenbus_dev_fatal(sc->xb_dev, error, "bind_evtchn_to_irqhandler failed"); goto fail; } return (0); fail: - blkif_free(info, 0); + blkif_free(sc, 0); return (error); } @@ -591,7 +633,7 @@ setup_blkring(device_t dev, struct blkfr static int blkfront_backend_changed(device_t dev, XenbusState backend_state) { - struct blkfront_info *info = device_get_softc(dev); + struct xb_softc *sc = device_get_softc(dev); DPRINTK("backend_state=%d\n", backend_state); @@ -606,22 +648,22 @@ blkfront_backend_changed(device_t dev, X break; case XenbusStateConnected: - connect(dev, info); + connect(sc); break; case XenbusStateClosing: - if (info->users > 0) + if (sc->users > 0) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else blkfront_closing(dev); #ifdef notyet - bd = bdget(info->dev); + bd = bdget(sc->dev); if (bd == NULL) xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); down(&bd->bd_sem); - if (info->users > 0) + if (sc->users > 0) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else @@ -639,14 +681,15 @@ blkfront_backend_changed(device_t dev, X ** the details about the physical device - #sectors, size, etc). */ static void -connect(device_t dev, struct blkfront_info *info) +connect(struct xb_softc *sc) { + device_t dev = sc->xb_dev; unsigned long sectors, sector_size; unsigned int binfo; - int err; + int err, feature_barrier; - if( (info->connected == BLKIF_STATE_CONNECTED) || - (info->connected == BLKIF_STATE_SUSPENDED) ) + if( (sc->connected == BLKIF_STATE_CONNECTED) || + (sc->connected == BLKIF_STATE_SUSPENDED) ) return; DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); @@ -663,10 +706,10 @@ connect(device_t dev, struct blkfront_in return; } err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), - "feature-barrier", "%lu", &info->feature_barrier, + "feature-barrier", "%lu", &feature_barrier, NULL); - if (err) - info->feature_barrier = 0; + if (!err || feature_barrier) + sc->xb_flags |= XB_BARRIER; device_printf(dev, "%juMB <%s> at %s", (uintmax_t) sectors / (1048576 / sector_size), @@ -674,20 +717,17 @@ connect(device_t dev, struct blkfront_in xenbus_get_node(dev)); bus_print_child_footer(device_get_parent(dev), dev); - xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info); + xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size); (void)xenbus_set_state(dev, XenbusStateConnected); /* Kick pending requests. */ - mtx_lock(&blkif_io_lock); - info->connected = BLKIF_STATE_CONNECTED; - kick_pending_request_queues(info); - mtx_unlock(&blkif_io_lock); - info->is_ready = 1; + mtx_lock(&sc->xb_io_lock); + sc->connected = BLKIF_STATE_CONNECTED; + xb_startio(sc); + sc->xb_flags |= XB_READY; + mtx_unlock(&sc->xb_io_lock); -#if 0 - add_disk(info->gd); -#endif } /** @@ -699,14 +739,14 @@ connect(device_t dev, struct blkfront_in static void blkfront_closing(device_t dev) { - struct blkfront_info *info = device_get_softc(dev); + struct xb_softc *sc = device_get_softc(dev); DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); - if (info->mi) { + if (sc->mi) { DPRINTK("Calling xlvbd_del\n"); - xlvbd_del(info); - info->mi = NULL; + xlvbd_del(sc); + sc->mi = NULL; } xenbus_set_state(dev, XenbusStateClosed); @@ -716,92 +756,33 @@ blkfront_closing(device_t dev) static int blkfront_detach(device_t dev) { - struct blkfront_info *info = device_get_softc(dev); + struct xb_softc *sc = device_get_softc(dev); DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev)); - blkif_free(info, 0); + blkif_free(sc, 0); + mtx_destroy(&sc->xb_io_lock); return 0; } -static inline int -GET_ID_FROM_FREELIST(struct blkfront_info *info) -{ - unsigned long nfree = info->shadow_free; - - KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree)); - info->shadow_free = info->shadow[nfree].req.id; - info->shadow[nfree].req.id = 0x0fffffee; /* debug */ - atomic_add_int(&blkif_queued_requests, 1); - return nfree; -} - static inline void -ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id) -{ - info->shadow[id].req.id = info->shadow_free; - info->shadow[id].request = 0; - info->shadow_free = id; - atomic_subtract_int(&blkif_queued_requests, 1); -} - -static inline void -flush_requests(struct blkfront_info *info) +flush_requests(struct xb_softc *sc) { int notify; - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify); if (notify) - notify_remote_via_irq(info->irq); -} - -static void -kick_pending_request_queues(struct blkfront_info *info) -{ - /* XXX check if we can't simplify */ -#if 0 - if (!RING_FULL(&info->ring)) { - /* Re-enable calldowns. */ - blk_start_queue(info->rq); - /* Kick things off immediately. */ - do_blkif_request(info->rq); - } -#endif - if (!RING_FULL(&info->ring)) { -#if 0 - sc = LIST_FIRST(&xbsl_head); - LIST_REMOVE(sc, entry); - /* Re-enable calldowns. */ - blk_start_queue(di->rq); -#endif - /* Kick things off immediately. */ - xb_startio(info->sc); - } -} - -#if 0 -/* XXX */ -static void blkif_restart_queue(void *arg) -{ - struct blkfront_info *info = (struct blkfront_info *)arg; - - mtx_lock(&blkif_io_lock); - kick_pending_request_queues(info); - mtx_unlock(&blkif_io_lock); + notify_remote_via_irq(sc->irq); } -#endif static void blkif_restart_queue_callback(void *arg) { -#if 0 - struct blkfront_info *info = (struct blkfront_info *)arg; - /* XXX BSD equiv ? */ + struct xb_softc *sc = arg; - schedule_work(&info->work); -#endif + xb_startio(sc); } static int @@ -815,7 +796,7 @@ blkif_open(struct disk *dp) } sc->xb_flags |= XB_OPEN; - sc->xb_info->users++; + sc->users++; return (0); } @@ -827,11 +808,11 @@ blkif_close(struct disk *dp) if (sc == NULL) return (ENXIO); sc->xb_flags &= ~XB_OPEN; - if (--(sc->xb_info->users) == 0) { + if (--(sc->users) == 0) { /* Check whether we have been instructed to close. We will have ignored this request initially, as the device was still mounted. */ - device_t dev = sc->xb_info->xbdev; + device_t dev = sc->xb_dev; XenbusState state = xenbus_read_driver_state(xenbus_get_otherend_path(dev)); @@ -852,6 +833,18 @@ blkif_ioctl(struct disk *dp, u_long cmd, return (ENOTTY); } +static void +xb_free_command(struct xb_command *cm) +{ + + KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0, + ("Freeing command that is still on a queue\n")); + + cm->cm_flags = 0; + cm->bp = NULL; + cm->cm_complete = NULL; + xb_enqueue_free(cm); +} /* * blkif_queue_request @@ -863,106 +856,152 @@ blkif_ioctl(struct disk *dp, u_long cmd, * buffer: buffer to read/write into. this should be a * virtual address in the guest os. */ -static int blkif_queue_request(struct bio *bp) +static struct xb_command * +xb_bio_command(struct xb_softc *sc) +{ + struct xb_command *cm; + struct bio *bp; + + if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) + return (NULL); + + bp = xb_dequeue_bio(sc); + if (bp == NULL) + return (NULL); + + if ((cm = xb_dequeue_free(sc)) == NULL) { + xb_requeue_bio(sc, bp); + return (NULL); + } + + if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, + &cm->gref_head) < 0) { + gnttab_request_free_callback(&sc->callback, + blkif_restart_queue_callback, sc, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + xb_requeue_bio(sc, bp); + xb_enqueue_free(cm); + sc->xb_flags |= XB_FROZEN; + return (NULL); + } + + /* XXX Can we grab refs before doing the load so that the ref can + * be filled out here? + */ + cm->bp = bp; + cm->data = bp->bio_data; + cm->datalen = bp->bio_bcount; + cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : + BLKIF_OP_WRITE; + cm->sector_number = (blkif_sector_t)bp->bio_pblkno; + + return (cm); +} + +static int +blkif_queue_request(struct xb_softc *sc, struct xb_command *cm) { - caddr_t alignbuf; + int error; + + error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen, + blkif_queue_cb, cm, 0); + if (error == EINPROGRESS) { + printf("EINPROGRESS\n"); + sc->xb_flags |= XB_FROZEN; + cm->cm_flags |= XB_CMD_FROZEN; + return (0); + } + + return (error); +} + +static void +blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct xb_softc *sc; + struct xb_command *cm; + blkif_request_t *ring_req; vm_paddr_t buffer_ma; - blkif_request_t *ring_req; - unsigned long id; uint64_t fsect, lsect; - struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; - struct blkfront_info *info = sc->xb_info; - int ref; - - if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) - return 1; - - if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { - gnttab_request_free_callback( - &info->callback, - blkif_restart_queue_callback, - info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); - return 1; + int ref, i, op; + + cm = arg; + sc = cm->cm_sc; + + if (error) { + printf("error %d in blkif_queue_cb\n", error); + cm->bp->bio_error = EIO; + biodone(cm->bp); + xb_free_command(cm); + return; } - /* Check if the buffer is properly aligned */ - if ((vm_offset_t)bp->bio_data & PAGE_MASK) { - int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : - PAGE_SIZE; - caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, - M_NOWAIT); - - alignbuf = (char *)roundup2((u_long)newbuf, align); - - /* save a copy of the current buffer */ - bp->bio_driver1 = newbuf; - bp->bio_driver2 = alignbuf; - - /* Copy the data for a write */ - if (bp->bio_cmd == BIO_WRITE) - bcopy(bp->bio_data, alignbuf, bp->bio_bcount); - } else - alignbuf = bp->bio_data; - /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&info->ring, - info->ring.req_prod_pvt); - id = GET_ID_FROM_FREELIST(info); - info->shadow[id].request = (unsigned long)bp; - - ring_req->id = id; - ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : - BLKIF_OP_WRITE; - - ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; - ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; - - ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer - * chaining is not supported. *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***