Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 26 Jun 2013 20:39:08 +0000 (UTC)
From:      "Justin T. Gibbs" <gibbs@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r252260 - head/sys/dev/xen/blkfront
Message-ID:  <201306262039.r5QKd8Ko055944@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gibbs
Date: Wed Jun 26 20:39:07 2013
New Revision: 252260
URL: http://svnweb.freebsd.org/changeset/base/252260

Log:
  In the Xen block front driver, take advantage of backends that
  support cache flush and write barrier commands.
  
  sys/dev/xen/blkfront/block.h:
  	Add per-command flag that specifies that the I/O queue must
  	be frozen after this command is dispatched.  This is used
  	to implement "single-stepping".
  
  	Remove the unused per-command flag that indicates a polled
  	command.
  
  	Add block device instance flags to record backend features.
  
  	Add a block device instance flag to indicate the I/O queue
  	is frozen until all outstanding I/O completes.
  
  	Enhance the queue API to allow the number of elements in a
  	queue to be interrogated.
  
  	Prefer "inline" to "__inline".
  
  sys/dev/xen/blkfront/blkfront.c:
  	Formalize queue freeze semantics by adding methods for both
  	global and command-associated queue freezing.
  
  	Provide mechanism to freeze the I/O queue until all outstanding
  	I/O completes.  Use this to implement barrier semantics
  	(BIO_ORDERED) when the backend does not support
  	BLKIF_OP_WRITE_BARRIER commands.
  
  	Implement BIO_FLUSH as either a BLKIF_OP_FLUSH_DISKCACHE
  	command or a 0 byte write barrier.  Currently, all publicly
  	available backends perform a diskcache flush when processing
  	barrier commands, and this frontend behavior matches what
  	is done in Linux.
  
  	Simplify code by using new queue length API.
  
  	Report backend features during device attach and via sysctl.
  
  Submitted by:	Roger Pau Monné
  Submitted by:	gibbs (Merge with new driver queue API, sysctl support)

Modified:
  head/sys/dev/xen/blkfront/blkfront.c
  head/sys/dev/xen/blkfront/block.h

Modified: head/sys/dev/xen/blkfront/blkfront.c
==============================================================================
--- head/sys/dev/xen/blkfront/blkfront.c	Wed Jun 26 19:43:22 2013	(r252259)
+++ head/sys/dev/xen/blkfront/blkfront.c	Wed Jun 26 20:39:07 2013	(r252260)
@@ -111,6 +111,26 @@ xbd_thaw(struct xbd_softc *sc, xbd_flag_
 	sc->xbd_qfrozen_cnt--;
 }
 
+static void
+xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
+{
+	if ((cm->cm_flags & XBDCF_FROZEN) != 0)
+		return;
+
+	cm->cm_flags |= XBDCF_FROZEN|cm_flag;
+	xbd_freeze(sc, XBDF_NONE);
+}
+
+static void
+xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
+{
+	if ((cm->cm_flags & XBDCF_FROZEN) == 0)
+		return;
+
+	cm->cm_flags &= ~XBDCF_FROZEN;
+	xbd_thaw(sc, XBDF_NONE);
+}
+
 static inline void 
 xbd_flush_requests(struct xbd_softc *sc)
 {
@@ -263,8 +283,7 @@ xbd_queue_request(struct xbd_softc *sc, 
 		 * we just attempted to map, so we can't rely on bus dma
 		 * blocking for it too.
 		 */
-		xbd_freeze(sc, XBDF_NONE);
-		cm->cm_flags |= XBDCF_FROZEN|XBDCF_ASYNC_MAPPING;
+		xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
 		return (0);
 	}
 
@@ -318,10 +337,46 @@ xbd_bio_command(struct xbd_softc *sc)
 	cm->cm_bp = bp;
 	cm->cm_data = bp->bio_data;
 	cm->cm_datalen = bp->bio_bcount;
-	cm->cm_operation = (bp->bio_cmd == BIO_READ) ?
-	    BLKIF_OP_READ : BLKIF_OP_WRITE;
 	cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
 
+	switch (bp->bio_cmd) {
+	case BIO_READ:
+		cm->cm_operation = BLKIF_OP_READ;
+		break;
+	case BIO_WRITE:
+		cm->cm_operation = BLKIF_OP_WRITE;
+		if ((bp->bio_flags & BIO_ORDERED) != 0) {
+			if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+				cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+			} else {
+				/*
+				 * Single step this command.
+				 */
+				cm->cm_flags |= XBDCF_Q_FREEZE;
+				if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+					/*
+					 * Wait for in-flight requests to
+					 * finish.
+					 */
+					xbd_freeze(sc, XBDF_WAIT_IDLE);
+					xbd_requeue_cm(cm, XBD_Q_READY);
+					return (NULL);
+				}
+			}
+		}
+		break;
+	case BIO_FLUSH:
+		if ((sc->xbd_flags & XBDF_FLUSH) != 0)
+			cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
+		else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
+			cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+		else
+			panic("flush request, but no flush support available");
+		break;
+	default:
+		panic("unknown bio command %d", bp->bio_cmd);
+	}
+
 	return (cm);
 }
 
@@ -356,6 +411,14 @@ xbd_startio(struct xbd_softc *sc)
 		if (cm == NULL)
 			break;
 
+		if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
+			/*
+			 * Single step command.  Future work is
+			 * held off until this command completes.
+			 */
+			xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
+		}
+
 		if ((error = xbd_queue_request(sc, cm)) != 0) {
 			printf("xbd_queue_request returned %d\n", error);
 			break;
@@ -425,7 +488,8 @@ xbd_int(void *xsc)
 
 		if (cm->cm_operation == BLKIF_OP_READ)
 			op = BUS_DMASYNC_POSTREAD;
-		else if (cm->cm_operation == BLKIF_OP_WRITE)
+		else if (cm->cm_operation == BLKIF_OP_WRITE ||
+		    cm->cm_operation == BLKIF_OP_WRITE_BARRIER)
 			op = BUS_DMASYNC_POSTWRITE;
 		else
 			op = 0;
@@ -436,10 +500,7 @@ xbd_int(void *xsc)
 		 * Release any hold this command has on future command
 		 * dispatch. 
 		 */
-		if ((cm->cm_flags & XBDCF_FROZEN) != 0) {
-			xbd_thaw(sc, XBDF_NONE);
-			cm->cm_flags &= ~XBDCF_FROZEN;
-		}
+		xbd_cm_thaw(sc, cm);
 
 		/*
 		 * Directly call the i/o complete routine to save an
@@ -465,6 +526,9 @@ xbd_int(void *xsc)
 		sc->xbd_ring.sring->rsp_event = i + 1;
 	}
 
+	if (xbd_queue_length(sc, XBD_Q_BUSY) == 0)
+		xbd_thaw(sc, XBDF_WAIT_IDLE);
+
 	xbd_startio(sc);
 
 	if (unlikely(sc->xbd_state == XBD_STATE_SUSPENDED))
@@ -483,13 +547,13 @@ xbd_quiesce(struct xbd_softc *sc)
 	int mtd;
 
 	// While there are outstanding requests
-	while (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
+	while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 		RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
 		if (mtd) {
 			/* Recieved request completions, update queue. */
 			xbd_int(sc);
 		}
-		if (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
+		if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 			/*
 			 * Still pending requests, wait for the disk i/o
 			 * to complete.
@@ -750,11 +814,55 @@ xbd_free_ring(struct xbd_softc *sc)
 }
 
 /*-------------------------- Initialization/Teardown -------------------------*/
+static int
+xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
+{
+	struct sbuf sb;
+	int feature_cnt;
+
+	sbuf_new(&sb, features, len, SBUF_FIXEDLEN);
+
+	feature_cnt = 0;
+	if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
+		sbuf_printf(&sb, "flush");
+		feature_cnt++;
+	}
+
+	if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+		if (feature_cnt != 0)
+			sbuf_printf(&sb, ", ");
+		sbuf_printf(&sb, "write_barrier");
+		feature_cnt++;
+	}
+
+	(void) sbuf_finish(&sb);
+	return (sbuf_len(&sb));
+}
+
+static int
+xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
+{
+	char features[80];
+	struct xbd_softc *sc = arg1;
+	int error;
+	int len;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
+	len = xbd_feature_string(sc, features, sizeof(features));
+
+	/* len is -1 on error, which will make the SYSCTL_OUT a no-op. */
+	return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
+}
+
 static void
 xbd_setup_sysctl(struct xbd_softc *xbd)
 {
 	struct sysctl_ctx_list *sysctl_ctx = NULL;
 	struct sysctl_oid *sysctl_tree = NULL;
+	struct sysctl_oid_list *children;
 	
 	sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
 	if (sysctl_ctx == NULL)
@@ -764,22 +872,31 @@ xbd_setup_sysctl(struct xbd_softc *xbd)
 	if (sysctl_tree == NULL)
 		return;
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	children = SYSCTL_CHILDREN(sysctl_tree);
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
 	    "maximum outstanding requests (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "max_request_segments", CTLFLAG_RD,
 	    &xbd->xbd_max_request_segments, 0,
 	    "maximum number of pages per requests (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
 	    "maximum size in bytes of a request (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
 	    "communication channel pages (negotiated)");
+
+	SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
+	    "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0,
+	    xbd_sysctl_features, "A", "protocol features (negotiated)");
 }
 
 /*
@@ -854,6 +971,7 @@ int
 xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
     int vdevice, uint16_t vdisk_info, unsigned long sector_size)
 {
+	char features[80];
 	int unit, error = 0;
 	const char *name;
 
@@ -861,9 +979,14 @@ xbd_instance_create(struct xbd_softc *sc
 
 	sc->xbd_unit = unit;
 
-	if (strcmp(name, "xbd"))
+	if (strcmp(name, "xbd") != 0)
 		device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
 
+	if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
+		device_printf(sc->xbd_dev, "features: %s\n",
+		    features);
+	}
+
 	sc->xbd_disk = disk_alloc();
 	sc->xbd_disk->d_unit = sc->xbd_unit;
 	sc->xbd_disk->d_open = xbd_open;
@@ -878,6 +1001,11 @@ xbd_instance_create(struct xbd_softc *sc
 	sc->xbd_disk->d_mediasize = sectors * sector_size;
 	sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
 	sc->xbd_disk->d_flags = 0;
+	if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
+		sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
+		device_printf(sc->xbd_dev,
+		    "synchronize cache commands enabled.\n");
+	}
 	disk_create(sc->xbd_disk, DISK_VERSION);
 
 	return error;
@@ -1183,7 +1311,7 @@ xbd_connect(struct xbd_softc *sc)
 	device_t dev = sc->xbd_dev;
 	unsigned long sectors, sector_size;
 	unsigned int binfo;
-	int err, feature_barrier;
+	int err, feature_barrier, feature_flush;
 
 	if (sc->xbd_state == XBD_STATE_CONNECTED || 
 	    sc->xbd_state == XBD_STATE_SUSPENDED)
@@ -1205,9 +1333,15 @@ xbd_connect(struct xbd_softc *sc)
 	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 	     "feature-barrier", "%lu", &feature_barrier,
 	     NULL);
-	if (!err || feature_barrier)
+	if (err == 0 && feature_barrier != 0)
 		sc->xbd_flags |= XBDF_BARRIER;
 
+	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+	     "feature-flush-cache", "%lu", &feature_flush,
+	     NULL);
+	if (err == 0 && feature_flush != 0)
+		sc->xbd_flags |= XBDF_FLUSH;
+
 	if (sc->xbd_disk == NULL) {
 		device_printf(dev, "%juMB <%s> at %s",
 		    (uintmax_t) sectors / (1048576 / sector_size),
@@ -1339,7 +1473,7 @@ xbd_suspend(device_t dev)
 
 	/* Wait for outstanding I/O to drain. */
 	retval = 0;
-	while (TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq) == 0) {
+	while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 		if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock,
 		    PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
 			retval = EBUSY;

Modified: head/sys/dev/xen/blkfront/block.h
==============================================================================
--- head/sys/dev/xen/blkfront/block.h	Wed Jun 26 19:43:22 2013	(r252259)
+++ head/sys/dev/xen/blkfront/block.h	Wed Jun 26 20:39:07 2013	(r252260)
@@ -94,8 +94,11 @@
 
 typedef enum {
 	XBDCF_Q_MASK		= 0xFF,
+	/* This command has contributed to xbd_qfrozen_cnt. */
 	XBDCF_FROZEN		= 1<<8,
-	XBDCF_POLLED		= 1<<9,
+	/* Freeze the command queue on dispatch (i.e. single step command). */
+	XBDCF_Q_FREEZE		= 1<<9,
+	/* Bus DMA returned EINPROGRESS for this command. */
 	XBDCF_ASYNC_MAPPING	= 1<<10,
 	XBDCF_INITIALIZER	= XBDCF_Q_MASK
 } xbdc_flag_t;
@@ -147,9 +150,14 @@ typedef enum {
 	XBDF_NONE	  = 0,
 	XBDF_OPEN	  = 1 << 0, /* drive is open (can't shut down) */
 	XBDF_BARRIER	  = 1 << 1, /* backend supports barriers */
-	XBDF_READY	  = 1 << 2, /* Is ready */
-	XBDF_CM_SHORTAGE  = 1 << 3, /* Free cm resource shortage active. */
-	XBDF_GNT_SHORTAGE = 1 << 4  /* Grant ref resource shortage active */
+	XBDF_FLUSH	  = 1 << 2, /* backend supports flush */
+	XBDF_READY	  = 1 << 3, /* Is ready */
+	XBDF_CM_SHORTAGE  = 1 << 4, /* Free cm resource shortage active. */
+	XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */
+	XBDF_WAIT_IDLE	  = 1 << 6  /*
+				     * No new work until oustanding work
+				     * completes.
+				     */
 } xbd_flag_t;
 
 /*
@@ -206,6 +214,12 @@ xbd_removed_qentry(struct xbd_softc *sc,
 	sc->xbd_cm_q[index].q_length--;
 }
 
+static inline uint32_t
+xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index)
+{
+	return (sc->xbd_cm_q[index].q_length);
+}
+
 static inline void
 xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index)
 {
@@ -289,27 +303,27 @@ xbd_remove_cm(struct xbd_command *cm, xb
 	xbd_removed_qentry(cm->cm_sc, index);
 }
 
-static __inline void
+static inline void
 xbd_initq_bio(struct xbd_softc *sc)
 {
 	bioq_init(&sc->xbd_bioq);
 }
 
-static __inline void
+static inline void
 xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp)
 {
 	bioq_insert_tail(&sc->xbd_bioq, bp);
 	xbd_added_qentry(sc, XBD_Q_BIO);
 }
 
-static __inline void
+static inline void
 xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp)
 {
 	bioq_insert_head(&sc->xbd_bioq, bp);
 	xbd_added_qentry(sc, XBD_Q_BIO);
 }
 
-static __inline struct bio *
+static inline struct bio *
 xbd_dequeue_bio(struct xbd_softc *sc)
 {
 	struct bio *bp;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201306262039.r5QKd8Ko055944>