Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 27 Mar 2015 08:53:59 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r280744 - stable/10/usr.sbin/bhyve
Message-ID:  <201503270854.t2R8s0tx025919@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Fri Mar 27 08:53:59 2015
New Revision: 280744
URL: https://svnweb.freebsd.org/changeset/base/280744

Log:
  MFC r280037:
  Rewrite virtio block device driver to work asynchronously and use the block
  I/O interface.
  
  Asynchronous operation, based on the r280026 change, avoids blocking the
  virtual CPU during I/O processing, which can take seconds on slow/busy storage.
  Use of the recently improved block I/O interface allows multiple requests to
  be processed at the same time, which improves random I/O performance on wide
  storage.
  
  Benchmarks of a virtual disk, backed by a ZVOL on a RAID10 pool of 4 HDDs, show
  a ~3.5x improvement in random read performance, with no degradation of
  linear I/O.  Guest CPU usage during the test dropped from 100% to almost zero.

Modified:
  stable/10/usr.sbin/bhyve/block_if.c
  stable/10/usr.sbin/bhyve/pci_virtio_block.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/usr.sbin/bhyve/block_if.c
==============================================================================
--- stable/10/usr.sbin/bhyve/block_if.c	Fri Mar 27 08:52:57 2015	(r280743)
+++ stable/10/usr.sbin/bhyve/block_if.c	Fri Mar 27 08:53:59 2015	(r280744)
@@ -54,7 +54,7 @@ __FBSDID("$FreeBSD$");
 
 #define BLOCKIF_SIG	0xb109b109
 
-#define BLOCKIF_MAXREQ	33
+#define BLOCKIF_MAXREQ	64
 #define BLOCKIF_NUMTHR	8
 
 enum blockop {

Modified: stable/10/usr.sbin/bhyve/pci_virtio_block.c
==============================================================================
--- stable/10/usr.sbin/bhyve/pci_virtio_block.c	Fri Mar 27 08:52:57 2015	(r280743)
+++ stable/10/usr.sbin/bhyve/pci_virtio_block.c	Fri Mar 27 08:53:59 2015	(r280744)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include "bhyverun.h"
 #include "pci_emul.h"
 #include "virtio.h"
+#include "block_if.h"
 
 #define VTBLK_RINGSZ	64
 
@@ -120,6 +121,13 @@ static int pci_vtblk_debug;
 #define DPRINTF(params) if (pci_vtblk_debug) printf params
 #define WPRINTF(params) printf params
 
+struct pci_vtblk_ioreq {
+	struct blockif_req		io_req;
+	struct pci_vtblk_softc	       *io_sc;
+	uint8_t			       *io_status;
+	uint16_t			io_idx;
+};
+
 /*
  * Per-device softc
  */
@@ -127,10 +135,10 @@ struct pci_vtblk_softc {
 	struct virtio_softc vbsc_vs;
 	pthread_mutex_t vsc_mtx;
 	struct vqueue_info vbsc_vq;
-	int		vbsc_fd;
-	int		vbsc_ischr;
-	struct vtblk_config vbsc_cfg;	
+	struct vtblk_config vbsc_cfg;
+	struct blockif_ctxt *bc;
 	char vbsc_ident[VTBLK_BLK_ID_BYTES];
+	struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
 };
 
 static void pci_vtblk_reset(void *);
@@ -160,10 +168,34 @@ pci_vtblk_reset(void *vsc)
 }
 
 static void
+pci_vtblk_done(struct blockif_req *br, int err)
+{
+	struct pci_vtblk_ioreq *io = br->br_param;
+	struct pci_vtblk_softc *sc = io->io_sc;
+
+	/* convert errno into a virtio block error return */
+	if (err == EOPNOTSUPP || err == ENOSYS)
+		*io->io_status = VTBLK_S_UNSUPP;
+	else if (err != 0)
+		*io->io_status = VTBLK_S_IOERR;
+	else
+		*io->io_status = VTBLK_S_OK;
+
+	/*
+	 * Return the descriptor back to the host.
+	 * We wrote 1 byte (our status) to host.
+	 */
+	pthread_mutex_lock(&sc->vsc_mtx);
+	vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
+	vq_endchains(&sc->vbsc_vq, 0);
+	pthread_mutex_unlock(&sc->vsc_mtx);
+}
+
+static void
 pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
 {
 	struct virtio_blk_hdr *vbh;
-	uint8_t *status;
+	struct pci_vtblk_ioreq *io;
 	int i, n;
 	int err;
 	int iolen;
@@ -184,11 +216,14 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
 	 */
 	assert(n >= 2 && n <= VTBLK_MAXSEGS + 2);
 
+	io = &sc->vbsc_ios[idx];
 	assert((flags[0] & VRING_DESC_F_WRITE) == 0);
 	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
 	vbh = iov[0].iov_base;
-
-	status = iov[--n].iov_base;
+	memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
+	io->io_req.br_iovcnt = n - 2;
+	io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE;
+	io->io_status = iov[--n].iov_base;
 	assert(iov[n].iov_len == 1);
 	assert(flags[n] & VRING_DESC_F_WRITE);
 
@@ -200,8 +235,6 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
 	type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
 	writeop = (type == VBH_OP_WRITE);
 
-	offset = vbh->vbh_sector * DEV_BSIZE;
-
 	iolen = 0;
 	for (i = 1; i < n; i++) {
 		/*
@@ -217,48 +250,28 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
 	DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 
 		 writeop ? "write" : "read/ident", iolen, i - 1, offset));
 
-	err = 0;
 	switch (type) {
+	case VBH_OP_READ:
+		err = blockif_read(sc->bc, &io->io_req);
+		break;
 	case VBH_OP_WRITE:
-		if (pwritev(sc->vbsc_fd, iov + 1, i - 1, offset) < 0)
-			err = errno;
+		err = blockif_write(sc->bc, &io->io_req);
 		break;
-	case VBH_OP_READ:
-		if (preadv(sc->vbsc_fd, iov + 1, i - 1, offset) < 0)
-			err = errno;
+	case VBH_OP_FLUSH:
+	case VBH_OP_FLUSH_OUT:
+		err = blockif_flush(sc->bc, &io->io_req);
 		break;
 	case VBH_OP_IDENT:
 		/* Assume a single buffer */
 		strlcpy(iov[1].iov_base, sc->vbsc_ident,
 		    MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
-		err = 0;
-		break;
-	case VBH_OP_FLUSH:
-	case VBH_OP_FLUSH_OUT:
-		if (sc->vbsc_ischr) {
-			if (ioctl(sc->vbsc_fd, DIOCGFLUSH))
-				err = errno;
-		} else if (fsync(sc->vbsc_fd))
-			err = errno;
-		break;
+		pci_vtblk_done(&io->io_req, 0);
+		return;
 	default:
-		err = -ENOSYS;
-		break;
+		pci_vtblk_done(&io->io_req, EOPNOTSUPP);
+		return;
 	}
-
-	/* convert errno into a virtio block error return */
-	if (err == -ENOSYS)
-		*status = VTBLK_S_UNSUPP;
-	else if (err != 0)
-		*status = VTBLK_S_IOERR;
-	else
-		*status = VTBLK_S_OK;
-
-	/*
-	 * Return the descriptor back to the host.
-	 * We wrote 1 byte (our status) to host.
-	 */
-	vq_relchain(vq, idx, 1);
+	assert(err == 0);
 }
 
 static void
@@ -268,19 +281,18 @@ pci_vtblk_notify(void *vsc, struct vqueu
 
 	while (vq_has_descs(vq))
 		pci_vtblk_proc(sc, vq);
-	vq_endchains(vq, 1);	/* Generate interrupt if appropriate. */
 }
 
 static int
 pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 {
-	struct stat sbuf;
+	char bident[sizeof("XX:X:X")];
+	struct blockif_ctxt *bctxt;
 	MD5_CTX mdctx;
 	u_char digest[16];
 	struct pci_vtblk_softc *sc;
-	off_t size, sts, sto;
-	int fd;
-	int sectsz;
+	off_t size;
+	int i, sectsz, sts, sto;
 
 	if (opts == NULL) {
 		printf("virtio-block: backing device required\n");
@@ -290,43 +302,26 @@ pci_vtblk_init(struct vmctx *ctx, struct
 	/*
 	 * The supplied backing file has to exist
 	 */
-	fd = open(opts, O_RDWR);
-	if (fd < 0) {
+	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
+	bctxt = blockif_open(opts, bident);
+	if (bctxt == NULL) {       	
 		perror("Could not open backing file");
 		return (1);
 	}
 
-	if (fstat(fd, &sbuf) < 0) {
-		perror("Could not stat backing file");
-		close(fd);
-		return (1);
-	}
-
-	/*
-	 * Deal with raw devices
-	 */
-	size = sbuf.st_size;
-	sectsz = DEV_BSIZE;
-	sts = sto = 0;
-	if (S_ISCHR(sbuf.st_mode)) {
-		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
-		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
-			perror("Could not fetch dev blk/sector size");
-			close(fd);
-			return (1);
-		}
-		assert(size != 0);
-		assert(sectsz != 0);
-		if (ioctl(fd, DIOCGSTRIPESIZE, &sts) == 0 && sts > 0)
-			ioctl(fd, DIOCGSTRIPEOFFSET, &sto);
-	} else
-		sts = sbuf.st_blksize;
+	size = blockif_size(bctxt);
+	sectsz = blockif_sectsz(bctxt);
+	blockif_psectsz(bctxt, &sts, &sto);
 
 	sc = calloc(1, sizeof(struct pci_vtblk_softc));
-
-	/* record fd of storage device/file */
-	sc->vbsc_fd = fd;
-	sc->vbsc_ischr = S_ISCHR(sbuf.st_mode);
+	sc->bc = bctxt;
+	for (i = 0; i < VTBLK_RINGSZ; i++) {
+		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
+		io->io_req.br_callback = pci_vtblk_done;
+		io->io_req.br_param = io;
+		io->io_sc = sc;
+		io->io_idx = i;
+	}
 
 	pthread_mutex_init(&sc->vsc_mtx, NULL);
 
@@ -375,8 +370,11 @@ pci_vtblk_init(struct vmctx *ctx, struct
 
 	pci_lintr_request(pi);
 
-	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix()))
+	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
+		blockif_close(sc->bc);
+		free(sc);
 		return (1);
+	}
 	vi_set_io_bar(&sc->vbsc_vs, 0);
 	return (0);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201503270854.t2R8s0tx025919>