Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 1 May 2015 17:31:00 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r282307 - stable/10/usr.sbin/bhyve
Message-ID:  <201505011731.t41HV0cN065531@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Fri May  1 17:30:59 2015
New Revision: 282307
URL: https://svnweb.freebsd.org/changeset/base/282307

Log:
  MFC r281700:
  Work around problems with bhyve virtual disks operating on top of GEOM
  providers.
  
  GEOM does not support scatter/gather lists in its I/Os.  Such requests
  are cut into pieces by physio(), which may be problematic if those pieces
  are not multiples of the provider's sector size.  If such a case is
  detected, move the data through a temporary sequential buffer.

Modified:
  stable/10/usr.sbin/bhyve/block_if.c
  stable/10/usr.sbin/bhyve/block_if.h
  stable/10/usr.sbin/bhyve/pci_ahci.c
  stable/10/usr.sbin/bhyve/pci_virtio_block.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/usr.sbin/bhyve/block_if.c
==============================================================================
--- stable/10/usr.sbin/bhyve/block_if.c	Fri May  1 17:29:45 2015	(r282306)
+++ stable/10/usr.sbin/bhyve/block_if.c	Fri May  1 17:30:59 2015	(r282307)
@@ -85,6 +85,7 @@ struct blockif_ctxt {
 	int			bc_magic;
 	int			bc_fd;
 	int			bc_ischr;
+	int			bc_isgeom;
 	int			bc_candelete;
 	int			bc_rdonly;
 	off_t			bc_size;
@@ -198,27 +199,93 @@ blockif_complete(struct blockif_ctxt *bc
 }
 
 static void
-blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
+blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
 {
 	struct blockif_req *br;
 	off_t arg[2];
-	int err;
+	ssize_t clen, len, off, boff, voff;
+	int i, err;
 
 	br = be->be_req;
+	if (br->br_iovcnt <= 1)
+		buf = NULL;
 	err = 0;
-
 	switch (be->be_op) {
 	case BOP_READ:
-		if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
-			   br->br_offset) < 0)
-			err = errno;
+		if (buf == NULL) {
+			if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
+				   br->br_offset)) < 0)
+				err = errno;
+			else
+				br->br_resid -= len;
+			break;
+		}
+		i = 0;
+		off = voff = 0;
+		while (br->br_resid > 0) {
+			len = MIN(br->br_resid, MAXPHYS);
+			if (pread(bc->bc_fd, buf, len, br->br_offset +
+			    off) < 0) {
+				err = errno;
+				break;
+			}
+			boff = 0;
+			do {
+				clen = MIN(len - boff, br->br_iov[i].iov_len -
+				    voff);
+				memcpy(br->br_iov[i].iov_base + voff,
+				    buf + boff, clen);
+				if (clen < br->br_iov[i].iov_len - voff)
+					voff += clen;
+				else {
+					i++;
+					voff = 0;
+				}
+				boff += clen;
+			} while (boff < len);
+			off += len;
+			br->br_resid -= len;
+		}
 		break;
 	case BOP_WRITE:
-		if (bc->bc_rdonly)
+		if (bc->bc_rdonly) {
 			err = EROFS;
-		else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
-			     br->br_offset) < 0)
-			err = errno;
+			break;
+		}
+		if (buf == NULL) {
+			if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
+				    br->br_offset)) < 0)
+				err = errno;
+			else
+				br->br_resid -= len;
+			break;
+		}
+		i = 0;
+		off = voff = 0;
+		while (br->br_resid > 0) {
+			len = MIN(br->br_resid, MAXPHYS);
+			boff = 0;
+			do {
+				clen = MIN(len - boff, br->br_iov[i].iov_len -
+				    voff);
+				memcpy(buf + boff,
+				    br->br_iov[i].iov_base + voff, clen);
+				if (clen < br->br_iov[i].iov_len - voff)
+					voff += clen;
+				else {
+					i++;
+					voff = 0;
+				}
+				boff += clen;
+			} while (boff < len);
+			if (pwrite(bc->bc_fd, buf, len, br->br_offset +
+			    off) < 0) {
+				err = errno;
+				break;
+			}
+			off += len;
+			br->br_resid -= len;
+		}
 		break;
 	case BOP_FLUSH:
 		if (bc->bc_ischr) {
@@ -234,9 +301,11 @@ blockif_proc(struct blockif_ctxt *bc, st
 			err = EROFS;
 		else if (bc->bc_ischr) {
 			arg[0] = br->br_offset;
-			arg[1] = br->br_iov[0].iov_len;
+			arg[1] = br->br_resid;
 			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
 				err = errno;
+			else
+				br->br_resid = 0;
 		} else
 			err = EOPNOTSUPP;
 		break;
@@ -256,15 +325,20 @@ blockif_thr(void *arg)
 	struct blockif_ctxt *bc;
 	struct blockif_elem *be;
 	pthread_t t;
+	uint8_t *buf;
 
 	bc = arg;
+	if (bc->bc_isgeom)
+		buf = malloc(MAXPHYS);
+	else
+		buf = NULL;
 	t = pthread_self();
 
 	pthread_mutex_lock(&bc->bc_mtx);
 	for (;;) {
 		while (blockif_dequeue(bc, t, &be)) {
 			pthread_mutex_unlock(&bc->bc_mtx);
-			blockif_proc(bc, be);
+			blockif_proc(bc, be, buf);
 			pthread_mutex_lock(&bc->bc_mtx);
 			blockif_complete(bc, be);
 		}
@@ -275,6 +349,8 @@ blockif_thr(void *arg)
 	}
 	pthread_mutex_unlock(&bc->bc_mtx);
 
+	if (buf)
+		free(buf);
 	pthread_exit(NULL);
 	return (NULL);
 }
@@ -315,13 +391,14 @@ struct blockif_ctxt *
 blockif_open(const char *optstr, const char *ident)
 {
 	char tname[MAXCOMLEN + 1];
+	char name[MAXPATHLEN];
 	char *nopt, *xopts;
 	struct blockif_ctxt *bc;
 	struct stat sbuf;
 	struct diocgattr_arg arg;
 	off_t size, psectsz, psectoff;
 	int extra, fd, i, sectsz;
-	int nocache, sync, ro, candelete;
+	int nocache, sync, ro, candelete, geom;
 
 	pthread_once(&blockif_once, blockif_init);
 
@@ -375,7 +452,7 @@ blockif_open(const char *optstr, const c
         size = sbuf.st_size;
 	sectsz = DEV_BSIZE;
 	psectsz = psectoff = 0;
-	candelete = 0;
+	candelete = geom = 0;
 	if (S_ISCHR(sbuf.st_mode)) {
 		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
 		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
@@ -391,6 +468,8 @@ blockif_open(const char *optstr, const c
 		arg.len = sizeof(arg.value.i);
 		if (ioctl(fd, DIOCGATTR, &arg) == 0)
 			candelete = arg.value.i;
+		if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
+			geom = 1;
 	} else
 		psectsz = sbuf.st_blksize;
 
@@ -403,6 +482,7 @@ blockif_open(const char *optstr, const c
 	bc->bc_magic = BLOCKIF_SIG;
 	bc->bc_fd = fd;
 	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
+	bc->bc_isgeom = geom;
 	bc->bc_candelete = candelete;
 	bc->bc_rdonly = ro;
 	bc->bc_size = size;

Modified: stable/10/usr.sbin/bhyve/block_if.h
==============================================================================
--- stable/10/usr.sbin/bhyve/block_if.h	Fri May  1 17:29:45 2015	(r282306)
+++ stable/10/usr.sbin/bhyve/block_if.h	Fri May  1 17:30:59 2015	(r282307)
@@ -45,6 +45,7 @@ struct blockif_req {
 	struct iovec	br_iov[BLOCKIF_IOV_MAX];
 	int		br_iovcnt;
 	off_t		br_offset;
+	ssize_t		br_resid;
 	void		(*br_callback)(struct blockif_req *req, int err);
 	void		*br_param;
 };

Modified: stable/10/usr.sbin/bhyve/pci_ahci.c
==============================================================================
--- stable/10/usr.sbin/bhyve/pci_ahci.c	Fri May  1 17:29:45 2015	(r282306)
+++ stable/10/usr.sbin/bhyve/pci_ahci.c	Fri May  1 17:30:59 2015	(r282307)
@@ -571,6 +571,7 @@ ahci_build_iov(struct ahci_port *p, stru
 	}
 
 	breq->br_iovcnt = j;
+	breq->br_resid = todo;
 	aior->done += todo;
 	aior->more = (aior->done < aior->len && i < prdtl);
 }
@@ -776,8 +777,7 @@ next:
 
 	breq = &aior->io_req;
 	breq->br_offset = elba * blockif_sectsz(p->bctx);
-	breq->br_iovcnt = 1;
-	breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
+	breq->br_resid = elen * blockif_sectsz(p->bctx);
 
 	/*
 	 * Mark this command in-flight.

Modified: stable/10/usr.sbin/bhyve/pci_virtio_block.c
==============================================================================
--- stable/10/usr.sbin/bhyve/pci_virtio_block.c	Fri May  1 17:29:45 2015	(r282306)
+++ stable/10/usr.sbin/bhyve/pci_virtio_block.c	Fri May  1 17:30:59 2015	(r282307)
@@ -198,7 +198,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
 	struct pci_vtblk_ioreq *io;
 	int i, n;
 	int err;
-	int iolen;
+	ssize_t iolen;
 	int writeop, type;
 	off_t offset;
 	struct iovec iov[BLOCKIF_IOV_MAX + 2];
@@ -246,8 +246,9 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
 		assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
 		iolen += iov[i].iov_len;
 	}
+	io->io_req.br_resid = iolen;
 
-	DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 
+	DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r", 
 		 writeop ? "write" : "read/ident", iolen, i - 1, offset));
 
 	switch (type) {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201505011731.t41HV0cN065531>