Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 14 Sep 2009 23:07:42 GMT
From:      Gleb Kurtsou <gk@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 168553 for review
Message-ID:  <200909142307.n8EN7gZi091173@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=168553

Change 168553 by gk@gk_h1 on 2009/09/14 23:07:07

	implement mapped read and write

Affected files ...

.. //depot/projects/soc2009/gk_pefs/sys/fs/pefs/pefs_vnops.c#14 edit

Differences ...

==== //depot/projects/soc2009/gk_pefs/sys/fs/pefs/pefs_vnops.c#14 (text+ko) ====

@@ -51,18 +51,23 @@
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
+#include <sys/sf_buf.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/limits.h>
-
-#include <fs/pefs/pefs.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 
+#include <fs/pefs/pefs.h>
+
 static int pefs_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
 SYSCTL_INT(_debug, OID_AUTO, pefs_bug_bypass, CTLFLAG_RW,
 	&pefs_bug_bypass, 0, "");
@@ -631,32 +636,49 @@
 }
 
 static int
-pefs_vreg_grow(struct vnode *vp, u_quad_t nsize, struct ucred *cred)
+pefs_tryextend(struct vnode *vp, u_quad_t nsize, struct ucred *cred)
 {
 	struct vnode *lvp = PEFS_LOWERVP(vp);
-	struct vattr o_va;
+	struct vattr va;
 	struct uio *puio;
 	struct pefs_node *pn = VP_TO_PN(vp);
 	struct pefs_chunk pc;
 	struct pefs_ctx *ctx;
+	u_quad_t osize;
 	off_t offset;
 	size_t bsize, size;
 	int error;
 
-	error = VOP_GETATTR(lvp, &o_va, cred);
+	MPASS(vp->v_type == VREG);
+
+	error = VOP_GETATTR(lvp, &va, cred);
 	if (error)
 		return (error);
+	osize = va.va_size;
 
-	PEFSDEBUG("pefs_vreg_grow: old size %jd, new size %jd\n",
-	    nsize, o_va.va_size);
-	if (nsize <= o_va.va_size)
+	if (nsize <= osize)
 		return (0);
 
-	if (nsize - o_va.va_size >= INT_MAX)
-		return (EINVAL);
-	size = nsize - o_va.va_size;
+	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+		vn_lock(vp, LK_UPGRADE | LK_RETRY);
+		error = VOP_GETATTR(lvp, &va, cred);
+		if (error)
+			return (error);
+		osize = va.va_size;
+		if (nsize <= osize)
+			return (0);
+	}
+
+	PEFSDEBUG("pefs_tryextend: old size 0x%jx, new size 0x%jx\n", osize, nsize);
+
+	VATTR_NULL(&va);
+	va.va_size = nsize;
+	VOP_SETATTR(lvp, &va, cred);
+	vnode_pager_setsize(vp, nsize);
+
+	size = nsize - osize;
 	bsize = qmin(size, DFLTPHYS);
-	offset = o_va.va_size;
+	offset = osize;
 	pefs_chunk_create(&pc, pn, bsize);
 
 	ctx = pefs_ctx_get();
@@ -665,14 +687,13 @@
 		pefs_chunk_zero(&pc);
 		pefs_data_encrypt_update(ctx, &pn->pn_tkey, &pc);
 		puio = pefs_chunk_uio(&pc, offset, UIO_WRITE);
-		PEFSDEBUG("pefs_vreg_grow: resizing file; filling with zeros: offset=%jd, resid=%jd\n", offset, bsize);
+		PEFSDEBUG("pefs_tryextend: resizing file; filling with zeros: offset=0x%jx, resid=0x%jx\n", offset, bsize);
 		error = VOP_WRITE(lvp, puio, 0, cred);
 		if (error) {
 			/* try to reset */
-			size = o_va.va_size;
-			VATTR_NULL(&o_va);
-			o_va.va_size = size;
-			VOP_SETATTR(lvp, &o_va, cred);
+			VATTR_NULL(&va);
+			va.va_size = osize;
+			VOP_SETATTR(lvp, &va, cred);
 			break;
 		}
 		offset += bsize;
@@ -695,7 +716,6 @@
 pefs_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
-	struct vnode *lvp;
 	struct ucred *cred = ap->a_cred;
 	struct vattr *vap = ap->a_vap;
 	int error;
@@ -723,22 +743,23 @@
 			 * Disallow write attempts if the filesystem is
 			 * mounted read-only.
 			 */
-			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			if ((vp->v_mount->mnt_flag & MNT_RDONLY) ||
+			    pefs_no_keys(vp))
 				return (EROFS);
 			if (vp->v_type == VREG)
-				error = pefs_vreg_grow(vp, vap->va_size, cred);
+				error = pefs_tryextend(vp, vap->va_size, cred);
 			else
-				error = EOPNOTSUPP; // TODO pefs_vlnk_chsize
+				error = EOPNOTSUPP; /* TODO */
 			if (error)
 				return (error);
+			vnode_pager_setsize(vp, vap->va_size);
 			break;
  		default:
 			return (EOPNOTSUPP);
 		}
 	}
 
-	lvp = PEFS_LOWERVP(vp);
-	return (VOP_SETATTR(lvp, vap, cred));
+	return (VOP_SETATTR(PEFS_LOWERVP(vp), vap, cred));
 }
 
 /*
@@ -1041,6 +1062,15 @@
 	pefs_node_buf_free(pn);
 	VI_UNLOCK(vp);
 
+	if (vp->v_object != NULL) {
+		if (vp->v_object->resident_page_count > 0)
+			PEFSDEBUG("pefs_inactive: vobject has dirty pages: vp=%p count=%d\n",
+			    vp, vp->v_object->resident_page_count);
+		VM_OBJECT_LOCK(vp->v_object);
+		vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
+		VM_OBJECT_UNLOCK(vp->v_object);
+	}
+
 	if ((pn->pn_flags & PN_WANTRECYCLE) || (pn->pn_flags & PN_HASKEY) == 0)
 		vrecycle(vp, td);
 
@@ -1206,7 +1236,6 @@
 		if (error)
 			break;
 
-		/* Nothing was written.. somehow */
 		if (pc.pc_size == puio->uio_resid)
 			break;
 		pefs_chunk_setsize(&pc, pc.pc_size - puio->uio_resid);
@@ -1536,6 +1565,33 @@
 	return (error);
 }
 
+static inline int
+pefs_getsize(struct vnode *vp, u_quad_t *sizep, struct ucred *cred)
+{
+	struct vattr va;
+	int error;
+
+	error = VOP_GETATTR(PEFS_LOWERVP(vp), &va, cred);
+	if (error == 0)
+		*sizep = va.va_size;
+
+	return (error);
+}
+
+static inline int
+pefs_ismapped(struct vnode *vp)
+{
+	vm_object_t object = vp->v_object;
+
+	if (object == NULL)
+		return (0);
+
+	if (object->resident_page_count > 0 || object->cache != NULL ||
+	    object->root != NULL)
+		return (1);
+	return (0);
+}
+
 static int
 pefs_read(struct vop_read_args *ap)
 {
@@ -1543,16 +1599,21 @@
 	struct vnode *lvp = PEFS_LOWERVP(vp);
 	struct uio *uio = ap->a_uio;
 	struct uio *puio;
+	struct ucred *cred = ap->a_cred;
 	struct pefs_node *pn = VP_TO_PN(vp);
 	struct pefs_chunk pc;
 	struct pefs_ctx *ctx;
-	ssize_t bsize, done;
-	int error = 0;
+	vm_page_t m;
+	vm_offset_t moffset;
+	u_quad_t fsize;
+	ssize_t bsize, msize, done;
+	int ioflag = ap->a_ioflag;
+	int error = 0, mapped, restart_decrypt;
 
 	if (vp->v_type == VDIR)
 		return (EISDIR);
 	if (!(pn->pn_flags & PN_HASKEY) || vp->v_type == VFIFO)
-		return (VOP_READ(lvp, uio, ap->a_ioflag, ap->a_cred));
+		return (VOP_READ(lvp, uio, ioflag, cred));
 	if (vp->v_type != VREG)
 		return (EOPNOTSUPP);
 	if (uio->uio_resid == 0)
@@ -1560,40 +1621,72 @@
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
-	bsize = qmin(uio->uio_resid, DFLTPHYS);
+	mapped = pefs_ismapped(vp);
+	bsize = qmin(uio->uio_resid, mapped ? PAGE_SIZE : DFLTPHYS);
+	error = pefs_getsize(vp, &fsize, cred);
+	if (error != 0)
+		return (error);
 
 	ctx = pefs_ctx_get();
-	pefs_data_decrypt_start(ctx, &pn->pn_tkey, uio->uio_offset);
 	pefs_chunk_create(&pc, pn, bsize);
-	while (uio->uio_resid > 0) {
+	restart_decrypt = 1;
+	while (uio->uio_resid > 0 && uio->uio_offset < fsize) {
+		bsize = qmin(uio->uio_resid, bsize);
+		bsize = qmin(fsize - uio->uio_offset, bsize);
+		pefs_chunk_setsize(&pc, bsize);
+
+		if (mapped) {
+			moffset = uio->uio_offset & PAGE_MASK;
+			msize = qmin(PAGE_SIZE - moffset, bsize);
+
+			VM_OBJECT_LOCK(vp->v_object);
+lookupvpg:
+			m = vm_page_lookup(vp->v_object,
+			    OFF_TO_IDX(uio->uio_offset));
+			if (m != NULL && vm_page_is_valid(m, moffset, msize)) {
+				if (vm_page_sleep_if_busy(m, FALSE, "pefsmr"))
+					goto lookupvpg;
+				vm_page_busy(m);
+				VM_OBJECT_UNLOCK(vp->v_object);
+				PEFSDEBUG("pefs_read: mapped: offset=0x%jx moffset=0x%jx msize=0x%jx\n",
+				    uio->uio_offset, moffset, msize);
+				error = uiomove_fromphys(&m, moffset, msize, uio);
+				VM_OBJECT_LOCK(vp->v_object);
+				vm_page_wakeup(m);
+				VM_OBJECT_UNLOCK(vp->v_object);
+				if (error != 0)
+					break;
+				restart_decrypt = 1;
+				continue;
+			} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
+				/* FIXME: UIO_NOCOPY is not supported */
+				VM_OBJECT_UNLOCK(vp->v_object);
+				return (EIO);
+			}
+			VM_OBJECT_UNLOCK(vp->v_object);
+			/* Page not cached. Make next read page-aligned. */
+			pefs_chunk_setsize(&pc, msize);
+		}
+
+		PEFSDEBUG("pefs_read: mapped=%d m=%d offset=0x%jx size=0x%jx\n",
+		    mapped, m != NULL, uio->uio_offset, pc.pc_size);
 		puio = pefs_chunk_uio(&pc, uio->uio_offset, uio->uio_rw);
-		error = VOP_READ(lvp, puio, ap->a_ioflag, ap->a_cred);
-		if (error != 0) {
+		error = VOP_READ(lvp, puio, ioflag, cred);
+		if (error != 0)
 			break;
-		}
 
 		done = pc.pc_size - puio->uio_resid;
-#if 0
-		error = VOP_GETATTR(lvp, &va, ap->a_cred);
-		if (error != 0) {
-			pefs_chunk_free(&pc, pn);
-			return (error);
-		}
-		if (va.va_size < uio->uio_offset) {
-			/* Read past end of file */
-			done -= uio->uio_offset - va.va_size;
-			MPASS(done >= 0);
-		}
-#endif
 		if (done <= 0)
 			break;
 
 		pefs_chunk_setsize(&pc, done);
+		if (restart_decrypt) {
+			restart_decrypt = 0;
+			pefs_data_decrypt_start(ctx, &pn->pn_tkey,
+			    uio->uio_offset);
+		}
 		pefs_data_decrypt_update(ctx, &pn->pn_tkey, &pc);
 		pefs_chunk_copy(&pc, uio);
-
-		bsize = qmin(uio->uio_resid, bsize);
-		pefs_chunk_setsize(&pc, bsize);
 	}
 	pefs_ctx_free(ctx);
 	pefs_chunk_free(&pc, pn);
@@ -1606,19 +1699,28 @@
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *lvp = PEFS_LOWERVP(vp);
+	struct ucred *cred = ap->a_cred;
 	struct uio *uio = ap->a_uio;
 	struct uio *puio;
+	struct sf_buf *sf;
 	struct pefs_node *pn = VP_TO_PN(vp);
 	struct pefs_chunk pc;
 	struct pefs_ctx *ctx;
+	vm_page_t m = NULL;
+	vm_offset_t moffset;
+	vm_pindex_t idx;
+	u_quad_t nsize;
+	char *ma;
 	off_t offset;
-	ssize_t resid, bsize;
-	int error = 0;
+	ssize_t resid, bsize, msize;
+	int ioflag = ap->a_ioflag;
+	int restart_encrypt;
+	int error = 0, mapped;
 
 	if (vp->v_type == VDIR)
 		return (EISDIR);
 	if (vp->v_type == VFIFO)
-		return (error = VOP_WRITE(lvp, uio, ap->a_ioflag, ap->a_cred));
+		return (VOP_WRITE(lvp, uio, ioflag, cred));
 	if (vp->v_type != VREG)
 		return (EOPNOTSUPP);
 	if (uio->uio_resid == 0)
@@ -1629,36 +1731,117 @@
 	if (!(pn->pn_flags & PN_HASKEY))
 		return (EROFS);
 
+	error = pefs_getsize(vp, &nsize, cred);
+	if (error != 0)
+		return (error);
+
+	if (ioflag & IO_APPEND) {
+		uio->uio_offset = nsize;
+		ioflag &= ~IO_APPEND;
+	}
+
 	offset = uio->uio_offset;
 	resid = uio->uio_resid;
 
-	error = pefs_vreg_grow(vp, offset, ap->a_cred);
-	if (error != 0)
-		return (error);
+	if (offset > nsize) {
+		error = pefs_tryextend(vp, offset, cred);
+		if (error != 0)
+			return (error);
+	}
+
+	mapped = pefs_ismapped(vp);
+	bsize = qmin(resid, mapped ? PAGE_SIZE : DFLTPHYS);
+
+	if (offset + resid > nsize) {
+		PEFSDEBUG("pefs_write: extend: 0x%jx (old size: 0x%jx)\n", offset + resid, nsize);
+		nsize = offset + resid;
+		vnode_pager_setsize(vp, nsize);
+	}
 
-	bsize = qmin(resid, DFLTPHYS);
 	ctx = pefs_ctx_get();
-	pefs_data_encrypt_start(ctx, &pn->pn_tkey, uio->uio_offset);
-	pefs_chunk_create(&pc, pn, bsize);
+	restart_encrypt = 1;
+	pefs_chunk_create(&pc, pn, mapped ? PAGE_SIZE : bsize);
 	while (resid > 0) {
+		bsize = qmin(resid, bsize);
+		if (mapped) {
+			moffset = offset & PAGE_MASK;
+			msize = qmin(PAGE_SIZE - moffset, bsize);
+			msize = qmin(nsize - offset, msize);
+			pefs_chunk_setsize(&pc, moffset + msize);
+
+			VM_OBJECT_LOCK(vp->v_object);
+lookupvpg:
+			idx = OFF_TO_IDX(offset);
+			m = vm_page_lookup(vp->v_object, idx);
+			if (m != NULL && vm_page_is_valid(m, 0, moffset + msize)) {
+				if (vm_page_sleep_if_busy(m, FALSE, "pefsmw"))
+					goto lookupvpg;
+				vm_page_busy(m);
+				vm_page_lock_queues();
+				vm_page_undirty(m);
+				vm_page_unlock_queues();
+				VM_OBJECT_UNLOCK(vp->v_object);
+				PEFSDEBUG("pefs_write: mapped: offset=0x%jx moffset=0x%jx msize=0x%jx\n",
+				    offset, moffset, msize);
+				sched_pin();
+				sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
+				ma = (char *)sf_buf_kva(sf);
+				error = uiomove(ma + moffset, msize, uio);
+				memcpy(pc.pc_base, ma, pc.pc_size);
+				sf_buf_free(sf);
+				sched_unpin();
+				VM_OBJECT_LOCK(vp->v_object);
+				vm_page_wakeup(m);
+				VM_OBJECT_UNLOCK(vp->v_object);
+				if (error != 0) {
+					break;
+				}
+				if (moffset != 0) {
+					resid += moffset;
+					offset -= moffset;
+					restart_encrypt = 1;
+				}
+				goto lower_update;
+			} else if (__predict_false(vp->v_object->cache != NULL)) {
+				PEFSDEBUG("pefs_write: free cache: 0x%jx\n", offset - moffset);
+				vm_page_cache_free(vp->v_object, idx,
+				    idx + 1);
+			}
+			MPASS(m == NULL ||
+			    !vm_page_is_valid(m, moffset, msize));
+			VM_OBJECT_UNLOCK(vp->v_object);
+			/* Page align consequent writes */
+			pefs_chunk_setsize(&pc, msize);
+		} else {
+			pefs_chunk_setsize(&pc, bsize);
+		}
 		pefs_chunk_copy(&pc, uio);
+lower_update:
+		PEFSDEBUG("pefs_write: mapped=%d m=%d offset=0x%jx size=0x%jx\n",
+		    mapped, m != NULL, offset, pc.pc_size);
+		if (restart_encrypt) {
+			restart_encrypt = 0;
+			pefs_data_encrypt_start(ctx, &pn->pn_tkey, offset);
+		}
 		pefs_data_encrypt_update(ctx, &pn->pn_tkey, &pc);
 		puio = pefs_chunk_uio(&pc, offset, uio->uio_rw);
 
-		error = VOP_WRITE(lvp, puio, ap->a_ioflag, ap->a_cred);
+		/* IO_APPEND handled above to prevent offset change races. */
+		error = VOP_WRITE(lvp, puio, ioflag, cred);
 		if (error != 0)
 			break;
 
 		MPASS(puio->uio_resid == 0);
-		resid -= bsize;
-		offset += bsize;
+		resid -= pc.pc_size;
+		offset += pc.pc_size;
 
-		bsize = qmin(resid, bsize);
-		pefs_chunk_setsize(&pc, bsize);
 	}
 	pefs_ctx_free(ctx);
 	pefs_chunk_free(&pc, pn);
 
+	MPASS(resid == uio->uio_resid);
+	MPASS(offset == uio->uio_offset);
+
 	return (error);
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200909142307.n8EN7gZi091173>