Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 6 Jan 2010 22:39:40 +0000 (UTC)
From:      Pawel Jakub Dawidek <pjd@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r201684 - head/sys/cddl/boot/zfs
Message-ID:  <201001062239.o06MdeSA061078@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: pjd
Date: Wed Jan  6 22:39:40 2010
New Revision: 201684
URL: http://svn.freebsd.org/changeset/base/201684

Log:
  Teach the (gpt)zfsboot and zfsloader raidz code to use its buffers
  more efficiently.
  
  Before this patch, in the worst case, memory use would grow
  exponentially with the number of drives in the raidz vdev.
  
  Submitted by:	Matt Reimer <mattjreimer@gmail.com>
  Sponsored by:	VPOP Technologies, Inc.
  Silence from:	dfr

Modified:
  head/sys/cddl/boot/zfs/zfssubr.c

Modified: head/sys/cddl/boot/zfs/zfssubr.c
==============================================================================
--- head/sys/cddl/boot/zfs/zfssubr.c	Wed Jan  6 22:19:57 2010	(r201683)
+++ head/sys/cddl/boot/zfs/zfssubr.c	Wed Jan  6 22:39:40 2010	(r201684)
@@ -454,7 +454,7 @@ vdev_raidz_reconstruct_q(raidz_col_t *co
 
 static void
 vdev_raidz_reconstruct_pq(raidz_col_t *cols, int nparity, int acols,
-    int x, int y)
+    int x, int y, void *temp_p, void *temp_q)
 {
 	uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp;
 	void *pdata, *qdata;
@@ -478,10 +478,8 @@ vdev_raidz_reconstruct_pq(raidz_col_t *c
 	xsize = cols[x].rc_size;
 	ysize = cols[y].rc_size;
 
-	cols[VDEV_RAIDZ_P].rc_data =
-		zfs_alloc_temp(cols[VDEV_RAIDZ_P].rc_size);
-	cols[VDEV_RAIDZ_Q].rc_data =
-		zfs_alloc_temp(cols[VDEV_RAIDZ_Q].rc_size);
+	cols[VDEV_RAIDZ_P].rc_data = temp_p;
+	cols[VDEV_RAIDZ_Q].rc_data = temp_q;
 	cols[x].rc_size = 0;
 	cols[y].rc_size = 0;
 
@@ -551,9 +549,12 @@ vdev_raidz_read(vdev_t *vdev, const blkp
 	uint64_t f = b % dcols;
 	uint64_t o = (b / dcols) << unit_shift;
 	uint64_t q, r, coff;
-	int c, c1, bc, col, acols, devidx, asize, n;
+	int c, c1, bc, col, acols, devidx, asize, n, max_rc_size;
 	static raidz_col_t cols[16];
 	raidz_col_t *rc, *rc1;
+	void *orig, *orig1, *temp_p, *temp_q;
+
+	orig = orig1 = temp_p = temp_q = NULL;
 
 	q = s / (dcols - nparity);
 	r = s - q * (dcols - nparity);
@@ -561,6 +562,7 @@ vdev_raidz_read(vdev_t *vdev, const blkp
 
 	acols = (q == 0 ? bc : dcols);
 	asize = 0;
+	max_rc_size = 0;
 	
 	for (c = 0; c < acols; c++) {
 		col = f + c;
@@ -577,6 +579,8 @@ vdev_raidz_read(vdev_t *vdev, const blkp
 		cols[c].rc_tried = 0;
 		cols[c].rc_skipped = 0;
 		asize += cols[c].rc_size;
+		if (cols[c].rc_size > max_rc_size)
+			max_rc_size = cols[c].rc_size;
 	}
 
 	asize = roundup(asize, (nparity + 1) << unit_shift);
@@ -777,8 +781,13 @@ reconstruct:
 			//ASSERT(c != acols);
 			//ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO || rc->rc_error == ESTALE);
 
+			if (temp_p == NULL)
+				temp_p = zfs_alloc_temp(max_rc_size);
+			if (temp_q == NULL)
+				temp_q = zfs_alloc_temp(max_rc_size);
+
 			vdev_raidz_reconstruct_pq(cols, nparity, acols,
-			    c1, c);
+			    c1, c, temp_p, temp_q);
 
 			if (zio_checksum_error(bp, buf) == 0)
 				return (0);
@@ -845,18 +854,12 @@ reconstruct:
 		return (EIO);
 	}
 
-	asize = 0;
-	for (c = 0; c < acols; c++) {
-		rc = &cols[c];
-		if (rc->rc_size > asize)
-			asize = rc->rc_size;
-	}
 	if (cols[VDEV_RAIDZ_P].rc_error == 0) {
 		/*
 		 * Attempt to reconstruct the data from parity P.
 		 */
-		void *orig;
-		orig = zfs_alloc_temp(asize);
+		if (orig == NULL)
+			orig = zfs_alloc_temp(max_rc_size);
 		for (c = nparity; c < acols; c++) {
 			rc = &cols[c];
 
@@ -874,8 +877,8 @@ reconstruct:
 		/*
 		 * Attempt to reconstruct the data from parity Q.
 		 */
-		void *orig;
-		orig = zfs_alloc_temp(asize);
+		if (orig == NULL)
+			orig = zfs_alloc_temp(max_rc_size);
 		for (c = nparity; c < acols; c++) {
 			rc = &cols[c];
 
@@ -895,9 +898,14 @@ reconstruct:
 		/*
 		 * Attempt to reconstruct the data from both P and Q.
 		 */
-		void *orig, *orig1;
-		orig = zfs_alloc_temp(asize);
-		orig1 = zfs_alloc_temp(asize);
+		if (orig == NULL)
+			orig = zfs_alloc_temp(max_rc_size);
+		if (orig1 == NULL)
+			orig1 = zfs_alloc_temp(max_rc_size);
+		if (temp_p == NULL)
+			temp_p = zfs_alloc_temp(max_rc_size);
+		if (temp_q == NULL)
+			temp_q = zfs_alloc_temp(max_rc_size);
 		for (c = nparity; c < acols - 1; c++) {
 			rc = &cols[c];
 
@@ -909,7 +917,7 @@ reconstruct:
 				memcpy(orig1, rc1->rc_data, rc1->rc_size);
 
 				vdev_raidz_reconstruct_pq(cols, nparity,
-				    acols, c, c1);
+				    acols, c, c1, temp_p, temp_q);
 
 				if (zio_checksum_error(bp, buf) == 0)
 					return (0);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001062239.o06MdeSA061078>