Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 18 Feb 2021 09:01:01 GMT
From:      Martin Matuska <mm@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 223debfb954c - releng/13.0 - zfs: fix RAIDZ2/3 not healing parity with 2+ bad disks
Message-ID:  <202102180901.11I911Sp037718@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch releng/13.0 has been updated by mm:

URL: https://cgit.FreeBSD.org/src/commit/?id=223debfb954c924897aad393dc8b830cb1d507f8

commit 223debfb954c924897aad393dc8b830cb1d507f8
Author:     Martin Matuska <mm@FreeBSD.org>
AuthorDate: 2021-02-15 07:40:27 +0000
Commit:     Martin Matuska <mm@FreeBSD.org>
CommitDate: 2021-02-18 08:59:38 +0000

    zfs: fix RAIDZ2/3 not healing parity with 2+ bad disks
    
    From openzfs-master 62d4287f2 commit message:
      When scrubbing, (non-sequential) resilvering, or correcting a checksum
      error using RAIDZ parity, ZFS should heal any incorrect RAIDZ parity by
      overwriting it.  For example, if P disks are silently corrupted (P being
      the number of failures tolerated; e.g. RAIDZ2 has P=2), `zpool scrub`
      should detect and heal all the bad state on these disks, including
      parity.  This way if there is a subsequent failure we are fully
      protected.
    
      With RAIDZ2 or RAIDZ3, a block can have silent damage to a parity
      sector, and also damage (silent or known) to a data sector.  In this
      case the parity should be healed but it is not.
    
    Cherry-picked from openzfs 62d4287f279a0d184f8f332475f27af58b7aa87e
    Patch Author:   Matthew Ahrens <matthew.ahrens@delphix.com>
    
    Approved by:            re (gjb)
    Reviewed by:            delphij
    Differential Revision:  https://reviews.freebsd.org/D28681
    
    (cherry picked from commit f15e18a642cb3f7ebc747f8e9cdf11274140107d)
    (cherry picked from commit 6f21b7996a95b7d93d1687787b55e57289689066)
---
 sys/contrib/openzfs/module/zfs/vdev_raidz.c        |  10 --
 sys/contrib/openzfs/tests/runfiles/common.run      |   5 +-
 .../tests/functional/redundancy/Makefile.am        |   1 +
 .../functional/redundancy/redundancy_raidz.ksh     | 198 +++++++++++++++++++++
 4 files changed, 202 insertions(+), 12 deletions(-)

diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index 989b90dc2635..5b152f38bd63 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -1922,16 +1922,6 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
 	if (checksum == ZIO_CHECKSUM_NOPARITY)
 		return (ret);
 
-	/*
-	 * All data columns must have been successfully read in order
-	 * to use them to generate parity columns for comparison.
-	 */
-	for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
-		rc = &rr->rr_col[c];
-		if (!rc->rc_tried || rc->rc_error != 0)
-			return (ret);
-	}
-
 	for (c = 0; c < rr->rr_firstdatacol; c++) {
 		rc = &rr->rr_col[c];
 		if (!rc->rc_tried || rc->rc_error != 0)
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 171db4c0c022..c0bfc09ac5b3 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -727,8 +727,9 @@ tags = ['functional', 'raidz']
 [tests/functional/redundancy]
 tests = ['redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3',
     'redundancy_draid_spare1', 'redundancy_draid_spare2',
-    'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz1',
-    'redundancy_raidz2', 'redundancy_raidz3', 'redundancy_stripe']
+    'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz',
+    'redundancy_raidz1', 'redundancy_raidz2', 'redundancy_raidz3',
+    'redundancy_stripe']
 tags = ['functional', 'redundancy']
 
 [tests/functional/refquota]
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
index b2d4414b2906..7b85d6a1bf5f 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
@@ -9,6 +9,7 @@ dist_pkgdata_SCRIPTS = \
 	redundancy_draid_spare2.ksh \
 	redundancy_draid_spare3.ksh \
 	redundancy_mirror.ksh \
+	redundancy_raidz.ksh \
 	redundancy_raidz1.ksh \
 	redundancy_raidz2.ksh \
 	redundancy_raidz3.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh
new file mode 100644
index 000000000000..8d32e0603ae8
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh
@@ -0,0 +1,198 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by vStack. All rights reserved.
+# Copyright (c) 2021 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	RAIDZ should provide redundancy
+#
+# STRATEGY:
+#	1. Create block device files for the test raidz pool
+#	2. For each parity value [1..3]
+#	    - create raidz pool
+#	    - fill it with some directories/files
+#	    - verify resilver by replacing devices
+#	    - verify scrub by zeroing devices
+#	    - destroy the raidz pool
+
+typeset -r devs=6
+typeset -r dev_size_mb=512
+
+typeset -a disks
+
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+
+function cleanup
+{
+	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+
+	for i in {0..$devs}; do
+		rm -f "$TEST_BASE_DIR/dev-$i"
+	done
+
+	set_tunable32 PREFETCH_DISABLE $prefetch_disable
+}
+
+function test_resilver # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+}
+
+function test_scrub # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+	typeset combrec=$4
+
+	log_must zpool export $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+	device=$TEST_BASE_DIR/dev-$i
+	log_must truncate -s ${dev_size_mb}M $device
+	disks[${#disks[*]}+1]=$device
+done
+
+# Disk file which will be attached
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+for nparity in 1 2 3; do
+	raid=raidz$nparity
+	dir=$TEST_BASE_DIR
+
+	log_must zpool create -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
+	log_must zfs set primarycache=metadata $TESTPOOL
+
+	log_must zfs create $TESTPOOL/fs
+	log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R
+
+	log_must zfs create -o compress=on $TESTPOOL/fs2
+	log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R
+
+	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+	log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R
+
+	typeset pool_size=$(get_pool_prop size $TESTPOOL)
+
+	log_must zpool export $TESTPOOL
+	log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+	test_resilver $TESTPOOL $nparity $dir
+	test_scrub $TESTPOOL $nparity $dir
+
+	log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "raidz redundancy test succeeded."



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202102180901.11I911Sp037718>