Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 22 Sep 2017 22:00:27 +0000 (UTC)
From:      Alan Somers <asomers@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r323937 - in projects/zfsd/head/tests/sys/cddl/zfs: include tests/hotspare tests/zfsd
Message-ID:  <201709222200.v8MM0R3M073636@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: asomers
Date: Fri Sep 22 22:00:26 2017
New Revision: 323937
URL: https://svnweb.freebsd.org/changeset/base/323937

Log:
  Fix intermittency in ZFS tests that disable SAS phys
  
  tests/sys/cddl/zfs/include/libsas.kshlib
  	In disable_sas_disk, wait for the disk to disappear before
  	returning.  If it doesn't disappear within 2 seconds, try disabling
  	the phy again.  This is necessary because disabling the phy
  	sometimes fails if there is a command outstanding at the time.  I
  	think the HBA's error recovery code might be resetting the phy in
  	that case.  Also, in enable_sas_disk, wait for the disk to reappear.
  
  tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh
  tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
  tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh
  tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh
  tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh
  tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh
  tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh
  tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh
  	Every place that was using (enable|disable)_sas_disk was already
  	waiting for the disk to (re|dis)appear, so move that code into a
  	common location in libsas.kshlib.  Also remove some superfluous
  	rescan_disk calls.
  
  Sponsored by:	Spectra Logic Corp

Modified:
  projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib
  projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh
  projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
  projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh
  projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh
  projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh
  projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh
  projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh
  projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh

Modified: projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib	Fri Sep 22 22:00:26 2017	(r323937)
@@ -154,9 +154,24 @@ function disable_sas_disk
 {
 	typeset EXPANDER=$1
 	typeset PHY=$2
+	typeset DISK=${3##*/}
 
 	# Disable the phy for this particular device
 	log_must camcontrol smppc $EXPANDER -v -p $PHY -o disable
+	# Wait up to 16 seconds for the disk to disappear.
+	for (( i=0; i<8; i=i+1)); do
+		# CAM waits 2 seconds to ensure the disk is really gone
+		sleep 2
+		if [ -c /dev/${DISK} ]; then
+			# Error recovery routines in the HBA sometimes reenable
+			# the phy if a command fails at the wrong time, so we
+			# may have to disable it multiple times.
+			log_must camcontrol smppc $EXPANDER -v -p $PHY -o disable
+		else
+			return
+		fi
+	done
+	log_fail "Disk $DISK never disappeared"
 }
 
 # Given an expander and phy on that expander, enable the phy.
@@ -169,6 +184,7 @@ function enable_sas_disk
 
 	# Send a link reset to bring the device back
 	log_must camcontrol smppc $EXPANDER -p $PHY -o linkreset
+	wait_for_disk_to_reappear 30 $EXPANDER $PHY
 }
 
 function rescan_disks
@@ -181,4 +197,18 @@ function rescan_disks
 	for device in $(echo $* | sort -u); do
 		log_must camcontrol rescan $device >/dev/null
 	done
+}
+
+function wait_for_disk_to_reappear
+{
+	typeset -i timeout=$1
+	typeset EXPANDER=$2
+	typeset PHY=$3
+
+	for ((; $timeout > 0; timeout=$timeout-1)); do
+		find_disk_by_phy $EXPANDER $PHY
+		[ -n "$FOUNDDISK" -a -e "/dev/$FOUNDDISK" ] && return
+		$SLEEP 1
+	done
+	log_fail "ERROR: Disk at ${EXPANDER}:${PHY} never reappeared"
 }

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh	Fri Sep 22 22:00:26 2017	(r323937)
@@ -57,7 +57,6 @@ cleanup() {
 	[[ $DISK0_PHY != 0 ]] && enable_sas_disk $DISK0_EXPANDER $DISK0_PHY
 	[[ $SPARE0_PHY != 0 ]] && enable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
 	[[ $SPARE1_PHY != 0 ]] && enable_sas_disk $SPARE1_EXPANDER $SPARE1_PHY
-	rescan_disks
 	if poolexists $TESTPOOL; then
 		# Test failed, provide something useful.
 		log_note "For reference, here is the final $TESTPOOL status:"
@@ -132,9 +131,8 @@ disable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
 log_must $ZPOOL replace $TESTPOOL $SPARE0_GUID $SPARE1_NAME
 wait_until_resilvered
 
-enable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
 enable_sas_disk $DISK0_EXPANDER $DISK0_PHY
-rescan_disks
+enable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
 
 log_must destroy_pool $TESTPOOL
 

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib	Fri Sep 22 22:00:26 2017	(r323937)
@@ -27,20 +27,6 @@
 
 # Common routines used by multiple zfsd tests
 
-function wait_for_disk_to_reappear
-{
-	typeset -i timeout=$1
-	typeset EXPANDER=$2
-	typeset PHY=$3
-
-	for ((; $timeout > 0; timeout=$timeout-1)); do
-		find_disk_by_phy $EXPANDER $PHY
-		[ -n "$FOUNDDISK" -a -e "/dev/$FOUNDDISK" ] && return
-		$SLEEP 1
-	done
-	log_fail "ERROR: Disk at ${EXPANDER}:${PHY} never reappeared"
-}
-
 function wait_for_pool_dev_state_change
 {
 	typeset -i timeout=$1
@@ -109,11 +95,8 @@ function do_autoreplace
 	# Remove a vdev by disabling its SAS phy
 	find_verify_sas_disk $REMOVAL_DISK
 	log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
-	disable_sas_disk $EXPANDER $PHY
+	disable_sas_disk $EXPANDER $PHY $REMOVAL_DISK
 
-	# Check to make sure the disk is gone
-	log_mustnot camcontrol inquiry $REMOVAL_DISK
-
 	# Check to make sure ZFS sees the disk as removed
 	wait_for_pool_removal 30
 
@@ -137,8 +120,6 @@ function do_autoreplace
 	# Reenable the missing dev's SAS phy
 	log_note "Reenabling phy on expander $EXPANDER phy $PHY"
 	enable_sas_disk $EXPANDER $PHY
-	rescan_disks $EXPANDER
-	wait_for_disk_to_reappear 30 $EXPANDER $PHY
 
 	# Erase the missing dev's ZFS label
 	log_must $ZPOOL labelclear -f $( find_disks $FOUNDDISK )
@@ -146,11 +127,8 @@ function do_autoreplace
 	# Disable the missing dev's SAS phy again
 	find_verify_sas_disk $FOUNDDISK
 	log_note "Disabling \"$FOUNDDISK\" on expander $EXPANDER phy $PHY"
-	disable_sas_disk $EXPANDER $PHY
+	disable_sas_disk $EXPANDER $PHY $FOUNDDISK
 
-	# Check to make sure the disk is gone
-	log_mustnot camcontrol inquiry $REMOVAL_DISK
-
 	# Import the pool
 	log_must $ZPOOL import $TESTPOOL
 	# Wait 5 seconds before enabling the phy so zfsd.log will be easier
@@ -160,8 +138,6 @@ function do_autoreplace
 	# Reenable the missing dev's SAS phy
 	log_note "Reenabling phy on expander $EXPANDER phy $PHY"
 	enable_sas_disk $EXPANDER $PHY
-	rescan_disks $EXPANDER
-	wait_for_disk_to_reappear 30 $EXPANDER $PHY
 }
 
 function autoreplace_cleanup
@@ -172,7 +148,6 @@ function autoreplace_cleanup
 	if [ -n "$REMOVAL_DISK" -a -n "$EXPANDER" -a -n "$PHY" ]; then
 		log_note "Renabling ${EXPANDER}:${PHY} for disk ${REMOVAL_DISK}"
 		enable_sas_disk $EXPANDER $PHY
-		rescan_disks $EXPANDER
 
 		# For debugging purposes, log the partial output of
 		# camcontrol to see if the disk actually came back.

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh	Fri Sep 22 22:00:26 2017	(r323937)
@@ -73,10 +73,6 @@ function verify_assertion # spare_dev
 	log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
 	disable_sas_disk $EXPANDER $PHY
 
-	# Check to make sure the disk is gone
-	find_disk_by_phy $EXPANDER $PHY
-	[ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
 	# Check to make sure ZFS sees the disk as removed
 	wait_for_pool_removal 20
 
@@ -87,7 +83,6 @@ function verify_assertion # spare_dev
 	# Reenable the  missing disk
 	log_note "Reenabling phy on expander $EXPANDER phy $PHY"
 	enable_sas_disk $EXPANDER $PHY
-	wait_for_disk_to_reappear 20 $EXPANDER $PHY
 
 	# Check that the disk has rejoined the pool & resilvered
 	wait_for_pool_dev_state_change 20 $REMOVAL_DISK ONLINE

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh	Fri Sep 22 22:00:26 2017	(r323937)
@@ -75,10 +75,6 @@ function verify_assertion # spare_dev
 	log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
 	disable_sas_disk $EXPANDER $PHY
 
-	# Check to make sure the disk is gone
-	find_disk_by_phy $EXPANDER $PHY
-	[ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
 	# Check to make sure ZFS sees the disk as removed
 	wait_for_pool_removal 20
 
@@ -90,7 +86,6 @@ function verify_assertion # spare_dev
 	# Reenable the  missing disk
 	log_note "Reenabling phy on expander $EXPANDER phy $PHY"
 	enable_sas_disk $EXPANDER $PHY
-	wait_for_disk_to_reappear 20 $EXPANDER $PHY
 }
 
 typeset REMOVAL_DISK=$DISK0

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh	Fri Sep 22 22:00:26 2017	(r323937)
@@ -73,10 +73,6 @@ function verify_assertion # spare_dev
 	log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
 	disable_sas_disk $EXPANDER $PHY
 
-	# Check to make sure the disk is gone
-	find_disk_by_phy $EXPANDER $PHY
-	[ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
 	# Check to make sure ZFS sees the disk as removed
 	wait_for_pool_removal 20
 
@@ -90,9 +86,6 @@ function verify_assertion # spare_dev
 	# Reenable the  missing disk
 	log_note "Reenabling phy on expander $EXPANDER phy $PHY"
 	enable_sas_disk $EXPANDER $PHY
-
-	# Check that the disk has returned
-	wait_for_disk_to_reappear 20 $EXPANDER $PHY
 
 	# Import the pool
 	log_must $ZPOOL import $TESTPOOL

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh	Fri Sep 22 22:00:26 2017	(r323937)
@@ -65,10 +65,6 @@ for type in "raidz" "mirror"; do
 	# there is I/O active to the
 	disable_sas_disk $EXPANDER $PHY
 
-	# Check to make sure disk is gone.
-	find_disk_by_phy $EXPANDER $PHY
-	[ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
 	# Write out data to make sure we can do I/O after the disk failure
 	log_must $DD if=/dev/zero of=$TESTDIR/$TESTFILE bs=1m count=512
 
@@ -78,7 +74,6 @@ for type in "raidz" "mirror"; do
 	# Re-enable the disk, we don't want to leave it turned off
 	log_note "Re-enabling phy $PHY on expander $EXPANDER"
 	enable_sas_disk $EXPANDER $PHY
-	wait_for_disk_to_reappear 20 $EXPANDER $PHY
 
 	# Disk should auto-join the zpool & be resilvered.
 	wait_for_pool_dev_state_change 20 $REMOVAL_DISK ONLINE

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh	Fri Sep 22 22:00:26 2017	(r323937)
@@ -102,14 +102,7 @@ for type in "raidz" "mirror"; do
 		# there is I/O active to the
 		disable_sas_disk $EXPANDER $PHY
 	done
-	rescan_disks
 
-	# Now go through the list of disks, and make sure they are all gone.
-	for CURDISK in ${TMPDISKS[*]}; do
-		# Check to make sure disk is gone.
-		log_mustnot camcontrol inquiry $CURDISK
-	done
-
 	# Make sure that the pool status is "UNAVAIL".  We have taken all
 	# of the drives offline, so it should be.
 	log_must is_pool_state $TESTPOOL UNAVAIL
@@ -121,7 +114,6 @@ for type in "raidz" "mirror"; do
 		log_note "Re-enabling phy ${PHY_LIST[$CURDISK]} on expander ${EXPANDER_LIST[$CURDISK]}"
 		enable_sas_disk ${EXPANDER_LIST[$CURDISK]} ${PHY_LIST[$CURDISK]}
 	done
-	rescan_disks
 
 	unset DISK_FOUND
 	typeset -A DISK_FOUND

Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh	Fri Sep 22 21:55:41 2017	(r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh	Fri Sep 22 22:00:26 2017	(r323937)
@@ -63,10 +63,6 @@ function remove_disk
 	# Disable the first disk.
 	disable_sas_disk $EXPANDER $PHY
 
-	# Check to make sure disk is gone.
-	find_disk_by_phy $EXPANDER $PHY
-	[ -n "$FOUNDDISK" ] && log_fail "Disk \"$DISK\" was not removed"
-
 	# Check to make sure ZFS sees the disk as removed
 	wait_for_pool_dev_state_change 20 $DISK "REMOVED|UNAVAIL"
 }
@@ -86,8 +82,6 @@ function reconnect_disk
 	enable_sas_disk $EXPANDER $PHY
 
 	log_note "Checking to see whether disk has reappeared"
-	# Make sure the disk is back in the topology
-	wait_for_disk_to_reappear 20 $EXPANDER $PHY
 
 	prev_disk=$(find_disks $DISK)
 	cur_disk=$(find_disks $FOUNDDISK)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201709222200.v8MM0R3M073636>