Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 2 Mar 2000 00:10:05 -0800 (PST)
From:      jhood@sitaranetworks.com, cgull@owl.org
To:        freebsd-bugs@FreeBSD.org
Subject:   kern/17098: /boot/loader hangs on switch to second drive
Message-ID:  <200003020810.AAA47290@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help
The following reply was made to PR kern/17098; it has been noted by GNATS.

From: jhood@sitaranetworks.com, cgull@owl.org
To: FreeBSD-gnats-submit@freebsd.org
Cc: grog@lemis.com
Subject: kern/17098: /boot/loader hangs on switch to second drive
Date: Wed, 1 Mar 2000 13:24:19 -0500 (EST)

 >Number:         17098
 >Category:       kern
 >Synopsis:       /boot/loader hangs on switch to second drive
 >Confidential:   no
 >Severity:       serious
 >Priority:       medium
 >Responsible:    freebsd-bugs
 >State:          open
 >Quarter:        
 >Keywords:       
 >Date-Required:
 >Class:          sw-bug
 >Submitter-Id:   current-users
 >Arrival-Date:   Wed Mar  1 10:30:01 PST 2000
 >Closed-Date:
 >Last-Modified:
 >Originator:     John Hood
 >Release:        FreeBSD 3.2-RELEASE i386
 >Organization:
 Sitara Networks
 >Environment:
 
 i386 3.2-RELEASE + local mods, FreeBSD installs on two drives
 
 >Description:
 
 The boot loader often hangs when requested to boot a kernel from a
 drive other than the one it started from.
 
 >How-To-Repeat:
 
 Set up a system with boot environment & kernels on two "fast" drives,
 preferably of wildly different geometry and sizes.  (Floppy I/O may be
 slow enough to cover up this problem.)  Install the sample loader.rc
 on the boot drive, editing as necessary.  Reboot & wait forever, if you
 are lucky-- the bug is a bit shy sometimes.
 
 Sample loader.rc:
 
 \ Loader.rc
 \ 1 trace!
 set currdev=disk2s1a:	\ Something other than $loaddev
 \ 11000 ms  \ Uncomment this to get a working load
 \ show
 \
 \ Includes additional commands
 include /boot/loader.4th
 \ Reads and processes loader.rc
 start
 \ Unless set otherwise, autoboot is automatic at this point
 
 
 >Fix:
 	
 There are two problems here:  the block cache code and (presumably)
 the UFS code.
 
 The block cache, as implemented, has no mechanism for distinguishing
 which device a block or block request is for.  When a different device
 is selected, it may return a block from the wrong device.  Debugging
 this was complicated by the 2-second block discard timeout-- anything
 that delayed the next read long enough for the stale cached blocks to
 expire, such as debugging printfs to a serial console or executing
 loader commands/words by hand, would make the loader work :)
 
 Secondarily, when this happens, some other part of the loader reacts
 poorly to bogus data and hangs-- I'd guess that it's the UFS code, but
 I've not traced the problem.
 
 The block-cache problem exists in any version of the loader that has
 the block cache implemented.
 
 Minimalistic i386-only fix for the block-cache problem follows-- diffs
 are against a locally-modified 3.2-RELEASE.  Since the loader's device
 architecture does not have a globally-visible way of referring to a
 specific device and unit, this appears to be the best way to pass the
 necessary info into the block cache, short of wholesale
 rearchitecting.
 
   --john hood
 
 diff -ur /sys/boot/common/bcache.c ./common/bcache.c
 --- /sys/boot/common/bcache.c	Sat Feb  6 09:27:29 1999
 +++ ./common/bcache.c	Fri Feb 18 17:35:19 2000
 @@ -23,7 +23,7 @@
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   *
 - *	$Id: bcache.c,v 1.4.2.1 1999/02/06 14:27:29 dcs Exp $
 + *	$Id: bcache.c,v 1.2 2000/02/18 22:35:19 jhood Exp $
   */
  
  /*
 @@ -62,17 +62,33 @@
  static int		bcache_hits, bcache_misses, bcache_ops, bcache_bypasses;
  static int		bcache_bcount;
  
 +static void		*bcache_dkstrategy;
 +static int		bcache_dkunit;
 +
  static void	bcache_insert(caddr_t buf, daddr_t blkno);
  static int	bcache_lookup(caddr_t buf, daddr_t blkno);
  
  /*
 + * Invalidate the cache
 + */
 +void
 +bcache_flush(void)
 +{
 +    int		i;
 +
 +    if (bcache_data != NULL) {
 +        for (i = 0; i < bcache_nblks; i++) {
 +	    bcache_ctl[i].bc_count = -1;
 +	    bcache_ctl[i].bc_blkno = -1;
 +	}
 +    }
 +}
 +/*
   * Initialise the cache for (nblks) of (bsize).
   */
  int
  bcache_init(int nblks, size_t bsize)
  {
 -    int		i;
 -
      /* discard any old contents */
      if (bcache_data != NULL) {
  	free(bcache_data);
 @@ -97,11 +113,9 @@
  	return(ENOMEM);
      }
  
 -    /* Invalidate the cache */
 -    for (i = 0; i < bcache_nblks; i++) {
 -	bcache_ctl[i].bc_count = -1;
 -	bcache_ctl[i].bc_blkno = -1;
 -    }
 +    bcache_dkstrategy = NULL;
 +
 +    /* bcache_flush() will happen on first call to bcache_strategy */
  
      return(0);
  }
 @@ -130,6 +144,16 @@
  	DEBUG("bypass %d from %d", size / bcache_blksize, blk);
  	bcache_bypasses++;
  	return(dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
 +    }
 +
 +    /* has a new device/unit been requested?  flush cache */
 +    if ((bcache_dkstrategy != dd->dv_strategy) || 
 +	(bcache_dkunit != dd->dv_dkunit)) {
 +	    DEBUG("cache flush, lastunit = %d newunit = %d", 
 +		   bcache_dkunit, dd->dv_dkunit);
 +	    bcache_flush();
 +	    bcache_dkstrategy = dd->dv_strategy;
 +	    bcache_dkunit = dd->dv_dkunit;
      }
  
      nblk = size / bcache_blksize;
 diff -ur /sys/boot/common/bootstrap.h ./common/bootstrap.h
 --- /sys/boot/common/bootstrap.h	Sat Feb  6 09:27:29 1999
 +++ ./common/bootstrap.h	Fri Feb 18 17:35:20 2000
 @@ -23,7 +23,7 @@
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   *
 - *	$Id: bootstrap.h,v 1.18.2.1 1999/02/06 14:27:29 dcs Exp $
 + *	$Id: bootstrap.h,v 1.2 2000/02/18 22:35:20 jhood Exp $
   */
  
  #include <sys/types.h>
 @@ -84,6 +84,7 @@
  struct bcache_devdata
  {
      int         (*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size, void *buf, size_t *rsize);
 +    int		dv_dkunit;
      void	*dv_devdata;
  };
  
 diff -ur /sys/boot/i386/libi386/biosdisk.c ./i386/libi386/biosdisk.c
 --- /sys/boot/i386/libi386/biosdisk.c	Tue Mar 16 09:58:25 1999
 +++ ./i386/libi386/biosdisk.c	Fri Feb 18 17:35:47 2000
 @@ -23,7 +23,7 @@
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   *
 - *	$Id: biosdisk.c,v 1.20.2.4 1999/03/16 14:58:25 dcs Exp $
 + *	$Id: biosdisk.c,v 1.3 2000/02/18 22:35:47 jhood Exp $
   */
  
  /*
 @@ -573,6 +575,8 @@
      struct bcache_devdata	bcd;
      
      bcd.dv_strategy = bd_realstrategy;
 +    bcd.dv_dkunit = ((struct open_disk *)(((struct i386_devdesc *)
 +		    devdata)->d_kind.biosdisk.data))->od_dkunit;
      bcd.dv_devdata = devdata;
      return(bcache_strategy(&bcd, rw, dblk, size, buf, rsize));
  }
 
 >Release-Note:
 >Audit-Trail:
 >Unformatted:
 
 
 To Unsubscribe: send mail to majordomo@FreeBSD.org
 with "unsubscribe freebsd-bugs" in the body of the message
 
 


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200003020810.AAA47290>