Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 27 Sep 1995 19:40:59 +0100
From:      hohmuth@inf.tu-dresden.de
To:        FreeBSD-gnats-submit@freebsd.org
Subject:   kern/745: occasional filesystem inconsistencies, and "panic: ffs_valloc: dup alloc"
Message-ID:  <199509271840.TAA00602@irs201.inf.tu-dresden.de>
Resent-Message-ID: <199509271900.MAA10163@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         745
>Category:       kern
>Synopsis:       occasional filesystem inconsistencies, and "panic: ffs_valloc: dup alloc"
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Wed Sep 27 12:00:01 PDT 1995
>Last-Modified:
>Originator:     Michael Hohmuth
>Organization:
Dept. of Computer Science, TU Dresden, Germany
>Release:        FreeBSD 2.0.5-RELEASE i386
>Environment:

"uname -a" yields:
FreeBSD olymp.inf.tu-dresden.de 2.0.5-RELEASE FreeBSD 2.0.5-RELEASE #0: Wed Jul  5 12:39:10  1995     root@olymp.inf.tu-dresden.de:/usr/src/sys/compile/OLYMP  i386

Main board is an ASUS SP3G + i486DX4 with the SCSI devices attached
directly to the motherboard's NCR53c810 controller.  16 MB RAM.

The main board and the SCSI disks are configured as suggested by
Stefan Esser <se@zpr.uni-koeln.de> (who's one of the authors of the
NCR device driver and who uses a similar configuration; he answered
when I reported a similar problem in comp.unix.bsd.freebsd.misc about
a month ago), as follows: Main board has all PCI options enabled
except ISA-GAT; "refresh" is configured as "normal".  Disks have auto
reallocation on read/write errors, early recovery and error posting
enabled.

There are three SCSI devices: Two equivalent SCSI disks and one tape
drive (see the logs below for exact model names).  SCSI cabling is
short (all devices are in one casing).

One disk drive has only one partition which contains the FreeBSD
system.  The other disk just contains a swap partition.

Following in this section are the kernel config file and the kernel's
logged boot-up messages.

The kernel config file looks like this:
-------------------- begin kernel config file "OLYMP" --------------------
#
# GENERIC -- Generic machine with WD/AHx/NCR/BTx family disks
#
#	GENERIC,v 1.45.2.3 1995/06/05 21:50:41 jkh Exp
#

machine		"i386"
#cpu		"I386_CPU"
cpu		"I486_CPU"
#cpu		"I586_CPU"
ident		OLYMP
maxusers	10

#options		MATH_EMULATE		#Support for x87 emulation
options		INET			#InterNETworking
options		FFS			#Berkeley Fast Filesystem
options		NFS			#Network Filesystem
options		MSDOSFS			#MSDOS Filesystem
options		"CD9660"		#ISO 9660 Filesystem
options		PROCFS			#Process filesystem
options		"COMPAT_43"		#Compatible with BSD 4.3
options		"SCSI_DELAY=15"		#Be pessimistic about Joe SCSI device
#options		BOUNCE_BUFFERS		#include support for DMA bounce buffers
options		UCONSOLE		#Allow users to grab the console

config		kernel	root on wd0 

controller	isa0
controller	pci0

controller	fdc0	at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr
disk		fd0	at fdc0 drive 0
disk		fd1	at fdc0 drive 1
tape		ft0	at fdc0 drive 2

controller	wdc0	at isa? port "IO_WD1" bio irq 14 vector wdintr
disk		wd0	at wdc0 drive 0
disk		wd1	at wdc0 drive 1

#controller	wdc1	at isa? port "IO_WD2" bio irq 15 vector wdintr
#disk		wd2	at wdc1 drive 0
#disk		wd3	at wdc1 drive 1

controller	ncr0
controller	ahc0

#controller	bt0	at isa? port "IO_BT0" bio irq ? vector btintr
#controller	uha0	at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr
controller	ahc1	at isa? bio irq ? vector ahcintr
controller	ahb0	at isa? bio irq ? vector ahbintr
controller	aha0	at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr
#controller	aic0    at isa? port 0x340 bio irq 11 vector aicintr
#controller	nca0	at isa? port 0x1f88 bio irq 10 vector ncaintr
#controller	nca1	at isa? port 0x350 bio irq 5 vector ncaintr
#controller	sea0	at isa? bio irq 5 iomem 0xc8000 iosiz 0x2000 vector seaintr

controller	scbus0

device		sd0

device		st0

device		cd0	#Only need one of these, the code dynamically grows

#device		wt0	at isa? port 0x300 bio irq 5 drq 1 vector wtintr
#device		mcd0	at isa? port 0x300 bio irq 10 vector mcdintr
#device		mcd1	at isa? port 0x340 bio irq 11 vector mcdintr

#controller	matcd0	at isa? port ? bio

#device		scd0	at isa? port 0x230 bio

# syscons is the default console driver, resembling an SCO console
device		sc0	at isa? port "IO_KBD" tty irq 1 vector scintr
# Enable this and PCVT_FREEBSD for pcvt vt220 compatible console driver
#device		vt0	at isa? port "IO_KBD" tty irq 1 vector pcrint
#options		"PCVT_FREEBSD=210"	# pcvt running on FreeBSD 2.1
#options		XSERVER			# include code for XFree86

device		npx0	at isa? port "IO_NPX" irq 13 vector npxintr

device		sio0	at isa? port "IO_COM1" tty irq 4 vector siointr
device		sio1	at isa? port "IO_COM2" tty irq 3 vector siointr
#device		sio2	at isa? port "IO_COM3" tty irq 5 vector siointr
#device		sio3	at isa? port "IO_COM4" tty irq 9 vector siointr

device		lpt0	at isa? port? tty irq 7 vector lptintr
#device		lpt1	at isa? port? tty
#device		lpt2	at isa? port? tty

# Order is important here due to intrusive probes, do *not* alphabetize
# this list of network interfaces until the probes have been fixed.
# Right now it appears that the ie0 must be probed before ep0. See
# revision 1.20 of this file.
#device de0
device ed0 at isa? port 0x280 net irq  5 iomem 0xd8000 vector edintr
#device ed1 at isa? port 0x300 net irq  5 iomem 0xd8000 vector edintr
#device ie0 at isa? port 0x360 net irq  7 iomem 0xd0000 vector ieintr
#device ep0 at isa? port 0x300 net irq 10 vector epintr
#device ix0 at isa? port 0x300 net irq 10 iomem 0xd0000 iosiz 32768 vector ixintr
device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr
#device lnc0 at isa? port 0x280 net irq 10 drq 0 vector lncintr
#device lnc1 at isa? port 0x300 net irq 10 drq 0 vector lncintr
#device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr
#device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr

pseudo-device	loop
pseudo-device	ether
pseudo-device	log
pseudo-device	sl	1
# ijppp uses tun instead of ppp device
#pseudo-device	ppp	1
pseudo-device	tun	1
pseudo-device	pty	16
pseudo-device	gzip		# Exec gzipped a.out's
-------------------- end kernel config file "OLYMP" --------------------

Here are the boot up messages:
-------------------- begin boot up messages --------------------
Sep 27 18:12:21 olymp /kernel: FreeBSD 2.0.5-RELEASE #0: Wed Jul  5 12:39:10  1995
Sep 27 18:12:21 olymp /kernel:     root@olymp.inf.tu-dresden.de:/usr/src/sys/compile/OLYMP
Sep 27 18:12:21 olymp /kernel: CPU: i486 DX4 (486-class CPU)
Sep 27 18:12:21 olymp /kernel:   Origin = "GenuineIntel"  Id = 0x480  Stepping=0
Sep 27 18:12:21 olymp /kernel:   Features=0x3<FPU,VME>
Sep 27 18:12:21 olymp /kernel: real memory  = 16384000 (4000 pages)
Sep 27 18:12:21 olymp /kernel: avail memory = 14999552 (3662 pages)
Sep 27 18:12:21 olymp /kernel: Probing for devices on the ISA bus:
Sep 27 18:12:21 olymp /kernel: sc0 at 0x60-0x6f irq 1 on motherboard
Sep 27 18:12:21 olymp /kernel: sc0: VGA color <16 virtual consoles, flags=0x0>
Sep 27 18:12:21 olymp /kernel: ed0 not found at 0x280
Sep 27 18:12:22 olymp /kernel: sio0 at 0x3f8-0x3ff irq 4 on isa
Sep 27 18:12:22 olymp /kernel: sio0: type 16550A
Sep 27 18:12:22 olymp /kernel: sio1 at 0x2f8-0x2ff irq 3 on isa
Sep 27 18:12:22 olymp /kernel: sio1: type 16550A
Sep 27 18:12:22 olymp /kernel: lpt0 at 0x378-0x37f irq 7 on isa
Sep 27 18:12:22 olymp /kernel: lpt0: Interrupt-driven port
Sep 27 18:12:22 olymp /kernel: lp0: TCP/IP capable interface
Sep 27 18:12:22 olymp /kernel: fdc0 at 0x3f0-0x3f7 irq 6 drq 2 on isa
Sep 27 18:12:22 olymp /kernel: fdc0: NEC 72065B
Sep 27 18:12:22 olymp /kernel: fd0: 1.44MB 3.5in
Sep 27 18:12:22 olymp /kernel: wdc0 not found at 0x1f0
Sep 27 18:12:22 olymp /kernel: ahc1 not found
Sep 27 18:12:22 olymp /kernel: ahb0 not found
Sep 27 18:12:22 olymp /kernel: aha0 not found at 0x330
Sep 27 18:12:22 olymp /kernel: le0 at 0x300-0x30f irq 5 maddr 0xd0000 msize 65536 on isa
Sep 27 18:12:22 olymp /kernel: le0: DE200 ethernet address 08:00:2b:1d:aa:46
Sep 27 18:12:22 olymp /kernel: npx0 on motherboard
Sep 27 18:12:22 olymp /kernel: npx0: INT 16 interface
Sep 27 18:12:22 olymp /kernel: Probing for devices on the pci0 bus:
Sep 27 18:12:22 olymp /kernel: 	configuration mode 2 allows 16 devices.
Sep 27 18:12:22 olymp /kernel: chip0 <Intel 82424ZX cache DRAM controller> rev 4 on pci0:0
Sep 27 18:12:22 olymp /kernel: ncr0 <ncr 53c810 scsi> rev 2 int a irq 9 on pci0:1
Sep 27 18:12:22 olymp /kernel: 	reg20: virtual=0xf2a91000 physical=0xfbfef000 size=0x100
Sep 27 18:12:22 olymp /kernel: ncr0: restart (scsi reset).
Sep 27 18:12:22 olymp /kernel: ncr0 scanning for targets 0..6 (V2 pl21 95/03/21)
Sep 27 18:12:23 olymp /kernel: ncr0 waiting for scsi devices to settle
Sep 27 18:12:23 olymp /kernel: (ncr0:0:0): "SEAGATE ST31230N 0300" type 0 fixed SCSI 2
Sep 27 18:12:23 olymp /kernel: sd0(ncr0:0:0): Direct-Access 
Sep 27 18:12:23 olymp /kernel: sd0(ncr0:0:0): FAST SCSI-2 100ns (10 Mb/sec) offset 8.
Sep 27 18:12:23 olymp /kernel: 1010MB (2069860 512 byte sectors)
Sep 27 18:12:23 olymp /kernel: (ncr0:1:0): "SEAGATE ST31230N 0300" type 0 fixed SCSI 2
Sep 27 18:12:23 olymp /kernel: sd1(ncr0:1:0): Direct-Access 
Sep 27 18:12:23 olymp /kernel: sd1(ncr0:1:0): FAST SCSI-2 100ns (10 Mb/sec) offset 8.
Sep 27 18:12:23 olymp /kernel: 1010MB (2069860 512 byte sectors)
Sep 27 18:12:23 olymp /kernel: (ncr0:2:0): "WANGTEK 5150ES SCSI ES41 B230" type 1 removable SCSI 1
Sep 27 18:12:23 olymp /kernel: st0(ncr0:2:0): Sequential-Access 
Sep 27 18:12:23 olymp /kernel: st0(ncr0:2:0): asynchronous.
Sep 27 18:12:23 olymp /kernel: 
Sep 27 18:12:23 olymp /kernel: st0(ncr0:2:0): asynchronous.
Sep 27 18:12:23 olymp /kernel: drive offline
Sep 27 18:12:24 olymp /kernel: chip1 <Intel 82378IB PCI-ISA bridge> rev 132 on pci0:2
Sep 27 18:12:24 olymp /kernel: vga0 <VGA-compatible display device> rev 0 int a irq ?? on pci0:4
Sep 27 18:12:24 olymp /kernel: pci0: uses 8388864 bytes of memory from fb000000 upto fbfef0ff.
Sep 27 18:12:24 olymp /kernel: pci0: uses 256 bytes of I/O space from e800 upto e8ff.
Sep 27 18:12:24 olymp /kernel: changing root device to sd0a
-------------------- end boot up messages --------------------

>Description:

Every once in a while, my daily "fsck -n" run reports truncated,
unreferenced and/or bad inodes or names pointing to unallocated
inodes.  So far, any real damage to data files hasn't occurred (as far
as I have checked); seems like it's always just the filesystem's
meta-data which is corrupt.  (Exactly the same errors also occur when
`fsck'ing in single-user mode with an r/o-mounted file system, so this
is not just a problem specific to running `fsck' in multi-user.)

The same problem may also have caused the following panic:

    mode=0100644, inum=18573, fs=/
    panic: ffs_valloc: dup alloc

When I reported a similar problem to comp.unix.bsd.freebsd.misc about
a month ago (subject = "2.0.5R: bad inodes once a week...", msg-id =
<41vgso$pk0@irzr17.inf.tu-dresden.de>), Stefan Esser
<se@zpr.uni-koeln.de> (co-author of the NCR device driver) responded
and helped me to tune my main board and SCSI setup and make sure that
the SCSI disks report possible problems to the device driver (see
Config section above).  However, the kernel has never logged any
device problems.

For these reasons (only meta-data corruption and no hardware problems
logged) I don't believe in a hardware failure; rather, I guess there
might be something wrong with the inode (or other meta-data) update
code.  However, I wonder why I seem to be the only one who's
experiencing this; perhaps my kernel configuration is the key to this.

>How-To-Repeat:

In single-user mode, with the disk mounted r/o:

    # fsck -n
    ** /dev/rsd0a (NO WRITE)
    ** Last Mounted on /
    ** Root file system
    ** Phase 1 - Check Blocks and Sizes
    ** Phase 2 - Check Pathnames
    UNALLOCATED  I=45811  OWNER=sr1 MODE=0
    SIZE=0 MTIME=Sep 27 17:40 1995 
    NAME=/usr/home/sr1/etc/harvest-cache/cache/88/1688

    REMOVE? no

    ** Phase 3 - Check Connectivity
    ** Phase 4 - Check Reference Counts
    ** Phase 5 - Check Cyl groups
    BLK(S) MISSING IN BIT MAPS
    SALVAGE? no

    SUMMARY INFORMATION BAD
    SALVAGE? no

    CLEAN FLAG NOT SET IN SUPERBLOCK
    FIX? no

    63800 files, 708021 used, 294504 free (13072 frags, 35179 blocks, 1.3% fragmentation)


Here's the output copied from my Usenet article mentioned above:


    ** /dev/rsd0a (NO WRITE)
    ** Last Mounted on /
    ** Root file system
    ** Phase 1 - Check Blocks and Sizes
    PARTIALLY TRUNCATED INODE I=180124
    SALVAGE? no

    423867417 BAD I=180124
    -1273412684 BAD I=180124
    1135876419 BAD I=180124
    423867417 BAD I=180124
    ** Phase 2 - Check Pathnames
    ** Phase 3 - Check Connectivity
    ** Phase 4 - Check Reference Counts
    UNREF FILE  I=9994  OWNER=sr1 MODE=100644
    SIZE=593 MTIME=Aug 28 21:16 1995 
    RECONNECT? no


    CLEAR? no

    UNREF FILE  I=153262  OWNER=sr1 MODE=100644
    SIZE=1414 MTIME=Aug 28 20:47 1995 
    RECONNECT? no


    CLEAR? no

    UNREF FILE  I=175527  OWNER=sr1 MODE=100644
    SIZE=8490 MTIME=Aug 28 21:03 1995 
    RECONNECT? no


    CLEAR? no

    BAD/DUP FILE I=180124  OWNER=sr1 MODE=100644
    SIZE=126629 MTIME=Aug 28 21:06 1995 
    CLEAR? no

    UNREF FILE  I=184335  OWNER=sr1 MODE=100644
    SIZE=3259 MTIME=Aug 28 21:15 1995 
    RECONNECT? no


    CLEAR? no

    UNREF FILE  I=202484  OWNER=sr1 MODE=100644
    SIZE=407 MTIME=Aug 28 21:16 1995 
    RECONNECT? no


    CLEAR? no

    UNREF FILE  I=238524  OWNER=sr1 MODE=100644
    SIZE=477 MTIME=Aug 28 21:16 1995 
    RECONNECT? no


    CLEAR? no

    ** Phase 5 - Check Cyl groups
    FREE BLK COUNT(S) WRONG IN SUPERBLK
    SALVAGE? no

    BLK(S) MISSING IN BIT MAPS
    SALVAGE? no

    SUMMARY INFORMATION BAD
    SALVAGE? no

    CLEAN FLAG NOT SET IN SUPERBLOCK
    FIX? no

    60922 files, 683350 used, 319143 free (11295 frags, 38481 blocks, 1.1% fragmentation)

>Fix:

My workaround is to regularly `fsck' the disks... :(

>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199509271840.TAA00602>