Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 10 Mar 2011 19:54:23 +0100
From:      Marius Strobl <marius@alchemy.franken.de>
To:        Roger Hammerstein <cheeky.m@live.com>
Cc:        pjd@freebsd.org, dim@freebsd.org, freebsd-sparc64@freebsd.org, mm@freebsd.org
Subject:   Re: sparc64 hang with zfs v28
Message-ID:  <20110310185423.GA50419@alchemy.franken.de>
In-Reply-To: <BAY147-w5926709E28BF613C663D48F9C90@phx.gbl>
References:  <BAY147-w7342A790723770DCE8BD4F9C00@phx.gbl> <BAY147-w229E76D137B7A0DCEB3696F9C70@phx.gbl> <20110307192239.GA31314@alchemy.franken.de> <BAY147-w5926709E28BF613C663D48F9C90@phx.gbl>

next in thread | previous in thread | raw e-mail | index | archive | help
On Wed, Mar 09, 2011 at 10:03:24AM -0500, Roger Hammerstein wrote:
> 
> 
> > Thinking about it this might be caused by the binutils regression
> > also affecting userland. If a world built with the following patch
> > in place still behaves the same you should better contact the ZFS
> > maintainers though:
> > http://people.freebsd.org/~marius/elfxx-sparc.c.diff
> 
> I kept the same cvsup from Sunday, added that patch and rebuilt world and kernel.
> 
> Now 'kldload zfs' or 'zpool status' locks the machine up, no serial console,
> no network.  It doesn't respond to breaks on the serial console. 
>  (I didn't put the alternate break sequence in).
> 

Sorry, when porting that fix from another version of binutils to what
is in head I introduced a bug. I've corrected the patch at the above
URL accodingly. With that version applied to a r219086 checkout (i.e.
right before the ZFS v28 import) now both pre-loading modules and
loading them after boot as well as ZFS works again:
v215# kldload geom_mirror.ko
GEOM_MIRROR: Device mirror/gm0 launched (1/1).
v215# kldload zfs.ko
ZFS NOTICE: Prefetch is disabled by default if less than 4GB of RAM is present;
            to enable, add "vfs.zfs.prefetch_disable=0" to /boot/loader.conf.

ZFS filesystem version 4
ZFS storage pool version 15
v215#
v215# mdconfig -a -t malloc -s 100m
md0
v215# mdconfig -a -t malloc -s 100m
md1
v215# zpool create tank mirror /dev/md0 /dev/md1
v215# zpool list
NAME   SIZE   USED  AVAIL    CAP  HEALTH  ALTROOT
tank  95.5M  85.5K  95.4M     0%  ONLINE  -
v215#
v215# zpool status
  pool: tank
 state: ONLINE
 scrub: none requested
config:

        NAME        STATE     READ WRITE CKSUM
        tank        ONLINE       0     0     0
          mirror    ONLINE       0     0     0
            md0     ONLINE       0     0     0
            md1     ONLINE       0     0     0

errors: No known data errors

Dimitry, are you ok with that version being committed to head?

As for the remaining ZFS hangs you're seeing I can reproduce these
with a r219092 checkout (i.e. right after the ZFS v28 import plus
fixes) and the above patch applied, so this problem appears to be
orthogonal:
v215# kldload zfs.ko
ZFS NOTICE: Prefetch is disabled by default if less than 4GB of RAM is present;
            to enable, add "vfs.zfs.prefetch_disable=0" to /boot/loader.conf.
ZFS filesystem version 5
ZFS storage pool version 28
v215# mdconfig -a -t malloc -s 100m
md0
v215# mdconfig -a -t malloc -s 100m
md1
v215# zpool create tank mirror /dev/md0 /dev/md1
^Z
<hangs here>

The end of the corresponding ktrace dump is:
<...>
v215# kdump | tail
  1683 initial thread RET   madvise 0
  1683 initial thread CALL  close(0x5)
  1683 initial thread RET   close 0
  1683 initial thread CALL  __sysctl(0x7fdffff8778,0x2,0x7fdffff8840,0x7fdffff88a8,0x40d41168,0x13)
  1683 initial thread SCTL  "sysctl.name2oid"
  1683 initial thread RET   __sysctl 0
  1683 initial thread CALL  __sysctl(0x7fdffff8840,0x4,0x40e4f158,0x7fdffff8978,0,0)
  1683 initial thread SCTL  "vfs.zfs.version.spa"
  1683 initial thread RET   __sysctl 0
  1683 initial thread CALL  ioctl(0x3,0xd5985a00 ,0x7fdffff8a60)

Please work with the ZFS maintainers (CC'ed) to get this fixed.

Marius

> 
> With the kernel.old from before your patch, 'kldload zfs' will work.
> 
> falcon# kldload zfs
> 
> ZFS NOTICE: Prefetch is disabled by default if less than 4GB of RAM is present;
>             to enable, add "vfs.zfs.prefetch_disable=0" to /boot/loader.conf.
> ZFS filesystem version 5
> ZFS storage pool version 28
> falcon#
> falcon#
> falcon#
> falcon#
> falcon# kldstat
> Id Refs Address            Size     Name
>  1    9 0xc0000000 e42878   kernel
>  2    1 0xc14a2000 32e000   zfs.ko
>  3    1 0xc17d0000 104000   opensolaris.ko
> falcon#
> 
> 'zpool status' will still eat an entire cpu.
> 
>  1020 root        1  49    0 22712K  2888K CPU1    1   0:00 99.92% zpool
> 
> falcon# procstat -kk 1020
>   PID    TID COMM             TDNAME           KSTACK
>  1020 100063 zpool            initial thread   <running>
> falcon#
> 
> 
> falcon# zpool status
> load: 0.76  cmd: zpool 1020 [running] 82.09r 0.00u 0.04s 97% 2856k
> load: 0.76  cmd: zpool 1020 [running] 82.33r 0.00u 0.04s 97% 2856k
> load: 0.76  cmd: zpool 1020 [running] 82.55r 0.00u 0.04s 97% 2856k
> load: 0.76  cmd: zpool 1020 [running] 82.75r 0.00u 0.04s 97% 2856k
> load: 0.76  cmd: zpool 1020 [running] 82.93r 0.00u 0.04s 97% 2856k
> 
> 
> 
> 
> =================
> a truss of 'zpool status' without the zfs module loaded gets stuck after
> this:  (zpool status will cause the zfs module to get loaded)
> 
> falcon# truss zpool status
> mmap(0x0,32768,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 1076183040 (0x40254000)
> issetugid(0x40256000,0x0,0x0,0x0,0x1000000000000000,0x2000000000000000) = 0 (0x0)
> open("/etc/libmap.conf",O_RDONLY,0666)           ERR#2 'No such file or directory'
> open("/var/run/ld-elf.so.hints",O_RDONLY,030036223340) = 3 (0x3)
> read(3,"tnhE\0\0\0\^A\0\0\0\M^@\0\0\0-\0"...,128) = 128 (0x80)
> mmap(0x0,40960,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 1076215808 (0x4025c000)
> lseek(3,0x80,SEEK_SET)                           = 128 (0x80)
> read(3,"/lib:/usr/lib:/usr/lib/compat:/u"...,45) = 45 (0x2d)
> close(3)                                         = 0 (0x0)
> access("/lib/libavl.so.2",0)                     = 0 (0x0)
> open("/lib/libavl.so.2",O_RDONLY,0666)           = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=9467939,size=8416,blksize=16384 }) = 0 (0x0)
> pread(0x3,0x40358a78,0x2000,0x0,0x0,0x40367220)  = 8192 (0x2000)
> mmap(0x0,1056768,PROT_NONE,MAP_PRIVATE|MAP_ANON|MAP_NOCORE,-1,0x0) = 1077313536 (0x40368000)
> mmap(0x40368000,8192,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE,3,0x0) = 1077313536 (0x40368000)
> mmap(0x40468000,8192,PROT_READ|PROT_WRITE|PROT_EXEC,MAP_PRIVATE|MAP_FIXED,3,0x0) = 1078362112 (0x40468000)
> close(3)          
> <SNIP more libs>
> 
> access("/lib/libthr.so.3",0)                     = 0 (0x0)
> open("/lib/libthr.so.3",O_RDONLY,020)            = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=9467937,size=116936,blksize=16384 }) = 0 (0x0)
> pread(0x3,0x40358a78,0x2000,0x0,0x0,0x40367188)  = 8192 (0x2000)
> mmap(0x0,1204224,PROT_NONE,MAP_PRIVATE|MAP_ANON|MAP_NOCORE,-1,0x0) = 1092362240 (0x411c2000)
> mmap(0x411c2000,106496,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE,3,0x0) = 1092362240 (0x411c2000)
> mmap(0x412da000,16384,PROT_READ|PROT_WRITE|PROT_EXEC,MAP_PRIVATE|MAP_FIXED,3,0x18000) = 1093509120 (0x412da000)
> mprotect(0x412de000,40960,PROT_READ|PROT_WRITE|PROT_EXEC) = 0 (0x0)
> close(3)                                         = 0 (0x0)
> mmap(0x0,40960,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 1076256768 (0x40266000)
> mmap(0x0,106496,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 1076297728 (0x40270000)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGT
> TOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sysarch(0x1,0x41082fb8,0xc0792e40,0x40f75653,0x412dbde8,0x800005) = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGT
> TOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGT
> TOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> getpid()                                         = 1002 (0x3ea)
> __sysctl(0x7fdffffe0b8,0x2,0x412e7f38,0x7fdffffe0c0,0x0,0x0) = 0 (0x0)
> __sysctl(0x7fdffffdeb8,0x2,0x7fdffffdf80,0x7fdffffdfe8,0x411d96d8,0xd) = 0 (0x0)
> __sysctl(0x7fdffffdf80,0x3,0x412e6e48,0x7fdffffe0c0,0x0,0x0) = 0 (0x0)
> readlink("/etc/malloc.conf",0x7fdffffda06,1024)  ERR#2 'No such file or directory'
> issetugid(0xffffffffffffffff,0xffffffffffffffff,0x400,0x40e965f6,0x41092968,0x400000) = 0 (0x0)
> break(0x2176d8)                                  = 0 (0x0)
> break(0x2176d8)                                  = 0 (0x0)
> break(0x400000)                                  = 0 (0x0)
> mmap(0x0,4194304,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 1093566464 (0x412e8000)
> mmap(0x416e8000,1146880,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 1097760768 (0x416e8000)
> munmap(0x412e8000,1146880)                       = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> thr_self(0x41404400,0x412e6e48,0x2000,0x412dd4a8,0x412e7f44,0x412dd450) = 0 (0x0)
> mmap(0x7fdffbfe000,8192,PROT_NONE,MAP_ANON,-1,0x0) = 8787498885120 (0x7fdffbfe000)
> thr_set_name(0x186dd,0x411d9768,0x0,0x1000,0xffffffffffffffff,0x0) = 0 (0x0)
> rtprio_thread(0x0,0x186dd,0x7fdffffdfec,0x22,0xffffffffffffffff,0x0) = 0 (0x0)
> sysarch(0x2,0x410853b0,0x600,0x80,0x0,0x0)       = 0 (0x0)
> sigaction(32,{ 0x411ce740 SA_SIGINFO ss_t },0x0) = 0 (0x0)
> sigprocmask(SIG_UNBLOCK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,0x0,0x0)                 = 0 (0x0)
> sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|
> 
> 
> 
> And it stops right there after SIGINT.
> 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20110310185423.GA50419>