Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 07 Aug 1999 22:59:40 +0200
From:      Peter Holm <peter@holm.cc>
To:        Freebsd-hackers@freebsd.org
Subject:   NFS V3 and mkdir bug
Message-ID:  <37AC9E3B.49CD3DA6@holm.cc>

next in thread | raw e-mail | index | archive | help
I have a test program that will fail under nfs v3, but not under v2. The
same test program works
fine under ufs.
The error involves mkdir() and heavy nfs load. I have two different
situations that fail:
1) mkdir() followed by stat(), where the stat() fails consistently with
ENOENT
2) A sequence of mkdir() followed by a sequence of rmdir(). Some of the
rmdir() calls will fail, but if repeated they eventually succeed.

Here are some of my findings:

$ ps auxww | grep dir2
root     316  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.27 ./dir2 210

root     319  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.22 ./dir2 210

root     322  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.18 ./dir2 210

root     325  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.26 ./dir2 210

root     331  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.28 ./dir2 210

root     334  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.19 ./dir2 210

root     337  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.30 ./dir2 210

root     340  0.0  1.0  1524 1292  p0  D     8:59pm -2341055:-5.36
./dir2 210
root     343  0.0  1.0  1524 1292  p0  D     8:59pm   0:01.25 ./dir2 210

root     346  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.18 ./dir2 210

root     349  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.26 ./dir2 210

root     352  0.0  1.0  1524 1228  p0  D     8:59pm  11:36.72 ./dir2 210

root     355  0.0  1.0  1524 1292  p0  D     8:59pm   0:01.22 ./dir2 210

root     358  0.0  1.0  1524 1228  p0  D     8:59pm   0:01.32 ./dir2 210

console:
calcru: negative time of -693963490 usec for pid 340 (dir2)

mkdir.
p00340.d0210/d209/d208/d207/d206/d205/d204/d203/d202/d201/d200/d199/d198/d197/d196/d195/d194/d193/d192/d191/d190/d189/d188/d187/d186/d185/d184/d183/d182/d181/d180/d179/d178/d177/d176/d175/d174/d173/d172/d171/d170/d169/d168/d167/d166/d165/d164/d163/d162/d161/d160/d159/d158/d157/d156/d155/d154/d153/d152/d151/d150/d149/d148/d147/d146/d145/d144/d143/d142/d141/d140/d139/d138/d137/d136/d135/d134/d133/d132/d131/d130/d129/d128/d127/d126/d125/d124/d123/d122/d121/d120/d119/d118/d117/d116/d115/d114/d113/d112/d111/d110/d109/d108/d107/d106/d105/d104/d103/d102/d101/d100/d99/d98/d97/d96/d95/d94/d93/d92/d91/d90/d89/d88/d87/d86/d85/d84/d83/d82/d81/d80/d79/d78/d77/d76/d75/d74/d73/d72/d71.
No such file or directory (dir2.c:21)

--
mkdir: error = 2
Debugger("mkdir")
Stopped at      Debugger+0x37:  movl    $0,in_Debugger


(kgdb) up 9
#9  0xc0192a29 in mkdir (p=0xc7c034a0, uap=0xc86ecf80)
    at ../../kern/vfs_syscalls.c:2636
2636                    Debugger("mkdir");
(kgdb) l
2631
2632            NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE,
SCARG(uap, path), p);
2633            nd.ni_cnd.cn_flags |= WILLBEDIR;
2634            if ((error = namei(&nd)) != 0) {
2635                    printf("mkdir: error = %d\n", error); /* PHO */
2636                    Debugger("mkdir");
2637                    return (error);
2638            }
2639            vp = nd.ni_vp;
2640            if (vp != NULL) {
(kgdb) p error
$1 = 0
(kgdb) p nd
$2 = {
  ni_dirp = 0xbfba05a4
"p00292.d0210/d209/d208/d207/d206/d205/d204/d203/d202/d201/d200/d199/d198/d197/d196/d195/d194/d193/d192/d191/d190/d189/d188/d187/d186/d185/d184/d183/d182/d181/d180/d179/d178/d177/d176/d175/d174/d173/d1"...,

  ni_segflg = UIO_USERSPACE, ni_startdir = 0x0, ni_rootdir = 0xc7c01e00,

  ni_vp = 0x0, ni_dvp = 0xc8627180, ni_pathlen = 6,
  ni_next = 0xc7c165d8 "/d117", ni_loopcnt = 0, ni_cnd = {cn_nameiop =
1,
    cn_flags = 540680, cn_proc = 0xc7c034a0, cn_cred = 0xc0a72400,
    cn_pnbuf = 0xc7c16400 "", cn_nameptr = 0xc7c165d4 "d118/d117",
    cn_namelen = 4, cn_hash = 254, cn_consume = 0}}
(kgdb)

Here's the test program:
/* $Id$ */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <errno.h>
#include <string.h>


/*
 * Report a failed operation on stderr in the form
 *   "<op>. <arg>. <system error text> (<file>:<line>)".
 *
 * The error text is looked up from the current value of errno, so this
 * must be called right after the failing call, before anything else can
 * overwrite errno.  strerror() is used instead of indexing sys_errlist[],
 * which is non-standard, unchecked for out-of-range errno values and no
 * longer provided by current C libraries.
 */
void
error(const char *op, const char *arg, const char *file, int line) {
 fprintf(stderr,"%s. %s. %s (%s:%d)\n",
  op, arg, strerror(errno), file, line);
}

/*
 * Create the directory `path`, then recursively create a chain of nested
 * subdirectories below it named d<level-1>, d<level-2>, ... down to d1.
 *
 * A failing mkdir() is reported through error() and retried once per
 * second until it succeeds, so this function does not return while
 * mkdir() keeps failing.
 *
 * The child path is built with snprintf(); if it would not fit in the
 * local buffer the failure is reported as ENAMETOOLONG and the program
 * exits with status 2 instead of overrunning the buffer.
 */
void
mkDir(char *path, int level) {
 char newPath[4096];
 int len;

 while (mkdir(path, 0770) == -1) {
  error("mkdir", path, __FILE__, __LINE__);
  sleep(1);
 }
 if (level > 1) {
  len = snprintf(newPath, sizeof newPath, "%s/d%d", path, level-1);
  if (len < 0 || (size_t)len >= sizeof newPath) {
   errno = ENAMETOOLONG;
   error("mkdir", path, __FILE__, __LINE__);
   exit(2);
  }
  mkDir(newPath, level-1);
 }
}

/*
 * Remove the directory chain created by mkDir(): descend first to the
 * deepest subdirectory (d<level-1>, d<level-2>, ... d1) and remove the
 * directories on the way back up, finishing with `path` itself.
 *
 * Any rmdir() failure is reported through error() and terminates the
 * program with exit status 2.
 *
 * The child path is built with snprintf(); if it would not fit in the
 * local buffer the failure is reported as ENAMETOOLONG and the program
 * exits with status 2 instead of overrunning the buffer.
 */
void
rmDir(char *path, int level) {
 char newPath[4096];
 int len;

 if (level > 1) {
  len = snprintf(newPath, sizeof newPath, "%s/d%d", path, level-1);
  if (len < 0 || (size_t)len >= sizeof newPath) {
   errno = ENAMETOOLONG;
   error("rmdir", path, __FILE__, __LINE__);
   exit(2);
  }
  rmDir(newPath, level-1);
 }
 if (rmdir(path) == -1) {
  error("rmdir", path, __FILE__, __LINE__);
  exit(2);
 }
}

/*
 * Usage: dir2 <levels>
 *
 * Builds a chain of <levels> nested directories rooted at
 * "p<pid>.d<levels>" in the current directory and then removes it again.
 *
 * <levels> is parsed with strtol(); like the earlier sscanf("%d") parse,
 * leading digits are accepted and an argument that does not start with a
 * number falls back to the default of 10.  The value is clamped to the
 * range 1..210 (strtol saturates on overflow, so huge inputs clamp too).
 */
int
main(int argc, char **argv) {
 enum { DEFAULT_LEVELS = 10, MIN_LEVELS = 1, MAX_LEVELS = 210 };
 int levels = DEFAULT_LEVELS;
 int pid;
 long parsed;
 char *end;
 char path[128];

 if (argc != 2) {
  fprintf(stderr, "Usage: %s <levels>\n", argv[0]);
  exit(1);
 }

 parsed = strtol(argv[1], &end, 10);
 if (end != argv[1]) {
  /* Clamp while still a long so the narrowing to int cannot overflow. */
  if (parsed > MAX_LEVELS) parsed = MAX_LEVELS;
  if (parsed < MIN_LEVELS) parsed = MIN_LEVELS;
  levels = (int)parsed;
 }
 pid = getpid();

 /* Clear the umask so the directories get exactly the mode mkDir asks for. */
 umask(0);
 snprintf(path, sizeof path, "p%05d.d%04d", pid, levels);
 mkDir(path, levels);
 rmDir(path, levels);
 return 0;
}

And finally I also had a single crash:

current#  gdb -k -s kernel.debug  -e /var/crash/kernel.6 -c
/var/crash/vmcore.6
IdlePTD 3932160
initial pcb at 33cfc0
panicstr: ffs_valloc: dup alloc
panic messages:
---
panic: ffs_valloc: dup alloc

---
#0  boot (howto=256) at ../../kern/kern_shutdown.c:291
291   dumppcb.pcb_cr3 = rcr3();
(kgdb) bt
#0  boot (howto=256) at ../../kern/kern_shutdown.c:291
#1  0xc016710d in panic (fmt=0xc02f69c1 "ffs_valloc: dup alloc")
    at ../../kern/kern_shutdown.c:505
#2  0xc0224103 in ffs_valloc (pvp=0xc8744a80, mode=16888,
cred=0xc0b94384,
    vpp=0xc85d8a04) at ../../ufs/ffs/ffs_alloc.c:605
#3  0xc0236353 in ufs_mkdir (ap=0xc85d8bc4) at
../../ufs/ufs/ufs_vnops.c:1307
#4  0xc02374a1 in ufs_vnoperate (ap=0xc85d8bc4)
    at ../../ufs/ufs/ufs_vnops.c:2316
#5  0xc01cc26d in nfsrv_mkdir (nfsd=0xc0b94300, slp=0xc09e4600,
    procp=0xc7c05de0, mrq=0xc85d8dc4) at vnode_if.h:611
#6  0xc01da76e in nfssvc_nfsd (nsd=0xc85d8e80, argp=0x8071bc0 "",
p=0xc7c05de0)
    at ../../nfs/nfs_syscalls.c:650
#7  0xc01da08f in nfssvc (p=0xc7c05de0, uap=0xc85d8f80)
    at ../../nfs/nfs_syscalls.c:346
#8  0xc026d496 in syscall (frame={tf_fs = 47, tf_es = 47, tf_ds = 47,
      tf_edi = 4, tf_esi = 1, tf_ebp = -1077944892, tf_isp = -933392428,

      tf_ebx = 0, tf_edx = -1077944336, tf_ecx = 0, tf_eax = 155,
      tf_trapno = 12, tf_err = 2, tf_eip = 134517008, tf_cs = 31,
      tf_eflags = 646, tf_esp = -1077945284, tf_ss = 47})
    at ../../i386/i386/trap.c:1056
#9  0xc025e526 in Xint0x80_syscall ()
#10 0x80480e9 in ?? ()
(kgdb) quit
current# exit

Any suggestions as to where to investigate?

Regards
--
Peter Holm | mailto:peter@holm.cc | http://login.dknet.dk/~pho/




To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?37AC9E3B.49CD3DA6>