Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 10 Apr 2017 12:42:26 +0800
From:      Julian Elischer <julian@freebsd.org>
To:        Pete French <petefrench@ingresso.co.uk>, stable@freebsd.org
Subject:   Re: moutnroot failing on zpools in Azure after upgrade from 10 to 11 due to lack of waiting for da0
Message-ID:  <9f9bbb0e-2824-700f-1eac-8b904f91618b@freebsd.org>
In-Reply-To: <20170408110100.GB14604@brick>
References:  <E1cnOkS-0000oL-Ia@dilbert.ingresso.co.uk> <20170408110100.GB14604@brick>

next in thread | previous in thread | raw e-mail | index | archive | help
On 8/4/17 7:01 pm, Edward Tomasz Napierała wrote:
> On 0313T1206, Pete French wrote:
>> I have a number of machines in Azure, all booting from ZFS and, until
>> the weekend, running 10.3 perfectly happily.
>>
>> I started upgrading these to 11. The first went fine, the second would
>> not boot. Looking at the boot diagnostics it is having problems finding the
>> root pool to mount. I see this in the diagnostic output:
>>
>> 	storvsc0: <Hyper-V IDE Storage Interface> on vmbus0
>> 	Solaris: NOTICE: Cannot find the pool label for 'rpool'
>> 	Mounting from zfs:rpool/ROOT/default failed with error 5.
>> 	Root mount waiting for: storvsc
>> 	(probe0:blkvsc0:0:storvsc1: 0:<Hyper-V IDE Storage Interface>0):  on vmbus0
>> 	storvsc scsi_status = 2
>> 	(da0:blkvsc0:0:0:0): UNMAPPED
>> 	(probe1:blkvsc1:0:1:0): storvsc scsi_status = 2
>> 	hvheartbeat0: <Hyper-V Heartbeat> on vmbus0
>> 	da0 at blkvsc0 bus 0 scbus2 target 0 lun 0
>>
>> As you can see, the drive da0 only appears after it has tried, and failed,
>> to mount the root pool.
> Does the same problem still happen with recent 11-STABLE?

There is a fix for this floating around, which we applied at work.
Our systems are 10.3, but I think it wouldn't be a bad thing to add
generally, as it could (if we let it) solve the problem we sometimes
see with NFS as well as with Azure.

p4 diff2 -du 
//depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#1 
//depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#3
==== //depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#1 (text) 
- //depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#3 (text) 
==== content
@@ -126,8 +126,8 @@
  static int root_mount_mddev;
  static int root_mount_complete;

-/* By default wait up to 3 seconds for devices to appear. */
-static int root_mount_timeout = 3;
+/* By default wait up to 30 seconds for devices to appear. */
+static int root_mount_timeout = 30;
  TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);

  struct root_hold_token *
@@ -690,7 +690,7 @@
      char *errmsg;
      struct mntarg *ma;
      char *dev, *fs, *opts, *tok;
-    int delay, error, timeout;
+    int delay, error, timeout, err_stride;

      error = parse_token(conf, &tok);
      if (error)
@@ -727,11 +727,20 @@
          goto out;
      }

+    /*
+     * For ZFS we can't simply wait for a specific device
+     * as we only know the pool name. To work around this,
+     * parse_mount() will retry the mount later on.
+     *
+     * While retrying for NFS could be implemented similarly
+     * it is currently not supported.
+     */
+    delay = hz / 10;
+    timeout = root_mount_timeout * hz;
+
      if (strcmp(fs, "zfs") != 0 && strstr(fs, "nfs") == NULL &&
          dev[0] != '\0' && !parse_mount_dev_present(dev)) {
          printf("mountroot: waiting for device %s ...\n", dev);
-        delay = hz / 10;
-        timeout = root_mount_timeout * hz;
          do {
              pause("rmdev", delay);
              timeout -= delay;
@@ -741,16 +750,34 @@
              goto out;
          }
      }
+    /* Timeout keeps counting down */

-    ma = NULL;
-    ma = mount_arg(ma, "fstype", fs, -1);
-    ma = mount_arg(ma, "fspath", "/", -1);
-    ma = mount_arg(ma, "from", dev, -1);
-    ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
-    ma = mount_arg(ma, "ro", NULL, 0);
-    ma = parse_mountroot_options(ma, opts);
-    error = kernel_mount(ma, MNT_ROOTFS);
+    err_stride=0;
+    do {
+        ma = NULL;
+        ma = mount_arg(ma, "fstype", fs, -1);
+        ma = mount_arg(ma, "fspath", "/", -1);
+        ma = mount_arg(ma, "from", dev, -1);
+        ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
+        ma = mount_arg(ma, "ro", NULL, 0);
+        ma = parse_mountroot_options(ma, opts);

+        error = kernel_mount(ma, MNT_ROOTFS);
+        /* UFS only does it once */
+        if (strcmp(fs, "zfs") != 0)
+            break;
+        timeout -= delay;
+        if (timeout > 0 && error) {
+            if (err_stride <= 0 ) {
+                    printf("Mounting from %s:%s failed with error %d. "
+                    "%d seconds left. Retrying.\n", fs, dev, error,
+                    timeout / hz);
+            }
+            err_stride += 1;
+            err_stride %= 50;
+            pause("rmzfs", delay);
+        }
+    } while (timeout > 0 && error);
   out:
      if (error) {
          printf("Mounting from %s:%s failed with error %d",

>
> _______________________________________________
> freebsd-stable@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/freebsd-stable
> To unsubscribe, send any mail to "freebsd-stable-unsubscribe@freebsd.org"
>




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?9f9bbb0e-2824-700f-1eac-8b904f91618b>