Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 20 Dec 2013 19:19:51 +0000
From:      "Bentkofsky, Michael" <MBentkofsky@verisign.com>
To:        "freebsd-net@freebsd.org" <freebsd-net@freebsd.org>
Cc:        "rwatson@freebsd.org" <rwatson@freebsd.org>, "George Neville-Neil \(gnn@freebsd.org\)" <gnn@freebsd.org>, "bms@FreeBSD.org" <bms@freebsd.org>
Subject:   =?Windows-1252?Q?kern/185043:_Kernel_panic:_Sleeping_thread_(tid_=85, _pid?= =?Windows-1252?Q?_=85)_owns_a_non-sleepable_lock_from_netinet/in=5Fmulti.?= =?Windows-1252?Q?c?=
Message-ID:  <080FBD5B7A09F845842100A6DE79623346E3C522@BRN1WNEXMBX01.vcorp.ad.vrsn.com>

next in thread | raw e-mail | index | archive | help
I have just submitted PR kern/185043 and wanted to follow-up to freebsd-net=
 with a patch that fixes the obvious paths leaving in_multi_lock incorrectl=
y locked.

Index: sys/netinet/in_mcast.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- sys/netinet/in_mcast.c (revision 259264)
+++ sys/netinet/in_mcast.c (working copy)
@@ -1492,7 +1492,7 @@
    error =3D inm_merge(inm, imf);
    if (error) {
          CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
-          goto out_imf_rollback;
+          goto out_in_multi_locked;
    }
     CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -1500,6 +1500,8 @@
    if (error)
          CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
 out_imf_rollback:
@@ -2168,8 +2170,12 @@
    if (is_new) {
          error =3D in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
              &inm);
-          if (error)
+          if (error) {
+                        CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
+                            __func__);
+                        IN_MULTI_UNLOCK();
               goto out_imo_free;
+                }
          imo->imo_membership[idx] =3D inm;
    } else {
          CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
@@ -2177,20 +2183,21 @@
          if (error) {
               CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
                   __func__);
-               goto out_imf_rollback;
+               goto out_in_multi_locked;
          }
          CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
          error =3D igmp_change_state(inm);
          if (error) {
               CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
                   __func__);
-               goto out_imf_rollback;
+               goto out_in_multi_locked;
          }
    }
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
-out_imf_rollback:
    INP_WLOCK_ASSERT(inp);
    if (error) {
          imf_rollback(imf);
@@ -2394,7 +2401,7 @@
          if (error) {
               CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
                   __func__);
-               goto out_imf_rollback;
+               goto out_in_multi_locked;
          }
           CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2405,9 +2412,10 @@
          }
    }
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
-out_imf_rollback:
    if (error)
          imf_rollback(imf);
    else
@@ -2641,7 +2649,7 @@
    error =3D inm_merge(inm, imf);
    if (error) {
          CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
-          goto out_imf_rollback;
+          goto out_in_multi_locked;
    }
     CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2649,6 +2657,8 @@
    if (error)
          CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
 out_imf_rollback:

=3D=3D=3D=3D=3D End of patch =3D=3D=3D=3D=3D

The PR also cites a follow-up problem that this patch does not address, nam=
ely that the same technique for causing the problem (subscribing to a multi=
cast address using quagga=92s ospfd and doing /etc/rc.d/netif restart) can =
result in a race condition leading to this kernel assertion: if_freemulti: =
protospec not NULL. The PR describes the particular execution path and comm=
ents suggesting this might be a known problem. Since I have triggered the c=
ase, I am interested in patching this as well and need more time to conside=
r a more thorough solution. Input is appreciated, of course.

Hopefully the patch above is obvious enough to be accepted.

=93This message (including any attachments) is intended only for the use of=
 the individual or entity to which it is addressed, and may contain informa=
tion that is non-public, proprietary, privileged, confidential and exempt f=
rom disclosure under applicable law or may be constituted as attorney work =
product. If you are not the intended recipient, you are hereby notified tha=
t any use, dissemination, distribution, or copying of this communication is=
 strictly prohibited. If you have received this message in error, notify se=
nder immediately and delete this message immediately.=94



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?080FBD5B7A09F845842100A6DE79623346E3C522>