Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 19 Nov 2018 10:26:51 +0100
From:      Sylvain GALLIANO <sg@efficientip.com>
To:        markj@freebsd.org
Cc:        freebsd-current@freebsd.org
Subject:   Re: Panic on kern_event.c
Message-ID:  <CAHdyrksNptg1cMorN9Y5xMdQb2UoZPBLOQRZ7huwMS=Afjt5%2BQ@mail.gmail.com>
In-Reply-To: <20181118003554.GC2799@raichu>
References:  <CAHdyrkvqGp8PGFaCSGgeDFC7wBhjnHK4eL99WM5fMO_yZ_u5KA@mail.gmail.com> <20181107043503.GB30861@raichu> <CAHdyrkt42cn8%2BKqhp-jQ9iZNnreypMT1qybNTcFtx8JivKggZA@mail.gmail.com> <20181115221019.GA2514@raichu> <CAHdyrksHLvzXDkjoy2PpiTgb%2BmEKHJ979rwcW3RJx32qdAyJzg@mail.gmail.com> <20181116154210.GB17379@raichu> <CAHdyrku4OPRr1Ku0WF3XT3vK_dqvNzWN%2BMYz7pXTkiNJakfJGQ@mail.gmail.com> <20181118003554.GC2799@raichu>

next in thread | previous in thread | raw e-mail | index | archive | help
With this latest patch, after stressing syslog-ng few minutes, it do not
log anymore and a simple kill do not work (I have to do kill -9)

Le dim. 18 nov. 2018 =C3=A0 01:35, Mark Johnston <markj@freebsd.org> a =C3=
=A9crit :

> On Fri, Nov 16, 2018 at 05:56:54PM +0100, Sylvain GALLIANO wrote:
> > Le ven. 16 nov. 2018 =C3=A0 16:42, Mark Johnston <markj@freebsd.org> a =
=C3=A9crit
> :
> >
> > > On Fri, Nov 16, 2018 at 03:47:39PM +0100, Sylvain GALLIANO wrote:
> > > > Le jeu. 15 nov. 2018 =C3=A0 23:10, Mark Johnston <markj@freebsd.org=
> a
> =C3=A9crit
> > > :
> > > >
> > > > > On Thu, Nov 08, 2018 at 05:05:03PM +0100, Sylvain GALLIANO wrote:
> > > > > > Hi,
> > > > > >
> > > > > > I replaced
> > > > > > << printf("XXX knote %p already in tailq  status:%x kq_count:%d
> [%p
> > > %p]
> > > > > >
> > > > >
> > >
> %u\n",kn,kn->kn_status,kq->kq_count,kn->kn_tqe.tqe_next,kn->kn_tqe.tqe_pr=
ev,__LINE__);
> > > > > > by
> > > > > > >> panic("XXX knote %p already in tailq  status:%x kq_count:%d
> [%p
> > > %p]
> > > > > >
> > > > >
> > >
> %u\n",kn,kn->kn_status,kq->kq_count,kn->kn_tqe.tqe_next,kn->kn_tqe.tqe_pr=
ev,__LINE__);
> > > > > >
> > > > > > Here is the stack during panic:
> > > > > > panic: XXX knote 0xfffff801e1c6ddc0 already in tailq  status:1
> > > kq_count:2
> > > > > > [0 0xfffff8000957a978]  2671
> > > > > >
> > > > > Could you please give the following patch a try?
> > > > >
> > > > > If possible, could you also ktrace one of the active syslog-ng
> > > processes
> > > > > for some time, perhaps 15 seconds, and share the kdump?  I have
> been
> > > > > trying to reproduce the problem without any luck.
> > > > >
> > > > Unfortunately patched kernel is not stable:
> > > > - some processes run at 100% CPU (STOP state) and cannot be killed
> > > > - sometime the system completely freeze (need a hard reboot)
>
> Could you please give this updated a patch a try?
>
> diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
> index d9c670e29d60..f989d612bac4 100644
> --- a/sys/kern/kern_event.c
> +++ b/sys/kern/kern_event.c
> @@ -224,6 +224,7 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_R=
W,
>  #define KQ_LOCK(kq) do {                                               \
>         mtx_lock(&(kq)->kq_lock);                                       \
>  } while (0)
> +#define        KQ_LOCKPTR(kq)  (&(kq)->kq_lock)
>  #define KQ_FLUX_WAKEUP(kq) do {
>       \
>         if (((kq)->kq_state & KQ_FLUXWAIT) =3D=3D KQ_FLUXWAIT) {         =
   \
>                 (kq)->kq_state &=3D ~KQ_FLUXWAIT;                        =
 \
> @@ -687,8 +688,11 @@ filt_timerexpire(void *knx)
>         struct kq_timer_cb_data *kc;
>
>         kn =3D knx;
> +       KQ_OWNED(kn->kn_kq);
>         kn->kn_data++;
> -       KNOTE_ACTIVATE(kn, 0);  /* XXX - handle locking */
> +
> +       if (!kn_in_flux(kn) || (kn->kn_status & KN_SCAN) !=3D 0)
> +               KNOTE_ACTIVATE(kn, 1);
>
>         if ((kn->kn_flags & EV_ONESHOT) !=3D 0)
>                 return;
> @@ -753,7 +757,7 @@ filt_timerattach(struct knote *kn)
>                 kn->kn_flags |=3D EV_CLEAR;       /* automatically set */
>         kn->kn_status &=3D ~KN_DETACHED;          /* knlist_add clears it=
 */
>         kn->kn_ptr.p_v =3D kc =3D malloc(sizeof(*kc), M_KQUEUE, M_WAITOK)=
;
> -       callout_init(&kc->c, 1);
> +       callout_init_mtx(&kc->c, KQ_LOCKPTR(kn->kn_kq), 0);
>         filt_timerstart(kn, to);
>
>         return (0);
> @@ -772,8 +776,10 @@ filt_timerstart(struct knote *kn, sbintime_t to)
>                 kc->next =3D to + sbinuptime();
>                 kc->to =3D to;
>         }
> +       KQ_LOCK(kn->kn_kq);
>         callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
>             PCPU_GET(cpuid), C_ABSOLUTE);
> +       KQ_UNLOCK(kn->kn_kq);
>  }
>
>  static void
> @@ -826,7 +832,6 @@ filt_timertouch(struct knote *kn, struct kevent *kev,
> u_long type)
>                         KQ_LOCK(kq);
>                         if (kn->kn_status & KN_QUEUED)
>                                 knote_dequeue(kn);
> -
>                         kn->kn_status &=3D ~KN_ACTIVE;
>                         kn->kn_data =3D 0;
>                         KQ_UNLOCK(kq);
> @@ -1843,12 +1848,13 @@ kqueue_scan(struct kqueue *kq, int maxevents,
> struct kevent_copyops *k_ops,
>         }
>
>         TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
> +       marker->kn_status |=3D KN_QUEUED;
>         influx =3D 0;
>         while (count) {
>                 KQ_OWNED(kq);
>                 kn =3D TAILQ_FIRST(&kq->kq_head);
>
> -               if ((kn->kn_status =3D=3D KN_MARKER && kn !=3D marker) ||
> +               if (((kn->kn_status & KN_MARKER) !=3D 0 && kn !=3D marker=
) ||
>                     kn_in_flux(kn)) {
>                         if (influx) {
>                                 influx =3D 0;
> @@ -1861,24 +1867,21 @@ kqueue_scan(struct kqueue *kq, int maxevents,
> struct kevent_copyops *k_ops,
>                 }
>
>                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
> -               if ((kn->kn_status & KN_DISABLED) =3D=3D KN_DISABLED) {
> -                       kn->kn_status &=3D ~KN_QUEUED;
> -                       kq->kq_count--;
> +               kn->kn_status &=3D ~KN_QUEUED;
> +               if ((kn->kn_status & KN_DISABLED) =3D=3D KN_DISABLED)
>                         continue;
> -               }
>                 if (kn =3D=3D marker) {
>                         KQ_FLUX_WAKEUP(kq);
>                         if (count =3D=3D maxevents)
>                                 goto retry;
>                         goto done;
>                 }
> +               kq->kq_count--;
>                 KASSERT(!kn_in_flux(kn),
>                     ("knote %p is unexpectedly in flux", kn));
>
>                 if ((kn->kn_flags & EV_DROP) =3D=3D EV_DROP) {
> -                       kn->kn_status &=3D ~KN_QUEUED;
>                         kn_enter_flux(kn);
> -                       kq->kq_count--;
>                         KQ_UNLOCK(kq);
>                         /*
>                          * We don't need to lock the list since we've
> @@ -1888,9 +1891,7 @@ kqueue_scan(struct kqueue *kq, int maxevents, struc=
t
> kevent_copyops *k_ops,
>                         KQ_LOCK(kq);
>                         continue;
>                 } else if ((kn->kn_flags & EV_ONESHOT) =3D=3D EV_ONESHOT)=
 {
> -                       kn->kn_status &=3D ~KN_QUEUED;
>                         kn_enter_flux(kn);
> -                       kq->kq_count--;
>                         KQ_UNLOCK(kq);
>                         /*
>                          * We don't need to lock the list since we've
> @@ -1910,10 +1911,9 @@ kqueue_scan(struct kqueue *kq, int maxevents,
> struct kevent_copyops *k_ops,
>                         if (kn->kn_fop->f_event(kn, 0) =3D=3D 0) {
>                                 KQ_LOCK(kq);
>                                 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal)=
;
> -                               kn->kn_status &=3D ~(KN_QUEUED | KN_ACTIV=
E |
> -                                   KN_SCAN);
> +                               /* XXXMJ races with EVFILT_TIMER
> activation. */
> +                               kn->kn_status &=3D ~(KN_ACTIVE | KN_SCAN)=
;
>                                 kn_leave_flux(kn);
> -                               kq->kq_count--;
>                                 kn_list_unlock(knl);
>                                 influx =3D 1;
>                                 continue;
> @@ -1937,11 +1937,10 @@ kqueue_scan(struct kqueue *kq, int maxevents,
> struct kevent_copyops *k_ops,
>                                 }
>                                 if (kn->kn_flags & EV_DISPATCH)
>                                         kn->kn_status |=3D KN_DISABLED;
> -                               kn->kn_status &=3D ~(KN_QUEUED | KN_ACTIV=
E);
> -                               kq->kq_count--;
> -                       } else
> -                               TAILQ_INSERT_TAIL(&kq->kq_head, kn,
> kn_tqe);
> -
> +                               kn->kn_status &=3D ~KN_ACTIVE;
> +                       } else if ((kn->kn_status & KN_QUEUED) =3D=3D 0)
> +                               knote_enqueue(kn);
> +
>                         kn->kn_status &=3D ~KN_SCAN;
>                         kn_leave_flux(kn);
>                         kn_list_unlock(knl);
>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAHdyrksNptg1cMorN9Y5xMdQb2UoZPBLOQRZ7huwMS=Afjt5%2BQ>