Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 3 Dec 2012 21:25:52 -0500
From:      Ryan Stone <rysto32@gmail.com>
To:        "freebsd-hackers@freebsd.org" <freebsd-hackers@freebsd.org>
Subject:   [PATCH] Bugs in DTrace debug locking code
Message-ID:  <CAFMmRNwYf4FZYfe_PXeJN7xV2OPMt51QJ4NpsJCo6Ba9k4hK3g@mail.gmail.com>

next in thread | raw e-mail | index | archive | help
DTrace has an unused logging facility built-in.  The logging is intended to
be safe to be called from the handler of a DTrace probe (what DTrace calls
"probe context").  Because a DTrace probe could be enabled almost anywhere
in the kernel, this means that it can't use standard FreeBSD
synchronization primitives, and so it has rolled its own spin lock.  As so
often happens when rolling your own synchronization, there are several bugs
here:

1) It doesn't use spinlock_enter/spinlock_exit, which means that a thread
holding a spin lock can be switched out.  A subsequent thread could be
starved trying to acquire the lock (worse, there's the chance of a nasty
case of priority inversion here, where a low-priority thread holds the lock
but cannot run because a high-priority thread is spinning on that same
lock).  This is the bug that caused me to start looking into this code: I
saw a situation where a user thread had taken the lock and subsequently
been switched out, and a softclock thread starved waiting for the spin lock.

2) The code uses per-CPU buffers and thus has a lock for every CPU.  It
ends up doing the following in many cases:

dtrace_debug_lock(curcpu);
//...
dtrace_debug_unlock(curcpu);

There is nothing that guarantees that the thread can not have migrated to a
new CPU in the meantime.

3) The locks do not use acquire and release memory barriers.

4) The locks are all right next to each other in memory, so false sharing
will defeat much of the purpose of having per-CPU buffers in the first
place.

5) The locks do not record their owner, make it difficult to debug problems
with them.

(There is a sixth problem, namely that this code is compiled into the
kernel when nothing in the tree uses this log facility, but there is still
some code paths which check for log messages.  That's why I saw my crash in
#1 in the first place.  I plan on taking it out in a separate commit.)

The following patch addresses these issues.  I've tried stress-testing
DTrace but I've been unable to reproduce my original crash on head.  Any
comments or objections?

Index: sys/cddl/dev/dtrace/dtrace_debug.c
===================================================================
--- sys/cddl/dev/dtrace/dtrace_debug.c  (revision 243795)
+++ sys/cddl/dev/dtrace/dtrace_debug.c  (working copy)
@@ -39,6 +39,7 @@

 struct dtrace_debug_data {
        char bufr[DTRACE_DEBUG_BUFR_SIZE];
+       uintptr_t lock;
        char *first;
        char *last;
        char *next;
@@ -46,12 +47,14 @@

 static char dtrace_debug_bufr[DTRACE_DEBUG_BUFR_SIZE];

-static volatile u_long dtrace_debug_flag[MAXCPU];
-
 static void
 dtrace_debug_lock(int cpu)
 {
-       while (dtrace_cmpset_long(&dtrace_debug_flag[cpu], 0, 1) == 0)
+       uintptr_t tid;
+
+       tid = (uintptr_t)curthread;
+       spinlock_enter();
+       while (atomic_cmpset_acq_long(&dtrace_debug_data[cpu].lock, 0, tid)
== 0)
                /* Loop until the lock is obtained. */
                ;
 }
@@ -59,7 +62,8 @@
 static void
 dtrace_debug_unlock(int cpu)
 {
-       dtrace_debug_flag[cpu] = 0;
+       atomic_store_rel_long(&dtrace_debug_data[cpu].lock, 0);
+       spinlock_exit();
 }

 static void
@@ -151,10 +155,11 @@
  */

 static __inline void
-dtrace_debug__putc(char c)
+dtrace_debug__putc(int cpu, char c)
 {
-       struct dtrace_debug_data *d = &dtrace_debug_data[curcpu];
+       struct dtrace_debug_data *d;

+       d = &dtrace_debug_data[cpu];
        *d->next++ = c;

        if (d->next == d->last)
@@ -172,24 +177,30 @@
 static void __used
 dtrace_debug_putc(char c)
 {
-       dtrace_debug_lock(curcpu);
+       int cpu;

-       dtrace_debug__putc(c);
+       cpu = curcpu;
+       dtrace_debug_lock(cpu);

-       dtrace_debug_unlock(curcpu);
+       dtrace_debug__putc(cpu, c);
+
+       dtrace_debug_unlock(cpu);
 }

 static void __used
 dtrace_debug_puts(const char *s)
 {
-       dtrace_debug_lock(curcpu);
+       int cpu;

+       cpu = curcpu;
+       dtrace_debug_lock(cpu);
+
        while (*s != '\0')
-               dtrace_debug__putc(*s++);
+               dtrace_debug__putc(cpu, *s++);

-       dtrace_debug__putc('\0');
+       dtrace_debug__putc(cpu, '\0');

-       dtrace_debug_unlock(curcpu);
+       dtrace_debug_unlock(cpu);
 }

 /*
@@ -219,7 +230,7 @@
 #define MAXNBUF (sizeof(intmax_t) * NBBY + 1)

 static void
-dtrace_debug_vprintf(const char *fmt, va_list ap)
+dtrace_debug_vprintf(int cpu, const char *fmt, va_list ap)
 {
        char nbuf[MAXNBUF];
        const char *p, *percent, *q;
@@ -243,10 +254,10 @@
                width = 0;
                while ((ch = (u_char)*fmt++) != '%' || stop) {
                        if (ch == '\0') {
-                               dtrace_debug__putc('\0');
+                               dtrace_debug__putc(cpu, '\0');
                                return;
                        }
-                       dtrace_debug__putc(ch);
+                       dtrace_debug__putc(cpu, ch);
                }
                percent = fmt - 1;
                qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0;
@@ -266,7 +277,7 @@
                        ladjust = 1;
                        goto reswitch;
                case '%':
-                       dtrace_debug__putc(ch);
+                       dtrace_debug__putc(cpu, ch);
                        break;
                case '*':
                        if (!dot) {
@@ -301,7 +312,7 @@
                        num = (u_int)va_arg(ap, int);
                        p = va_arg(ap, char *);
                        for (q = dtrace_debug_ksprintn(nbuf, num, *p++,
NULL, 0); *q;)
-                               dtrace_debug__putc(*q--);
+                               dtrace_debug__putc(cpu, *q--);

                        if (num == 0)
                                break;
@@ -309,19 +320,20 @@
                        for (tmp = 0; *p;) {
                                n = *p++;
                                if (num & (1 << (n - 1))) {
-                                       dtrace_debug__putc(tmp ? ',' : '<');
+                                       dtrace_debug__putc(cpu,
+                                           tmp ? ',' : '<');
                                        for (; (n = *p) > ' '; ++p)
-                                               dtrace_debug__putc(n);
+                                               dtrace_debug__putc(cpu, n);
                                        tmp = 1;
                                } else
                                        for (; *p > ' '; ++p)
                                                continue;
                        }
                        if (tmp)
-                               dtrace_debug__putc('>');
+                               dtrace_debug__putc(cpu, '>');
                        break;
                case 'c':
-                       dtrace_debug__putc(va_arg(ap, int));
+                       dtrace_debug__putc(cpu, va_arg(ap, int));
                        break;
                case 'D':
                        up = va_arg(ap, u_char *);
@@ -329,12 +341,12 @@
                        if (!width)
                                width = 16;
                        while(width--) {
-                               dtrace_debug__putc(hex2ascii(*up >> 4));
-                               dtrace_debug__putc(hex2ascii(*up & 0x0f));
+                               dtrace_debug__putc(cpu, hex2ascii(*up >>
4));
+                               dtrace_debug__putc(cpu, hex2ascii(*up &
0x0f));
                                up++;
                                if (width)
                                        for (q=p;*q;q++)
-                                               dtrace_debug__putc(*q);
+                                               dtrace_debug__putc(cpu, *q);
                        }
                        break;
                case 'd':
@@ -406,12 +418,12 @@

                        if (!ladjust && width > 0)
                                while (width--)
-                                       dtrace_debug__putc(padc);
+                                       dtrace_debug__putc(cpu, padc);
                        while (n--)
-                               dtrace_debug__putc(*p++);
+                               dtrace_debug__putc(cpu, *p++);
                        if (ladjust && width > 0)
                                while (width--)
-                                       dtrace_debug__putc(padc);
+                                       dtrace_debug__putc(cpu, padc);
                        break;
                case 't':
                        tflag = 1;
@@ -485,32 +497,32 @@
                        if (!ladjust && padc != '0' && width
                            && (width -= tmp) > 0)
                                while (width--)
-                                       dtrace_debug__putc(padc);
+                                       dtrace_debug__putc(cpu, padc);
                        if (neg)
-                               dtrace_debug__putc('-');
+                               dtrace_debug__putc(cpu, '-');
                        if (sharpflag && num != 0) {
                                if (base == 8) {
-                                       dtrace_debug__putc('0');
+                                       dtrace_debug__putc(cpu, '0');
                                } else if (base == 16) {
-                                       dtrace_debug__putc('0');
-                                       dtrace_debug__putc('x');
+                                       dtrace_debug__putc(cpu, '0');
+                                       dtrace_debug__putc(cpu, 'x');
                                }
                        }
                        if (!ladjust && width && (width -= tmp) > 0)
                                while (width--)
-                                       dtrace_debug__putc(padc);
+                                       dtrace_debug__putc(cpu, padc);

                        while (*p)
-                               dtrace_debug__putc(*p--);
+                               dtrace_debug__putc(cpu, *p--);

                        if (ladjust && width && (width -= tmp) > 0)
                                while (width--)
-                                       dtrace_debug__putc(padc);
+                                       dtrace_debug__putc(cpu, padc);

                        break;
                default:
                        while (percent < fmt)
-                               dtrace_debug__putc(*percent++);
+                               dtrace_debug__putc(cpu, *percent++);
                        /*
                         * Since we ignore an formatting argument it is no
                         * longer safe to obey the remaining formatting
@@ -522,23 +534,26 @@
                }
        }

-       dtrace_debug__putc('\0');
+       dtrace_debug__putc(cpu, '\0');
 }

 void
 dtrace_debug_printf(const char *fmt, ...)
 {
        va_list ap;
+       int cpu;

-       dtrace_debug_lock(curcpu);

+       cpu = curcpu;
+       dtrace_debug_lock(cpu);
+
        va_start(ap, fmt);

-       dtrace_debug_vprintf(fmt, ap);
+       dtrace_debug_vprintf(cpu, fmt, ap);

        va_end(ap);

-       dtrace_debug_unlock(curcpu);
+       dtrace_debug_unlock(cpu);
 }

 #else



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAFMmRNwYf4FZYfe_PXeJN7xV2OPMt51QJ4NpsJCo6Ba9k4hK3g>