Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 28 Oct 2004 16:00:24 -0700
From:      Julian Elischer <julian@elischer.org>
To:        David Xu <davidxu@freebsd.org>
Cc:        John Baldwin <jhb@freebsd.org>
Subject:   Re: Infinite loop bug in libc_r on 4.x with condition variables a nd signals
Message-ID:  <41817A08.9000706@elischer.org>
In-Reply-To: <41804D8E.2030003@freebsd.org>
References:  <41804394.7020306@elischer.org> <41804D8E.2030003@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help


David Xu wrote:

> Here is the cvs log:
>
> Revision  Changes    Path
>   1.58      +1 -0      src/lib/libpthread/thread/thr_create.c
>   1.14      +1 -1      src/lib/libpthread/thread/thr_find_thread.c
>   1.115     +27 -10    src/lib/libpthread/thread/thr_kern.c
>   1.119     +15 -11    src/lib/libpthread/thread/thr_private.h
>   1.81      +1 -2      src/lib/libpthread/thread/thr_sig.c 

commit message was:
1. Move thread list flags into new separate member, and atomically
   put DEAD thread on GC list, this closes a race between pthread_join
   and thr_cleanup.
2. Introduce a mutex to protect tcb initialization, tls allocation and
   deallocation code in rtld seems no lock protection or it is broken,
   under stress testing, memory is corrupted.


translates to:

> julian@julian:cvs diff -u
> cvs server: Diffing .
> Index: thr_create.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_create.c,v
> retrieving revision 1.57
> diff -u -r1.57 thr_create.c
> --- thr_create.c        12 Aug 2004 12:12:12 -0000      1.57
> +++ thr_create.c        28 Oct 2004 22:55:58 -0000
> @@ -234,6 +234,7 @@
>                         new_thread->specific_data_count = 0;
>                         new_thread->cleanup = NULL;
>                         new_thread->flags = 0;
> +                       new_thread->tlflags = 0;
>                         new_thread->continuation = NULL;
>                         new_thread->wakeup_time.tv_sec = -1;
>                         new_thread->lock_switch = 0;
> Index: thr_find_thread.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_find_thread.c,v
> retrieving revision 1.13
> diff -u -r1.13 thr_find_thread.c
> --- thr_find_thread.c   17 Jul 2003 23:02:30 -0000      1.13
> +++ thr_find_thread.c   28 Oct 2004 22:55:58 -0000
> @@ -90,7 +90,7 @@
>                 if (curthread != NULL)
>                         curthread->critical_count--;
>                 if ((thread->refcount == 0) &&
> -                   (thread->flags & THR_FLAGS_GC_SAFE) != 0)
> +                   (thread->tlflags & TLFLAGS_GC_SAFE) != 0)
>                         THR_GCLIST_ADD(thread);
>                 KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
>                 _kse_critical_leave(crit);
> Index: thr_kern.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_kern.c,v
> retrieving revision 1.112
> diff -u -r1.112 thr_kern.c
> --- thr_kern.c  15 Aug 2004 16:28:05 -0000      1.112
> +++ thr_kern.c  28 Oct 2004 22:55:58 -0000
> @@ -139,6 +139,9 @@
>  static struct thread_hash_head thr_hashtable[THREAD_HASH_QUEUES];
>  #define        THREAD_HASH(thrd)       ((unsigned long)thrd % 
> THREAD_HASH_QUEUE
> S)
>  
> +/* Lock for thread tcb constructor/destructor */
> +static pthread_mutex_t         _tcb_mutex;
> +
>  #ifdef DEBUG_THREAD_KERN
>  static void    dump_queues(struct kse *curkse);
>  #endif
> @@ -166,7 +169,7 @@
>                     struct pthread_sigframe *psf);
>  static int     thr_timedout(struct pthread *thread, struct timespec 
> *curtime);
>  static void    thr_unlink(struct pthread *thread);
> -static void    thr_destroy(struct pthread *thread);
> +static void    thr_destroy(struct pthread *curthread, struct pthread 
> *thread);
>  static void    thread_gc(struct pthread *thread);
>  static void    kse_gc(struct pthread *thread);
>  static void    kseg_gc(struct pthread *thread);
> @@ -240,7 +243,7 @@
>                         _thr_stack_free(&thread->attr);
>                         if (thread->specific != NULL)
>                                 free(thread->specific);
> -                       thr_destroy(thread);
> +                       thr_destroy(curthread, thread);
>                 }
>         }
>  
> @@ -285,14 +288,14 @@
>         /* Free the free threads. */
>         while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
>                 TAILQ_REMOVE(&free_threadq, thread, tle);
> -               thr_destroy(thread);
> +               thr_destroy(curthread, thread);
>         }
>         free_thread_count = 0;
>  
>         /* Free the to-be-gc'd threads. */
>         while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
>                 TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
> -               thr_destroy(thread);
> +               thr_destroy(curthread, thread);
>         }
>         TAILQ_INIT(&gc_ksegq);
>         _gc_count = 0;
> @@ -381,6 +384,7 @@
>                 if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
>                     _kse_lock_wait, _kse_lock_wakeup) != 0)
>                         PANIC("Unable to initialize thread list lock");
> +               _pthread_mutex_init(&_tcb_mutex, NULL);
>                 active_kse_count = 0;
>                 active_kseg_count = 0;
>                 _gc_count = 0;
> @@ -1204,7 +1208,6 @@
>                 thread->kseg = _kse_initial->k_kseg;
>                 thread->kse = _kse_initial;
>         }
> -       thread->flags |= THR_FLAGS_GC_SAFE;
>  
>         /*
>          * We can't hold the thread list lock while holding the
> @@ -1213,6 +1216,7 @@
>         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
>         DBG_MSG("Adding thread %p to GC list\n", thread);
>         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
> +       thread->tlflags |= TLFLAGS_GC_SAFE;
>         THR_GCLIST_ADD(thread);
>         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
>         if (sys_scope) {
> @@ -1252,7 +1256,7 @@
>         /* Check the threads waiting for GC. */
>         for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = 
> td_next) {
>                 td_next = TAILQ_NEXT(td, gcle);
> -               if ((td->flags & THR_FLAGS_GC_SAFE) == 0)
> +               if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
>                         continue;
>                 else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
>                     ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 
> 0)) {
> @@ -2382,7 +2386,14 @@
>         if ((thread == NULL) &&
>             ((thread = malloc(sizeof(struct pthread))) != NULL)) {
>                 bzero(thread, sizeof(struct pthread));
> -               if ((thread->tcb = _tcb_ctor(thread, curthread == 
> NULL)) == NULL
> ) {
> +               if (curthread) {
> +                       _pthread_mutex_lock(&_tcb_mutex);
> +                       thread->tcb = _tcb_ctor(thread, 0 /* not 
> initial tls */)
> ;
> +                       _pthread_mutex_unlock(&_tcb_mutex);
> +               } else {
> +                       thread->tcb = _tcb_ctor(thread, 1 /* initial 
> tls */);
> +               }
> +               if (thread->tcb == NULL) {
>                         free(thread);
>                         thread = NULL;
>                 } else {
> @@ -2418,7 +2429,7 @@
>                 thread->name = NULL;
>         }
>         if ((curthread == NULL) || (free_thread_count >= 
> MAX_CACHED_THREADS)) {
> -               thr_destroy(thread);
> +               thr_destroy(curthread, thread);
>         } else {
>                 /* Add the thread to the free thread list. */
>                 crit = _kse_critical_enter();
> @@ -2431,14 +2442,20 @@
>  }
>  
>  static void
> -thr_destroy(struct pthread *thread)
> +thr_destroy(struct pthread *curthread, struct pthread *thread)
>  {
>         int i;
>  
>         for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
>                 _lockuser_destroy(&thread->lockusers[i]);
>         _lock_destroy(&thread->lock);
> -       _tcb_dtor(thread->tcb);
> +       if (curthread) {
> +               _pthread_mutex_lock(&_tcb_mutex);
> +               _tcb_dtor(thread->tcb);
> +               _pthread_mutex_unlock(&_tcb_mutex);
> +       } else {
> +               _tcb_dtor(thread->tcb);
> +       }
>         free(thread->siginfo);
>         free(thread);
>  }
> Index: thr_private.h
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_private.h,v
> retrieving revision 1.118
> diff -u -r1.118 thr_private.h
> --- thr_private.h       7 Aug 2004 15:15:38 -0000       1.118
> +++ thr_private.h       28 Oct 2004 22:55:59 -0000
> @@ -753,9 +753,13 @@
>  #define THR_FLAGS_IN_RUNQ      0x0004  /* in run queue using pqe link */
>  #define        THR_FLAGS_EXITING       0x0008  /* thread is exiting */
>  #define        THR_FLAGS_SUSPENDED     0x0010  /* thread is suspended */
> -#define        THR_FLAGS_GC_SAFE       0x0020  /* thread safe for 
> cleaning */
> -#define        THR_FLAGS_IN_TDLIST     0x0040  /* thread in all 
> thread list */
> -#define        THR_FLAGS_IN_GCLIST     0x0080  /* thread in gc list */
> +
> +       /* Thread list flags; only set with thread list lock held. */
> +#define        TLFLAGS_GC_SAFE         0x0001  /* thread safe for 
> cleaning */
> +#define        TLFLAGS_IN_TDLIST       0x0002  /* thread in all 
> thread list */
> +#define        TLFLAGS_IN_GCLIST       0x0004  /* thread in gc list */
> +       int                     tlflags;
> +
>         /*
>          * Base priority is the user setable and retrievable priority
>          * of the thread.  It is only affected by explicit calls to
> @@ -897,30 +901,30 @@
>   * the gc list.
>   */
>  #define        THR_LIST_ADD(thrd) do {                                 \
> -       if (((thrd)->flags & THR_FLAGS_IN_TDLIST) == 0) {       \
> +       if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) == 0) {       \
>                 TAILQ_INSERT_HEAD(&_thread_list, thrd, tle);    \
>                 _thr_hash_add(thrd);                            \
> -               (thrd)->flags |= THR_FLAGS_IN_TDLIST;           \
> +               (thrd)->tlflags |= TLFLAGS_IN_TDLIST;           \
>         }                                                       \
>  } while (0)
>  #define        THR_LIST_REMOVE(thrd) do {                              \
> -       if (((thrd)->flags & THR_FLAGS_IN_TDLIST) != 0) {       \
> +       if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) != 0) {       \
>                 TAILQ_REMOVE(&_thread_list, thrd, tle);         \
>                 _thr_hash_remove(thrd);                         \
> -               (thrd)->flags &= ~THR_FLAGS_IN_TDLIST;          \
> +               (thrd)->tlflags &= ~TLFLAGS_IN_TDLIST;          \
>         }                                                       \
>  } while (0)
>  #define        THR_GCLIST_ADD(thrd) do {                               \
> -       if (((thrd)->flags & THR_FLAGS_IN_GCLIST) == 0) {       \
> +       if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) == 0) {       \
>                 TAILQ_INSERT_HEAD(&_thread_gc_list, thrd, gcle);\
> -               (thrd)->flags |= THR_FLAGS_IN_GCLIST;           \
> +               (thrd)->tlflags |= TLFLAGS_IN_GCLIST;           \
>                 _gc_count++;                                    \
>         }                                                       \
>  } while (0)
>  #define        THR_GCLIST_REMOVE(thrd) do {                            \
> -       if (((thrd)->flags & THR_FLAGS_IN_GCLIST) != 0) {       \
> +       if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) != 0) {       \
>                 TAILQ_REMOVE(&_thread_gc_list, thrd, gcle);     \
> -               (thrd)->flags &= ~THR_FLAGS_IN_GCLIST;          \
> +               (thrd)->tlflags &= ~TLFLAGS_IN_GCLIST;          \
>                 _gc_count--;                                    \
>         }                                                       \
>  } while (0)
> Index: thr_sig.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_sig.c,v
> retrieving revision 1.79
> diff -u -r1.79 thr_sig.c
> --- thr_sig.c   13 Jul 2004 22:52:11 -0000      1.79
> +++ thr_sig.c   28 Oct 2004 22:55:59 -0000
> @@ -1195,8 +1195,7 @@
>  thr_sigframe_save(struct pthread *thread, struct pthread_sigframe *psf)
>  {
>         /* This has to initialize all members of the sigframe. */
> -       psf->psf_flags =
> -           thread->flags & (THR_FLAGS_PRIVATE | THR_FLAGS_IN_TDLIST);
> +       psf->psf_flags = thread->flags & THR_FLAGS_PRIVATE;
>         psf->psf_interrupted = thread->interrupted;
>         psf->psf_timeout = thread->timeout;
>         psf->psf_state = thread->state;
> julian@julian:
>
> Julian Elischer wrote:
>
>> David, do you have revision numbers of what needs to be MFC'd?
>>
>>
>> David Xu wrote:
>>
>>
>>> Daniel Eischen wrote:
>>>
>>>
>>>>> FWIW, we are having (I think) the same problem on 5.3 with 
>>>>> libpthread. The
>>>>>
>>>>> panic there is in the mutex code about an assertion failing 
>>>>> because a thread
>>>>> is on a syncq when it is not supposed to be.
>>>>>  
>>>>
>>>>
>>>>
>>>> David and I recently fixed some races in pthread_join() and
>>>> pthread_exit() in -current libpthread.  Don't know if those
>>>> were responsible...
>>>>
>>>>
>>>>
>>>
>>> That fix should be MFCed ASAP.
>>>
>>>
>>>> Here's a test program that shows correct behavior with both
>>>> libc_r and libpthread in -current.
>>>>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?41817A08.9000706>