Date:      Tue, 12 Sep 2000 21:32:23 +0100 (BST)
From:      Doug Rabson <dfr@nlsystems.com>
To:        John Baldwin <jhb@pike.osd.bsdi.com>
Cc:        alpha@freebsd.org
Subject:   Re: Mutex's aren't recursing
Message-ID:  <Pine.BSF.4.21.0009122128540.86297-100000@salmon.nlsystems.com>
In-Reply-To: <200009120028.RAA71076@pike.osd.bsdi.com>

On Mon, 11 Sep 2000, John Baldwin wrote:

> Hmm, well, after debugging some, it seems that the mutexes on the alpha are
> broken wrt recursion.  To see the code I'm using, grab the patchset
> http://www.FreeBSD.org/~jhb/patches/alpha.ithreads.patch.  The code in
> question is in the mtx_enter and mtx_enter_hard functions in
> sys/alpha/include/mutex.h and sys/alpha/alpha/synch_machdep.c, respectively.
> The problem seems to be that when we recurse on a lock, we aren't marking the
> lock as recursed, and we aren't incrementing the recursion count.  However,
> the code we are using to do this is almost exactly the same between i386
> and alpha (and it works on i386).  As a result, when the "inner" function
> releases Giant, it doesn't see that Giant is recursed, so it releases it
> directly.  When the "outer" function then releases Giant, it is already free,
> resulting in an assertion failure if INVARIANTS is on, or in a panic later
> on as reported by other people on here.
> 
> The following snippet of KTR output shows Giant being acquired twice but
> the recursion field (`r') staying at zero:
> 
> 590 0:019738556 cpu0 machine/mutex.h.511        REL Giant [0xfffffc0000666170] at ../../alpha/alpha/interrupt.c:123 r=0
> 589 0:019685941 cpu0 machine/mutex.h.472        GOT Giant [0xfffffc0000666170] at ../../alpha/alpha/interrupt.c:121 r=0
> 588 0:019681066 cpu0 ../../alpha/alpha/interrupt.c.115  clock interrupt
> 587 0:019665286 cpu0 machine/mutex.h.472        GOT Giant [0xfffffc0000666170] at ../../alpha/alpha/ipl_funcs.c:138 r=0
> 586 0:019611469 cpu0 machine/mutex.h.511        REL malloc [0xfffffc000065f650] at ../../kern/kern_malloc.c:283 r=0
> 585 0:019563381 cpu0 machine/mutex.h.472        GOT malloc [0xfffffc000065f650] at ../../kern/kern_malloc.c:157 r=0
> 
> I've stared at the mutex code in question but don't see where it is
> breaking.  Anyone else see anything that might be wrong?

I sent you some mail yesterday about this. I got the constraints wrong for
the inline assembler in atomic_cmpset. I disassembled some of the code in
interrupt.c that tries to enter the mutex, and you can clearly see gcc
allocating t1 as both an input and an output of the inline:

0xfffffc00005542c4 <interrupt+900>:	lda	t1,8(zero)
0xfffffc00005542c8 <interrupt+904>:	ldq	t0,64(t7)
0xfffffc00005542cc <interrupt+908>:	ldq_l	t1,0(s1)
0xfffffc00005542d0 <interrupt+912>:	cmpeq	t1,t1,t2
0xfffffc00005542d4 <interrupt+916>:	beq	t2,0xfffffc00005542e4 <interrupt+932>
0xfffffc00005542d8 <interrupt+920>:	mov	t0,t1
0xfffffc00005542dc <interrupt+924>:	stq_c	t1,0(s1)
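
The underlying rule: gcc assumes all inputs are consumed before any output
is written, so a plain "=r" output is allowed to share a register with an
input. Here temp and cmpval both landed in t1, which is why the cmpeq
above compares t1 with itself and the compare always "succeeds". The
early-clobber modifier "=&r" tells gcc the output is written while the
inputs are still live, so it must get a register of its own. For
reference, the fixed atomic_cmpset_64 comes out roughly like this (a
sketch from memory, not necessarily the committed source):

#include <sys/types.h>		/* u_int32_t, u_int64_t */

static __inline u_int32_t
atomic_cmpset_64(volatile u_int64_t *p, u_int64_t cmpval, u_int64_t newval)
{
	u_int32_t ret;
	u_int64_t temp;

	__asm __volatile (
		"1:\tldq_l %1, %5\n\t"		/* load old value, locked */
		"cmpeq %1, %3, %0\n\t"		/* compare with cmpval */
		"beq %0, 2f\n\t"		/* exit if not equal */
		"mov %4, %1\n\t"		/* value to store */
		"stq_c %1, %2\n\t"		/* attempt to store */
		"beq %1, 3f\n\t"		/* lost reservation, retry */
		"mb\n\t"			/* drain to memory */
		"2:\n"				/* done */
		".section .text3,\"ax\"\n"	/* improve branch prediction */
		"3:\tbr 1b\n"			/* try again */
		".previous\n"
		/* '&' = early-clobber: ret and temp may not share a
		   register with cmpval, newval or each other. */
		: "=&r" (ret), "=&r" (temp), "=m" (*p)
		: "r" (cmpval), "r" (newval), "m" (*p)
		: "memory");

	return (ret);
}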

I'm just about to start testing this patch, which should fix the problem
and also provide efficient inline forms of
atomic_{add,subtract,set,clear}_{32,64}. I also fixed the interrupt
problems with the spin locks (I think); there's a sketch of the new
ordering after the diff.

Index: atomic.h
===================================================================
RCS file: /home/ncvs/src/sys/alpha/include/atomic.h,v
retrieving revision 1.3
diff -u -r1.3 atomic.h
--- atomic.h	2000/09/06 11:20:53	1.3
+++ atomic.h	2000/09/12 20:19:05
@@ -44,15 +44,149 @@
 void atomic_add_16(volatile u_int16_t *, u_int16_t);
 void atomic_subtract_16(volatile u_int16_t *, u_int16_t);
 
-void atomic_set_32(volatile u_int32_t *, u_int32_t);
-void atomic_clear_32(volatile u_int32_t *, u_int32_t);
-void atomic_add_32(volatile u_int32_t *, u_int32_t);
-void atomic_subtract_32(volatile u_int32_t *, u_int32_t);
-
-void atomic_set_64(volatile u_int64_t *, u_int64_t);
-void atomic_clear_64(volatile u_int64_t *, u_int64_t);
-void atomic_add_64(volatile u_int64_t *, u_int64_t);
-void atomic_subtract_64(volatile u_int64_t *, u_int64_t);
+static __inline void atomic_set_32(volatile u_int32_t *p, u_int32_t v)
+{
+	u_int32_t temp;
+
+	__asm __volatile (
+		"1:\tldl_l %0, %2\n\t"		/* load old value */
+		"bis %0, %3, %0\n\t"		/* calculate new value */
+		"stl_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
+
+static __inline void atomic_clear_32(volatile u_int32_t *p, u_int32_t v)
+{
+	u_int32_t temp;
+
+	__asm __volatile (
+		"1:\tldl_l %0, %2\n\t"		/* load old value */
+		"bic %0, %3, %0\n\t"		/* calculate new value */
+		"stl_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
+
+static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t v)
+{
+	u_int32_t temp;
+
+	__asm __volatile (
+		"1:\tldl_l %0, %2\n\t"		/* load old value */
+		"addl %0, %3, %0\n\t"		/* calculate new value */
+		"stl_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
+
+static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t v)
+{
+	u_int32_t temp;
+
+	__asm __volatile (
+		"1:\tldl_l %0, %2\n\t"		/* load old value */
+		"subl %0, %3, %0\n\t"		/* calculate new value */
+		"stl_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
+
+static __inline void atomic_set_64(volatile u_int64_t *p, u_int64_t v)
+{
+	u_int64_t temp;
+
+	__asm __volatile (
+		"1:\tldq_l %0, %2\n\t"		/* load old value */
+		"bis %0, %3, %0\n\t"		/* calculate new value */
+		"stq_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
+
+static __inline void atomic_clear_64(volatile u_int64_t *p, u_int64_t v)
+{
+	u_int64_t temp;
+
+	__asm __volatile (
+		"1:\tldq_l %0, %2\n\t"		/* load old value */
+		"bic %0, %3, %0\n\t"		/* calculate new value */
+		"stq_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
+
+static __inline void atomic_add_64(volatile u_int64_t *p, u_int64_t v)
+{
+	u_int64_t temp;
+
+	__asm __volatile (
+		"1:\tldq_l %0, %2\n\t"		/* load old value */
+		"addq %0, %3, %0\n\t"		/* calculate new value */
+		"stq_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
+
+static __inline void atomic_subtract_64(volatile u_int64_t *p, u_int64_t v)
+{
+	u_int64_t temp;
+
+	__asm __volatile (
+		"1:\tldq_l %0, %2\n\t"		/* load old value */
+		"subq %0, %3, %0\n\t"		/* calculate new value */
+		"stq_c %0, %1\n\t"		/* attempt to store */
+		"beq %0, 2f\n\t"		/* spin if failed */
+		"mb\n\t"			/* drain to memory */
+		".section .text3,\"ax\"\n"	/* improve branch prediction */
+		"2:\tbr 1b\n"			/* try again */
+		".previous\n"
+		: "=&r" (temp), "=m" (*p)
+		: "m" (*p), "r" (v)
+		: "memory");
+}
 
 #define atomic_set_char		atomic_set_8
 #define atomic_clear_char	atomic_clear_8
@@ -95,7 +229,7 @@
 		".section .text3,\"ax\"\n"	/* improve branch prediction */
 		"3:\tbr 1b\n"			/* try again */
 		".previous\n"
-		: "=&r" (ret), "=r" (temp), "=m" (*p)
+		: "=&r" (ret), "=&r" (temp), "=m" (*p)
 		: "r" (cmpval), "r" (newval), "m" (*p)
 		: "memory");
 
@@ -123,7 +257,7 @@
 		".section .text3,\"ax\"\n"	/* improve branch prediction */
 		"3:\tbr 1b\n"			/* try again */
 		".previous\n"
-		: "=&r" (ret), "=r" (temp), "=m" (*p)
+		: "=&r" (ret), "=&r" (temp), "=m" (*p)
 		: "r" (cmpval), "r" (newval), "m" (*p)
 		: "memory");
 
Index: mutex.h
===================================================================
RCS file: /home/ncvs/src/sys/alpha/include/mutex.h,v
retrieving revision 1.5
diff -u -r1.5 mutex.h
--- mutex.h	2000/09/09 23:18:47	1.5
+++ mutex.h	2000/09/12 09:13:25
@@ -223,9 +223,9 @@
 extern char STR_IEN[];
 extern char STR_IDIS[];
 #endif	/* MTX_STRS */
-#define	ASS_IEN		MPASS2((alpha_pal_rdps & ALPHA_PSL_IPL_MASK)	\
+#define	ASS_IEN		MPASS2((alpha_pal_rdps() & ALPHA_PSL_IPL_MASK)	\
 			       == ALPHA_PSL_IPL_HIGH, STR_IEN)
-#define	ASS_IDIS	MPASS2((alpha_pal_rdps & ALPHA_PSL_IPL_MASK)	\
+#define	ASS_IDIS	MPASS2((alpha_pal_rdps() & ALPHA_PSL_IPL_MASK)	\
 			       != ALPHA_PSL_IPL_HIGH, STR_IDIS)
 #endif	/* INVARIANTS */
 
@@ -326,7 +326,7 @@
  */
 
 #define	_getlock_spin_block(mp, tid, type) do {				\
-	u_int _ipl = alpha_pal_rdps() & ALPHA_PSL_IPL_MASK;		\
+	u_int _ipl = alpha_pal_swpipl(ALPHA_PSL_IPL_HIGH);		\
 	if (atomic_cmpset_64(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
 		mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _ipl);	\
 	else {								\
@@ -544,8 +544,8 @@
  * Simple assembly macros to get and release non-recursive spin locks
  */
 #define MTX_ENTER(lck)				\
-	call_pal PAL_OSF1_rdps;			\
-	and	v0, ALPHA_PSL_IPL_MASK, v0;	\
+	ldiq	a0, ALPHA_PSL_IPL_HIGH;		\
+	call_pal PAL_OSF1_swpipl;		\
 1:	ldq_l	a0, lck+MTX_LOCK;		\
 	cmpeq	a0, MTX_UNOWNED, a1;		\
 	beq	a1, 1b;				\
@@ -553,9 +553,7 @@
 	stq_c	a0, lck+MTX_LOCK;		\
 	beq	a0, 1b;				\
 	mb;					\
-	stl	v0, lck+MTX_SAVEIPL;		\
-	ldq	a0, ALPHA_PSL_IPL_HIGH;		\
-	call_pal PSL_OSF1_swpipl
+	stl	v0, lck+MTX_SAVEIPL
 
 #define MTX_EXIT(lck)				\
 	mb;					\
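
To spell out the spin lock fix: it's an ordering change. Swap the IPL to
ALPHA_PSL_IPL_HIGH before touching the lock, so an interrupt can neither
fire while we spin nor sneak in between acquiring the lock and blocking
interrupts (where a handler wanting the same lock would deadlock).
Expanded from the macro into a function, the logic is roughly as below; a
sketch only, and I'm assuming the field behind the MTX_SAVEIPL offset is
called mtx_saveipl:

/*
 * Sketch of the fixed _getlock_spin_block ordering (kernel context;
 * machine/mutex.h and machine/alpha_cpu.h assumed in scope).
 */
static __inline void
spin_enter_sketch(struct mtx *mp, u_int64_t tid, int type)
{
	/* Block interrupts first; swpipl returns the previous IPL. */
	u_int _ipl = alpha_pal_swpipl(ALPHA_PSL_IPL_HIGH);

	if (atomic_cmpset_64(&mp->mtx_lock, MTX_UNOWNED, tid) == 0)
		/* Contested: take the hard path with interrupts
		 * already blocked; it is handed the saved IPL. */
		mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _ipl);
	else
		/* Got the lock: save the old IPL for mtx_exit. */
		mp->mtx_saveipl = _ipl;
}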


-- 
Doug Rabson				Mail:  dfr@nlsystems.com
Nonlinear Systems Ltd.			Phone: +44 20 8348 3944



