Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 14 Mar 2014 18:31:08 +0000
From:      "Meyer, Conrad" <conrad.meyer@isilon.com>
To:        Bryan Drewery <bdrewery@FreeBSD.org>
Cc:        "freebsd-hackers@freebsd.org" <freebsd-hackers@freebsd.org>, "Meyer, Conrad" <conrad.meyer@isilon.com>
Subject:   [PATCH] amd64/pcpu.h: Use Clang builtins for clarity when referencing thread's pcpu
Message-ID:  <1394821826-19412-1-git-send-email-conrad.meyer@isilon.com>

next in thread | raw e-mail | index | archive | help
We can efficiently reference thread-local pcpu members via the %gs
register with Clang-annotated C code, in place of inline GNU assembly.

Motivations:
  - Use C in leiu of inline assembly for clarity
  - Clang's static analyser may be better able to understand PCPU_*
    macros using the C constructs rather than inline assembly
    (unverified)

Sponsored by: EMC/Isilon storage division
Signed-off-by: Conrad Meyer <conrad.meyer@isilon.com>
Reviewed-by: Max Laier <mlaier@FreeBSD.org>
---
This is more of a "what do you think?" than a pull request. It seems like u=
sing
annotated C instead of asm is nice (in particular, Clang detects casts from
pointers typed with one segment to another, or unsegmented type). On the ot=
her
hand, this is code that doesn't change frequently, and we may still need to
support GCC for some time. So adding a second, parallel implementation just
doubles room for bugs.

Open questions:
  - How long is GCC intended to be supported as a compiler?
  - How atomic does PCPU_INC() need to be? It looks like it updates cpu-loc=
al
    counters; as long as it's a single asm instruction, should it be fine
    w.r.t. interrupts? The existing implementation does NOT use the 'lock; =
' prefix.

See the following simple example:

$ cat gstest.c
    #include <sys/cdefs.h>
    #include <stdint.h>

    #define GS_RELATIVE __attribute__((address_space(256)))

    struct pcpu {
            void *curthread;
            struct pcpu *self;
    };

    struct pcpu *
    __curpcpu(void)
    {
            volatile struct pcpu * GS_RELATIVE *res =3D
                (volatile struct pcpu * GS_RELATIVE *)
                    __offsetof(struct pcpu, self);
            return (*res);
    }
$ clang -Wall -Wextra -O1 -c gstest.c
$ objdump -d gstest.o

    gstest.o:     file format elf64-x86-64

    Disassembly of section .text:

    0000000000000000 <__curpcpu>:
       0:   65 48 8b 04 25 08 00    mov    %gs:0x8,%rax
       7:   00 00
       9:   c3                      retq

Support has been present since at least April 9, 2009 (when
documentation of the feature was first added to
LanguageExtensions.html). So all Clang versions in BSD core (BSD9,
BSD10) should support the feature.
---
 sys/amd64/include/pcpu.h | 98 +++++++++++++++++++++++++++++++++++++++++---=
----
 1 file changed, 84 insertions(+), 14 deletions(-)

diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index fe898e9..68892fc 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -81,7 +81,7 @@ extern struct pcpu *pcpup;
 #define	PCPU_PTR(member)	(&pcpup->pc_ ## member)
 #define	PCPU_SET(member, val)	(pcpup->pc_ ## member =3D (val))
=20
-#elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
+#elif defined(__clang__) || (defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE=
___TYPEOF))
=20
 /*
  * Evaluates to the byte offset of the per-cpu variable name.
@@ -95,6 +95,80 @@ extern struct pcpu *pcpup;
 #define	__pcpu_type(name)						\
 	__typeof(((struct pcpu *)0)->name)
=20
+#if defined(__clang__)
+
+#define __GS_RELATIVE __attribute__((address_space(256)))
+
+/*
+ * Evaluates to the address of the per-cpu variable name.
+ */
+#define	__PCPU_PTR(name) __extension__ ({					\
+	volatile __pcpu_type(name) __GS_RELATIVE *__p;				\
+										\
+	__p =3D (volatile __pcpu_type(name) __GS_RELATIVE *)__pcpu_offset(name);	=
\
+	__p;									\
+})
+
+#define	__PCPU_PTRX(name) __extension__ ({				\
+	volatile __pcpu_type(pc_prvspace) __GS_RELATIVE *__p;		\
+	__pcpu_type(name) *__mp;					\
+									\
+	__p =3D __PCPU_PTR(pc_prvspace);					\
+	__mp =3D &(*__p)->name;						\
+	__mp;								\
+})
+#define PCPU_PTR(member)	__PCPU_PTRX(pc_ ## member)
+
+/*
+ * Evaluates to the value of the per-cpu variable name.
+ */
+#define	__PCPU_GET(name) __extension__ ({				\
+	*__PCPU_PTR(name);						\
+})
+
+/*
+ * Adds the value to the per-cpu counter name.  The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define	__PCPU_ADD(name, val) do {					\
+	__pcpu_type(name) __val;					\
+	volatile __pcpu_type(name) __GS_RELATIVE *__ptr;		\
+									\
+	__val =3D (val);							\
+	__ptr =3D __PCPU_PTR(name);					\
+	*__ptr +=3D __val;						\
+} while (0)
+
+/*
+ * Increments the value of the per-cpu counter name.  The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define	__PCPU_INC(name) __PCPU_ADD(name, 1)
+
+/*
+ * Sets the value of the per-cpu variable name to value val.
+ */
+#define	__PCPU_SET(name, val) do {					\
+	__pcpu_type(name) __val;					\
+	volatile __pcpu_type(name) __GS_RELATIVE *__ptr;		\
+									\
+	__val =3D (val);							\
+	__ptr =3D __PCPU_PTR(name);					\
+	*__ptr =3D __val;							\
+} while (0)
+
+#define	curthread __extension__ ({					\
+	*((volatile __pcpu_type(pc_curthread) __GS_RELATIVE *)		\
+		__pcpu_offset(pc_curthread));				\
+})
+
+#define	curpcb __extension__ ({						\
+	*((volatile __pcpu_type(pc_curpcb) __GS_RELATIVE *)		\
+		__pcpu_offset(pc_curpcb));				\
+})
+
+#else /* !__clang__ */
+
 /*
  * Evaluates to the address of the per-cpu variable name.
  */
@@ -200,17 +274,7 @@ extern struct pcpu *pcpup;
 	}								\
 }
=20
-#define	PCPU_GET(member)	__PCPU_GET(pc_ ## member)
-#define	PCPU_ADD(member, val)	__PCPU_ADD(pc_ ## member, val)
-#define	PCPU_INC(member)	__PCPU_INC(pc_ ## member)
-#define	PCPU_PTR(member)	__PCPU_PTR(pc_ ## member)
-#define	PCPU_SET(member, val)	__PCPU_SET(pc_ ## member, val)
-
 #define	OFFSETOF_CURTHREAD	0
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wnull-dereference"
-#endif
 static __inline __pure2 struct thread *
 __curthread(void)
 {
@@ -220,9 +284,6 @@ __curthread(void)
 	    : "m" (*(char *)OFFSETOF_CURTHREAD));
 	return (td);
 }
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
 #define	curthread		(__curthread())
=20
 #define	OFFSETOF_CURPCB		32
@@ -236,6 +297,15 @@ __curpcb(void)
 }
 #define	curpcb		(__curpcb())
=20
+#define	PCPU_PTR(member)	__PCPU_PTR(pc_ ## member)
+
+#endif /* __clang__ */
+
+#define	PCPU_GET(member)	__PCPU_GET(pc_ ## member)
+#define	PCPU_ADD(member, val)	__PCPU_ADD(pc_ ## member, val)
+#define	PCPU_INC(member)	__PCPU_INC(pc_ ## member)
+#define	PCPU_SET(member, val)	__PCPU_SET(pc_ ## member, val)
+
 #define	IS_BSP()	(PCPU_GET(cpuid) =3D=3D 0)
=20
 #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) =
*/
--=20
1.8.5.3



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1394821826-19412-1-git-send-email-conrad.meyer>