Date:      Tue, 2 Apr 2013 19:40:17 +0400
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        Luigi Rizzo <rizzo@iet.unipi.it>
Cc:        arch@FreeBSD.org, kib@FreeBSD.org
Subject:   Re: [CFR][CFT] counter(9): new API for faster and raceless counters
Message-ID:  <20130402154017.GN76816@FreeBSD.org>
In-Reply-To: <20130402145722.GA9161@onelab2.iet.unipi.it>
References:  <20130401115128.GZ76816@FreeBSD.org> <20130402013758.GA96904@onelab2.iet.unipi.it> <20130402141717.GK76816@glebius.int.ru> <20130402145722.GA9161@onelab2.iet.unipi.it>



  Luigi,

On Tue, Apr 02, 2013 at 04:57:22PM +0200, Luigi Rizzo wrote:
L> > Here is patch for review. It adds 4 more primitives:
L> > 
L> > counter_enter();
L> > counter_u64_add_prot(c, x);
L> > counter_u64_subtract_prot(c, x);
L> > counter_exit();
L> 
L> thanks for the patch. I have three more comments:
L> 
L> - is it really necessary to have the "subtract" version?
L>   Couldn't one just make "x" an int64_t? Or would that
L>   give too many warnings, maybe?

Agreed. See patch.
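
To illustrate the intended usage, here is a minimal consumer sketch (mine,
not part of the patch; the counter names are made up) batching several
updates under one protection section:

	/*
	 * Update two related counters with a single enter/exit pair.
	 * On amd64 counter_enter()/counter_exit() compile away; on
	 * i386 without CX8 they become critical_enter()/critical_exit().
	 */
	counter_enter();
	counter_u64_add_prot(pkts_in, 1);
	counter_u64_add_prot(bytes_in, len);
	counter_exit();

	/* A decrement is now simply an add of a negative int64_t. */
	counter_u64_add(frags_pending, -1);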

L> - (this can be fixed later) in the i386 version, counter_enter()
L>   and counter_exit() have an if statement which may become quite
L>   expensive if mispredicted. Also, the test is repeated 3 times in
L>   counter_u64_add() (enter/add/exit). Hopefully the compiler
L>   optimizes out the extra calls, but the previous version seemed
L>   more readable. Anyways, at some point we should figure out
L>   whether putting likely/unlikely annotations on the result of
L>   (cpu_feature & CPUID_CX8) may improve performance where it matters.

Agreed. See patch.
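
(For reference, such an annotation would be spelled with the kernel's
__predict_false(), e.g.:

	if (__predict_false((cpu_feature & CPUID_CX8) == 0))
		critical_enter();

but as you note, whether it actually helps should be measured first.)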

L> - do you plan to provide an API to initialize a counter to 0 or a
L>   specific value ? I suppose this is done implicitly on allocation,
L>   but there are cases (e.g. ipfw) where the application explicitly
L>   zeroes counters.

There already is counter_u64_zero().
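
So an ipfw-style explicit reset needs no new API. A sketch, assuming the
allocation and fetch primitives from the rest of the proposed counter(9)
API:

	counter_u64_t cnt;

	cnt = counter_u64_alloc(M_WAITOK);	/* starts at zero */
	counter_u64_add(cnt, 1);
	printf("%ju\n", (uintmax_t)counter_u64_fetch(cnt));
	counter_u64_zero(cnt);			/* explicit reset */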

L> > So a 63% speedup, not to mention the fact that in such a tight loop 98% of
L> > parallel updates are lost on the racy counter :)
L> > 
L> > A tight loop with atomic_add() is 22 times (twenty two times) slower than
L> > the new counter. I didn't bother to run ministat :)
L> 
L> Yes, I think this really does justice to the improvements in the new code
L> (I am a bit unclear on what actual test you ran / how many counter_u64_add()
L> calls per syscall you have, but I do expect the racy counters to be much
L> slower and much less reliable, and the 20x slowdown with atomics is
L> completely expected.)

The test ran 2 * 10^6 iterations of a for loop, updating the counter once
per iteration.
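
Roughly this (a sketch reconstructing the loop, not the literal harness):

	int i;

	for (i = 0; i < 2 * 1000 * 1000; i++)
		counter_u64_add(cnt, 1);

with the time around the loop measured for the racy "+=", the atomic_add()
and the new counter variants of the update.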

-- 
Totus tuus, Glebius.

[-- Attachment: counter_API_extend.diff (text/x-diff) --]

Index: sys/sparc64/include/counter.h
===================================================================
--- sys/sparc64/include/counter.h	(revision 249002)
+++ sys/sparc64/include/counter.h	(working copy)
@@ -31,22 +31,21 @@
 
 #include <sys/pcpu.h>
 
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
-}
+#define	counter_u64_add_prot(c, inc)		do {	\
+	CRITICAL_ASSERT(curthread);			\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
 
 static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/ia64/include/counter.h
===================================================================
--- sys/ia64/include/counter.h	(revision 249002)
+++ sys/ia64/include/counter.h	(working copy)
@@ -31,22 +31,21 @@
 
 #include <sys/pcpu.h>
 
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
-}
+#define	counter_u64_add_prot(c, inc)		do {	\
+	CRITICAL_ASSERT(curthread);			\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
 
 static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/netinet/ip_input.c
===================================================================
--- sys/netinet/ip_input.c	(revision 249002)
+++ sys/netinet/ip_input.c	(working copy)
@@ -293,7 +293,7 @@ void
 kmod_ipstat_dec(int statnum)
 {
 
-	counter_u64_subtract((counter_u64_t )&V_ipstatp + statnum, 1);
+	counter_u64_add((counter_u64_t )&V_ipstatp + statnum, -1);
 }
 
 static int
Index: sys/netinet/ip_var.h
===================================================================
--- sys/netinet/ip_var.h	(revision 249002)
+++ sys/netinet/ip_var.h	(working copy)
@@ -175,7 +175,7 @@ VNET_DECLARE(struct ipstat_p, ipstatp);
 #define	IPSTAT_ADD(name, val)	counter_u64_add(V_ipstatp.name, (val))
-#define	IPSTAT_SUB(name, val)	counter_u64_subtract(V_ipstatp.name, (val))
+#define	IPSTAT_SUB(name, val)	counter_u64_add(V_ipstatp.name, -(val))
 #define	IPSTAT_INC(name)	IPSTAT_ADD(name, 1)
-#define	IPSTAT_DEC(name)	IPSTAT_SUB(name, 1)
+#define	IPSTAT_DEC(name)	IPSTAT_ADD(name, -1)
 
 /*
  * Kernel module consumers must use this accessor macro.
Index: sys/i386/include/counter.h
===================================================================
--- sys/i386/include/counter.h	(revision 249002)
+++ sys/i386/include/counter.h	(working copy)
@@ -33,8 +33,18 @@
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
+#define	counter_enter()	do {				\
+	if ((cpu_feature & CPUID_CX8) == 0)		\
+		critical_enter();			\
+} while (0)
+
+#define	counter_exit()	do {				\
+	if ((cpu_feature & CPUID_CX8) == 0)		\
+		critical_exit();			\
+} while (0)
+
 static inline void
-counter_64_inc_8b(uint64_t *p, uint64_t inc)
+counter_64_inc_8b(uint64_t *p, int64_t inc)
 {
 
 	__asm __volatile(
@@ -52,8 +62,16 @@ static inline void
 	: "memory", "cc", "eax", "edx", "ebx", "ecx");
 }
 
+#define	counter_u64_add_prot(c, inc)		do {	\
+	if ((cpu_feature & CPUID_CX8) == 0) {		\
+		CRITICAL_ASSERT(curthread);		\
+		*(uint64_t *)zpcpu_get(c) += (inc);	\
+	} else						\
+		counter_64_inc_8b((c), (inc));		\
+} while (0)
+
 static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 
 	if ((cpu_feature & CPUID_CX8) == 0) {
@@ -65,11 +83,4 @@ static inline void
 	}
 }
 
-static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
-{
-
-	counter_u64_add(c, -(int64_t)dec);
-}
-
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/amd64/include/counter.h
===================================================================
--- sys/amd64/include/counter.h	(revision 249002)
+++ sys/amd64/include/counter.h	(working copy)
@@ -33,8 +33,13 @@
 
 extern struct pcpu __pcpu[1];
 
+#define	counter_enter()	do {} while (0)
+#define	counter_exit()	do {} while (0)
+
+#define	counter_u64_add_prot(c, i)	counter_u64_add(c, i)
+
 static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 
 	__asm __volatile("addq\t%1,%%gs:(%0)"
@@ -43,14 +48,4 @@ static inline void
 	    : "memory", "cc");
 }
 
-static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
-{
-
-	__asm __volatile("subq\t%1,%%gs:(%0)"
-	    :
-	    : "r" ((char *)c - (char *)&__pcpu[0]), "r" (dec)
-	    : "memory", "cc");
-}
-
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/arm/include/counter.h
===================================================================
--- sys/arm/include/counter.h	(revision 249002)
+++ sys/arm/include/counter.h	(working copy)
@@ -31,22 +31,21 @@
 
 #include <sys/pcpu.h>
 
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
-}
+#define	counter_u64_add_prot(c, inc)		do {	\
+	CRITICAL_ASSERT(curthread);			\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
 
 static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/powerpc/include/counter.h
===================================================================
--- sys/powerpc/include/counter.h	(revision 249002)
+++ sys/powerpc/include/counter.h	(working copy)
@@ -33,8 +33,13 @@
 
 #if defined(AIM) && defined(__powerpc64__)
 
+#define	counter_enter()	do {} while (0)
+#define	counter_exit()	do {} while (0)
+
+#define	counter_u64_add_prot(c, i)	counter_u64_add(c, i)
+
 static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 	uint64_t ccpu, old;
 
@@ -50,33 +55,25 @@ static inline void
 	    : "cc", "memory");
 }
 
-static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
-{
-
-	counter_u64_add(c, -dec);
-}
-
 #else	/* !AIM || !64bit */
 
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
-}
+#define	counter_u64_add_prot(c, inc)		do {	\
+	CRITICAL_ASSERT(curthread);			\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
 
 static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 #endif	/* AIM 64bit */
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/mips/include/counter.h
===================================================================
--- sys/mips/include/counter.h	(revision 249002)
+++ sys/mips/include/counter.h	(working copy)
@@ -31,22 +31,21 @@
 
 #include <sys/pcpu.h>
 
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
-}
+#define	counter_u64_add_prot(c, inc)		do {	\
+	CRITICAL_ASSERT(curthread);			\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
 
 static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
