Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 2 Apr 2013 18:17:17 +0400
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        Luigi Rizzo <rizzo@iet.unipi.it>, kib@FreeBSD.org
Cc:        arch@FreeBSD.org
Subject:   Re: [CFR][CFT] counter(9): new API for faster and raceless counters
Message-ID:  <20130402141717.GK76816@glebius.int.ru>
In-Reply-To: <20130402013758.GA96904@onelab2.iet.unipi.it>
References:  <20130401115128.GZ76816@FreeBSD.org> <20130402013758.GA96904@onelab2.iet.unipi.it>

next in thread | previous in thread | raw e-mail | index | archive | help

--YyxzkC/DtE3JUx8+
Content-Type: text/plain; charset=koi8-r
Content-Disposition: inline

  Luigi,

On Tue, Apr 02, 2013 at 03:37:58AM +0200, Luigi Rizzo wrote:
L> API:
L> 
L> > o MI implementation of counter_u64_add() is:
L> > 
L> >      critical_enter();
L> >      *(uint64_t *)zpcpu_get(c) += inc;
L> >      critical_exit();
L> 
L> - there are several places which use multiple counters
L>   (e.g. packet and byte counters, global and per flow/socket),
L>   so i wonder if it may help to provide a "protected" version of
L>   counter_u64_add() that requires the critical_enter/exit
L>   only once. Something like
L> 
L> 	PROTECT_COUNTERS(
L> 		safe_counter_u64_add(c, x);
L> 		safe_counter_u64_add(c, x);
L> 		safe_counter_u64_add(c, x);
L> 	);
L> 
L>   where PROTECT_COUNTERS() would translate into the critical_enter/exit
L>   where required, and nothing on other architectures.

Here is patch for review. It adds 4 more primitives:

counter_enter();
counter_u64_add_prot(c, x);
counter_u64_subtract_prot(c, x);
counter_exit();

L> BENCHMARK:
L> 
L> > I've got a simple benchmark. A syscall that solely updates a counter is
L> > implemented. Number of processes is spawned, equal to number of cores,
L> > each process binds itself to a dedicated CPU and calls the syscall 10^6
L> > times and exits. Parent wait(2)s for them all and I measure real time of
L> 
L> - I am under the impression that these benchmarks are dominated
L>   by the syscall time, and the new counters would exhibit a lot
L>   better relative performance (compared to racy or atomic)
L>   by doing 10..100 counter ops per syscall. Any chance to retry
L>   the test in this configuration ?

Ok, as you wish.

Apparently the compiler optimises away loops like:

	for (int i = 0; i < rounds; i++)
		the_counter += v;

To avoid optimisations here I declared the_counter as volatile. Is the
benchmark fair after that? Anyway, here are results for (rounds == 2000000000):

x counter_u64_add(), result == 2000000000 * ncpus
+ racy increment, result == 2022232241 (!!!)
+------------------------------------------------------------------------------+
|  x                                                                   +       |
|  x                                                                   +       |
|  x                                                                   ++      |
|  x                                                                   ++      |
|  x        x                                                         +++     +|
||_MA__|                                                             |_MA_|    |
+------------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  10             5          5.44          5.01         5.053    0.13605963
+  10          8.16          8.53           8.2         8.235    0.10721215
Difference at 95.0% confidence
        3.182 +/- 0.115089
        62.9725% +/- 2.27764%
        (Student's t, pooled s = 0.122488)

So a 63% speedup, not to mention the fact that in such a tight loop 98% of
the parallel updates are lost on the racy counter :)

A tight loop with atomic_add() is 22 times (twenty two times) slower than the
new counter. I didn't bother to run ministat :)

-- 
Totus tuus, Glebius.

--YyxzkC/DtE3JUx8+
Content-Type: text/x-diff; charset=koi8-r
Content-Disposition: attachment; filename="counter_API_extend.diff"

Index: sys/sparc64/include/counter.h
===================================================================
--- sys/sparc64/include/counter.h	(revision 249002)
+++ sys/sparc64/include/counter.h	(working copy)
@@ -31,22 +31,33 @@
 
 #include <sys/pcpu.h>
 
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
+
+#define	counter_u64_add_prot(c, inc)		do {	\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
+
+#define	counter_u64_subtract_prot(c, dec)	do {	\
+	*(uint64_t *)zpcpu_get(c) -= (dec);		\
+} while (0)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 static inline void
 counter_u64_subtract(counter_u64_t c, uint64_t dec)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_subtract_prot(c, dec);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/ia64/include/counter.h
===================================================================
--- sys/ia64/include/counter.h	(revision 249002)
+++ sys/ia64/include/counter.h	(working copy)
@@ -31,22 +31,33 @@
 
 #include <sys/pcpu.h>
 
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
+
+#define	counter_u64_add_prot(c, inc)		do {	\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
+
+#define	counter_u64_subtract_prot(c, dec)	do {	\
+	*(uint64_t *)zpcpu_get(c) -= (dec);		\
+} while (0)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 static inline void
 counter_u64_subtract(counter_u64_t c, uint64_t dec)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_subtract_prot(c, dec);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/i386/include/counter.h
===================================================================
--- sys/i386/include/counter.h	(revision 249002)
+++ sys/i386/include/counter.h	(working copy)
@@ -33,6 +33,16 @@
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
+#define	counter_enter()	do {				\
+	if ((cpu_feature & CPUID_CX8) == 0)		\
+		critical_enter();			\
+} while (0)
+
+#define	counter_exit()	do {				\
+	if ((cpu_feature & CPUID_CX8) == 0)		\
+		critical_exit();			\
+} while (0)
+
 static inline void
 counter_64_inc_8b(uint64_t *p, uint64_t inc)
 {
@@ -52,24 +62,36 @@ counter_64_inc_8b(uint64_t *p, uint64_t inc)
 	: "memory", "cc", "eax", "edx", "ebx", "ecx");
 }
 
+#define	counter_u64_add_prot(c, inc)		do {	\
+	if ((cpu_feature & CPUID_CX8) == 0)		\
+		*(uint64_t *)zpcpu_get(c) += (inc);	\
+	else						\
+		counter_64_inc_8b((c), (inc));		\
+} while (0)
+
+#define	counter_u64_subtract_prot(c, dec)	do {	\
+	if ((cpu_feature & CPUID_CX8) == 0)		\
+		*(uint64_t *)zpcpu_get(c) -= (dec);	\
+	else						\
+		counter_64_inc_8b((c), -(int64_t)(dec));\
+} while (0)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
 
-	if ((cpu_feature & CPUID_CX8) == 0) {
-		critical_enter();
-		*(uint64_t *)zpcpu_get(c) += inc;
-		critical_exit();
-	} else {
-		counter_64_inc_8b(c, inc);
-	}
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 static inline void
 counter_u64_subtract(counter_u64_t c, uint64_t dec)
 {
 
-	counter_u64_add(c, -(int64_t)dec);
+	counter_enter();
+	counter_u64_subtract_prot(c, dec);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/amd64/include/counter.h
===================================================================
--- sys/amd64/include/counter.h	(revision 249002)
+++ sys/amd64/include/counter.h	(working copy)
@@ -33,6 +33,12 @@
 
 extern struct pcpu __pcpu[1];
 
+#define	counter_enter()	do {} while (0)
+#define	counter_exit()	do {} while (0)
+
+#define	counter_u64_add_prot(c, i)	counter_u64_add(c, i)
+#define	counter_u64_subtract_prot(c, i)	counter_u64_subtract(c, i)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
Index: sys/arm/include/counter.h
===================================================================
--- sys/arm/include/counter.h	(revision 249002)
+++ sys/arm/include/counter.h	(working copy)
@@ -31,22 +31,33 @@
 
 #include <sys/pcpu.h>
 
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
+
+#define	counter_u64_add_prot(c, inc)		do {	\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
+
+#define	counter_u64_subtract_prot(c, dec)	do {	\
+	*(uint64_t *)zpcpu_get(c) -= (dec);		\
+} while (0)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 static inline void
 counter_u64_subtract(counter_u64_t c, uint64_t dec)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_subtract_prot(c, dec);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/powerpc/include/counter.h
===================================================================
--- sys/powerpc/include/counter.h	(revision 249002)
+++ sys/powerpc/include/counter.h	(working copy)
@@ -33,6 +33,12 @@
 
 #if defined(AIM) && defined(__powerpc64__)
 
+#define	counter_enter()	do {} while (0)
+#define	counter_exit()	do {} while (0)
+
+#define	counter_u64_add_prot(c, i)	counter_u64_add(c, i)
+#define	counter_u64_subtract_prot(c, i)	counter_u64_subtract(c, i)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
@@ -59,24 +65,35 @@ counter_u64_subtract(counter_u64_t c, uint64_t dec
 
 #else	/* !AIM || !64bit */
 
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
+
+#define	counter_u64_add_prot(c, inc)		do {	\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
+
+#define	counter_u64_subtract_prot(c, dec)	do {	\
+	*(uint64_t *)zpcpu_get(c) -= (dec);		\
+} while (0)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 static inline void
 counter_u64_subtract(counter_u64_t c, uint64_t dec)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_subtract_prot(c, dec);
+	counter_exit();
 }
 
 #endif	/* AIM 64bit */
 
 #endif	/* ! __MACHINE_COUNTER_H__ */
Index: sys/mips/include/counter.h
===================================================================
--- sys/mips/include/counter.h	(revision 249002)
+++ sys/mips/include/counter.h	(working copy)
@@ -31,22 +31,33 @@
 
 #include <sys/pcpu.h>
 
+#define	counter_enter()	critical_enter()
+#define	counter_exit()	critical_exit()
+
+#define	counter_u64_add_prot(c, inc)		do {	\
+	*(uint64_t *)zpcpu_get(c) += (inc);		\
+} while (0)
+
+#define	counter_u64_subtract_prot(c, dec)	do {	\
+	*(uint64_t *)zpcpu_get(c) -= (dec);		\
+} while (0)
+
 static inline void
 counter_u64_add(counter_u64_t c, uint64_t inc)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) += inc;
-	critical_exit();
+	counter_enter();
+	counter_u64_add_prot(c, inc);
+	counter_exit();
 }
 
 static inline void
 counter_u64_subtract(counter_u64_t c, uint64_t dec)
 {
 
-	critical_enter();
-	*(uint64_t *)zpcpu_get(c) -= dec;
-	critical_exit();
+	counter_enter();
+	counter_u64_subtract_prot(c, dec);
+	counter_exit();
 }
 
 #endif	/* ! __MACHINE_COUNTER_H__ */

--YyxzkC/DtE3JUx8+--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130402141717.GK76816>