Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 24 Jun 2001 10:02:50 -0700 (PDT)
From:      Matt Dillon <dillon@earth.backplane.com>
To:        Bruce Evans <bde@zeta.org.au>
Cc:        Mikhail Teterin <mi@aldan.algebra.com>, jlemon@FreeBSD.ORG, cvs-committers@FreeBSD.ORG, cvs-all@FreeBSD.ORG
Subject:   kernel size w/ optimized bzero() & patch set (was Re: Inline optimized bzero (was Re: cvs commit: src/sys/netinettcp_subr.c))
Message-ID:  <200106241702.f5OH2oN78720@earth.backplane.com>
References:   <Pine.BSF.4.21.0106241725360.54646-100000@besplex.bde.org>

next in thread | previous in thread | raw e-mail | index | archive | help
    Ok, how about this.  I replaced bzero() with the inline and placed it
    in the machine-dependent section of code.  I managed to knock the inline
    code generation down to the point where it does not bloat the resulting
    kernel binary.  As an example of this, the 'register int z = 0' caused
    all the assignments to 0 to use 'movl %eax,...' (3 byte instruction)
    instead of 'movl $0,...' (7 byte instruction).  The kernel size is
    around 6000 bytes larger without that optimization.  Sometimes GCC's
    optimizer gets in the way :-(

    I am amazed by the results... and I found a couple of interesting things
    out too.  For example, tcp_input bzero's a number of 8 and 12 byte
    structures, not just the 20 byte structures we were looking at previously.

    I don't test for address alignment (it can't be done in the inline and
    still have good code), but i586_bzero() doesn't check for address
    alignment either, so it is no worse than before.

						-Matt

    Normal bzero:

apollo:/usr/src/sys/compile/MOBILE# size kernel
   text    data     bss     dec     hex filename
1850705  159392  144536 2154633  20e089 kernel

    Inline bzero:

apollo:/usr/src/sys/compile/MOBILE# size kernel
   text    data     bss     dec     hex filename
1850833  159396  144536 2154765  20e10d kernel

    Patch set (relative to -stable) (UNTESTED):



Index: i386/i386/identcpu.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/identcpu.c,v
retrieving revision 1.80.2.4
diff -u -r1.80.2.4 identcpu.c
--- i386/i386/identcpu.c	2000/09/30 03:32:21	1.80.2.4
+++ i386/i386/identcpu.c	2001/06/24 16:16:42
@@ -504,7 +504,7 @@
 #if defined(I486_CPU)
 	case CPUCLASS_486:
 		printf("486");
-		bzero = i486_bzero;
+		md_bzero = i486_bzero;
 		break;
 #endif
 #if defined(I586_CPU)
Index: i386/i386/support.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/support.s,v
retrieving revision 1.67.2.3
diff -u -r1.67.2.3 support.s
--- i386/i386/support.s	2000/09/30 02:49:33	1.67.2.3
+++ i386/i386/support.s	2001/06/24 15:57:51
@@ -49,6 +49,8 @@
 _bcopy_vector:
 	.long	_generic_bcopy
 	.globl	_bzero
+	.globl	_md_bzero
+_md_bzero:
 _bzero:
 	.long	_generic_bzero
 	.globl	_copyin_vector
Index: i386/include/asnames.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/Attic/asnames.h,v
retrieving revision 1.44.2.1
diff -u -r1.44.2.1 asnames.h
--- i386/include/asnames.h	2000/05/16 06:58:10	1.44.2.1
+++ i386/include/asnames.h	2001/06/24 15:59:07
@@ -244,6 +244,7 @@
 #define _lapic				lapic
 #define _linux_sigcode			linux_sigcode
 #define _linux_szsigcode		linux_szsigcode
+#define _md_bzero			md_bzero
 #define _mi_startup			mi_startup
 #define _microuptime			microuptime
 #define _mp_gdtbase			mp_gdtbase
Index: i386/include/cpufunc.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/cpufunc.h,v
retrieving revision 1.96.2.1
diff -u -r1.96.2.1 cpufunc.h
--- i386/include/cpufunc.h	2001/05/16 20:51:38	1.96.2.1
+++ i386/include/cpufunc.h	2001/06/24 16:55:05
@@ -53,6 +53,83 @@
 
 #ifdef	__GNUC__
 
+/*
+ * bzero() inline.  IA32 specific.
+ *
+ *	This function assumes that unaligned accesses are allowed.  If the
+ *	length is a constant we attempt to optimize small bzeros, generating
+ *	only a few instructions.  We also optimize medium sized bzeros with
+ *	a simple loop (where call overhead would otherwise be inefficient).
+ *	Anything else goes through the assembly-optimized bzero function.
+ *
+ *	Do not mess around with this function without also checking the
+ *	resulting assembly.
+ */
+
+void	(*md_bzero)	__P((void *buf, size_t len));
+
+#if 0
+
+static __inline void
+bzero(void *buf, size_t len)
+{
+    md_bzero(buf, len);
+}
+
+#else
+
+static __inline void
+bzero(void *buf, size_t len)
+{
+    if (__builtin_constant_p(len)) {
+	register int z = 0;	/* this+switch results in %eax instead of $0 */
+
+	switch(len) {
+	case 16 * sizeof(int):
+	case 15 * sizeof(int):
+	case 14 * sizeof(int):
+	case 13 * sizeof(int):
+	case 12 * sizeof(int):
+	case 11 * sizeof(int):
+	case 10 * sizeof(int):
+	case 9 * sizeof(int):
+	case 8 * sizeof(int):
+	    do {
+		len -= sizeof(int);
+		*(int *)((char *)buf + len) = z;
+	    } while(len);
+	    break;
+	case 7 * sizeof(int):
+	    *((int *)buf + 6) = z;
+	case 6 * sizeof(int):
+	    *((int *)buf + 5) = z;
+	case 5 * sizeof(int):
+	    *((int *)buf + 4) = z;
+	case 4 * sizeof(int):
+	    *((int *)buf + 3) = z;
+	case 3 * sizeof(int):
+	    *((int *)buf + 2) = z;
+	case 2 * sizeof(int):
+	    *((int *)buf + 1) = z;
+	case 1 * sizeof(int):
+	    *((int *)buf + 0) = z;
+	case 0 * sizeof(int):
+	    break;
+	default:
+	    /*
+	     * Warning! gcc inline no longer considers 'len' a constant in
+	     * the default case of this switch.
+	     */
+	    md_bzero(buf, len);
+	    break;
+	}
+    } else {
+	md_bzero(buf, len);
+    }
+}
+
+#endif
+
 #ifdef SMP
 #include <machine/lock.h>		/* XXX */
 #endif
@@ -508,6 +585,7 @@
 int	breakpoint	__P((void));
 u_int	bsfl		__P((u_int mask));
 u_int	bsrl		__P((u_int mask));
+void	(*bzero)	__P((void *buf, size_t len));
 void	disable_intr	__P((void));
 void	enable_intr	__P((void));
 u_char	inb		__P((u_int port));
Index: i386/isa/npx.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/npx.c,v
retrieving revision 1.80
diff -u -r1.80 npx.c
--- i386/isa/npx.c	2000/01/29 16:17:36	1.80
+++ i386/isa/npx.c	2001/06/24 16:14:11
@@ -456,7 +456,7 @@
 			ovbcopy_vector = i586_bcopy;
 		}
 		if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO))
-			bzero = i586_bzero;
+			md_bzero = i586_bzero;
 		if (!(flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) {
 			copyin_vector = i586_copyin;
 			copyout_vector = i586_copyout;
Index: sys/systm.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/systm.h,v
retrieving revision 1.111.2.5
diff -u -r1.111.2.5 systm.h
--- sys/systm.h	2001/01/16 12:26:21	1.111.2.5
+++ sys/systm.h	2001/06/24 15:52:22
@@ -135,7 +135,7 @@
 void	ovbcopy __P((const void *from, void *to, size_t len));
 
 #ifdef __i386__
-extern void	(*bzero) __P((void *buf, size_t len));
+/* in machine/cpufunc.h */
 #else
 void	bzero __P((void *buf, size_t len));
 #endif

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe cvs-all" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200106241702.f5OH2oN78720>