Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 26 Mar 2001 07:58:21 +1000 (EST)
From:      peter.jeremy@alcatel.com.au
To:        FreeBSD-gnats-submit@freebsd.org
Subject:   gnu/26083: Reducing code over-alignment on Alpha
Message-ID:  <200103252158.f2PLwLS88211@gsmx07.alcatel.com.au>

next in thread | raw e-mail | index | archive | help

>Number:         26083
>Category:       gnu
>Synopsis:       Reducing code over-alignment on Alpha
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sun Mar 25 14:00:02 PST 2001
>Closed-Date:
>Last-Modified:
>Originator:     Peter Jeremy
>Release:        FreeBSD 5.0-CURRENT alpha
>Organization:
Alcatel Australia Limited
>Environment:

	DEC Multia running 5-CURRENT from early February.
	GNU CC 2.95.3 (prerelease)

>Description:

	Recently, following a commit to src/release/scripts/doFS.sh,
	a comment was made that the Alpha install disk is almost
	overflowing.  In the i386 case, the install disk can be squeezed
	by reducing the code over-alignment generated by gcc.  This
	feature isn't available on gcc/Alpha where the code alignment
	is hard-coded.

	Attached are some patches to implement -malign-jumps=N,
	-malign-loops=N and -malign-functions=N as for the i386.
	Implementing all this functionality is overkill for the
	release disk situation, but is consistent with the i386 and
	mostly consistent with the SPARC (alignments are specified
	in bytes on the SPARC, but log[base2](bytes) on the i386
	and in the patches below).

	I have successfully built and run a kernel with options
	"-malign-jumps=2 -malign-loops=2 -malign-functions=2" and
	completed a buildworld with options
	"-malign-jumps=3 -malign-loops=3 -malign-functions=4".
	
>How-To-Repeat:

	Build a kernel with the standard gcc and with gcc containing
	the patches below.  Use "cmp -l" to show that they are the same
	other than the build-time and version number.

	I found that the kernel text segment shrank by 10% when compiled
	with "-malign-jumps=2 -malign-loops=2 -malign-functions=2".

>Fix:

	The patches below are against the pre-release versions shown in
	the diff headers, rather than the just-committed 2.95.3 release.

Index: invoke.texi
===================================================================
RCS file: /home/CVSROOT/src/contrib/gcc.295/invoke.texi,v
retrieving revision 1.8
diff -u -r1.8 invoke.texi
--- invoke.texi	2001/02/17 09:04:50	1.8
+++ invoke.texi	2001/03/25 21:46:25
@@ -380,6 +380,8 @@
 -mcpu=@var{cpu type}
 -mbwx -mno-bwx -mcix -mno-cix -mmax -mno-max
 -mmemory-latency=@var{time}
+-malign-jumps=@var{num}  -malign-loops=@var{num}
+-malign-functions=@var{num}
 
 @emph{Clipper Options}
 -mc300  -mc400
@@ -5703,6 +5705,22 @@
 Note that L3 is only valid for EV5.
 
 @end table
+
+@item -malign-loops=@var{num}
+Align loops to a 2 raised to a @var{num} byte boundary.  If
+@samp{-malign-loops} is not specified, the default is 4 (16 bytes)
+if optimising unless ECOFF symbols are selected, otherwise no
+additional alignment is used.
+
+@item -malign-jumps=@var{num}
+Align instructions that are only jumped to to a 2 raised to a @var{num}
+byte boundary.  If @samp{-malign-jumps} is not specified, the default is
+4 (16 bytes) if optimising unless ECOFF symbols are selected, otherwise no
+additional alignment is used.
+
+@item -malign-functions=@var{num}
+Align the start of functions to a 2 raised to @var{num} byte boundary.
+If @samp{-malign-functions} is not specified, the default is 5 (32 bytes).
 @end table
 
 @node Clipper Options
Index: config/alpha/alpha.c
===================================================================
RCS file: /home/CVSROOT/src/contrib/gcc.295/config/alpha/alpha.c,v
retrieving revision 1.2
diff -u -r1.2 alpha.c
--- config/alpha/alpha.c	2000/08/09 08:36:27	1.2
+++ config/alpha/alpha.c	2001/03/25 21:46:24
@@ -75,6 +75,20 @@
 const char *alpha_fptm_string;	/* -mfp-trap-mode=[n|u|su|sui] */
 const char *alpha_mlat_string;	/* -mmemory-latency= */
 
+/* Alignment to use for functions, loops and jumps:  */
+
+/* Power of two alignment for functions. */
+int alpha_align_funcs;
+const char *alpha_align_funcs_string;
+
+/* Power of two alignment for loops. */
+int alpha_align_loops;
+const char *alpha_align_loops_string;
+
+/* Power of two alignment for non-loop jumps. */
+int alpha_align_jumps;
+const char *alpha_align_jumps_string;
+
 /* Save information from a "cmpxx" operation until the branch or scc is
    emitted.  */
 
@@ -320,6 +334,40 @@
 
   /* Acquire a unique set number for our register saves and restores.  */
   alpha_sr_alias_set = new_alias_set ();
+
+  /* Validate -malign-loops= value, or provide default.
+
+   ??? The default is no alignment if we don't optimize and also if we
+   are writing ECOFF symbols to work around a bug in DEC's assembler,
+   otherwise we use octaword alignment.
+ */
+
+  alpha_align_loops = (optimize > 0 && write_symbols != SDB_DEBUG ? 4 : 0);
+  if (alpha_align_loops_string)
+    {
+      alpha_align_loops = atoi (alpha_align_loops_string);
+      if (alpha_align_loops < 2 || alpha_align_loops > 6)
+	fatal ("-malign-loops=%d is not between 2 and 6", alpha_align_loops);
+    }
+
+  /* Validate -malign-jumps= value, or provide default.  */
+  alpha_align_jumps = (optimize > 0 && write_symbols != SDB_DEBUG ? 4 : 0);
+  if (alpha_align_jumps_string)
+    {
+      alpha_align_jumps = atoi (alpha_align_jumps_string);
+      if (alpha_align_jumps < 2 || alpha_align_jumps > 6)
+	fatal ("-malign-jumps=%d is not between 2 and 6", alpha_align_jumps);
+    }
+
+  /* Validate -malign-functions= value, or provide default. */
+  alpha_align_funcs = 5;		/* default is 32-byte boundary */
+  if (alpha_align_funcs_string)
+    {
+      alpha_align_funcs = atoi (alpha_align_funcs_string);
+      if (alpha_align_funcs < 2 || alpha_align_funcs > 6)
+	fatal ("-malign-functions=%d is not between 2 and 6",
+		alpha_align_funcs);
+    }
 }
 
 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones.  */
Index: config/alpha/alpha.h
===================================================================
RCS file: /home/CVSROOT/src/contrib/gcc.295/config/alpha/alpha.h,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 alpha.h
--- config/alpha/alpha.h	1999/10/16 06:07:49	1.1.1.3
+++ config/alpha/alpha.h	2001/03/25 21:46:24
@@ -243,6 +243,9 @@
 extern const char *alpha_fptm_string;	/* For -mfp-trap-mode=[n|u|su|sui]  */
 extern const char *alpha_tp_string;	/* For -mtrap-precision=[p|f|i] */
 extern const char *alpha_mlat_string;	/* For -mmemory-latency= */
+extern const char *alpha_align_loops_string;	/* For -malign-loops= */
+extern const char *alpha_align_jumps_string;	/* For -malign-jumps= */
+extern const char *alpha_align_funcs_string;	/* For -malign-functions= */
 
 #define TARGET_OPTIONS					\
 {							\
@@ -256,6 +259,12 @@
    "Control the precision given to fp exceptions"},	\
   {"memory-latency=",	&alpha_mlat_string,		\
    "Tune expected memory latency"},			\
+  { "align-loops=",	&alpha_align_loops_string, 	\
+    "Loop code aligned to this power of 2" },		\
+  { "align-jumps=",	&alpha_align_jumps_string,	\
+    "Jump targets are aligned to this power of 2" },	\
+  { "align-functions=",	&alpha_align_funcs_string,	\
+    "Function starts are aligned to this power of 2" },	\
 }
 
 /* Attempt to describe CPU characteristics to the preprocessor.  */
@@ -475,7 +484,8 @@
 #define STACK_BOUNDARY 64
 
 /* Allocation boundary (in *bits*) for the code of a function.  */
-#define FUNCTION_BOUNDARY 256
+extern int alpha_align_funcs;	/* power of two alignment for functions */
+#define FUNCTION_BOUNDARY (1 << (alpha_align_funcs + 3))
 
 /* Alignment of field after `int : 0' in a structure.  */
 #define EMPTY_FIELD_BOUNDARY 64
@@ -486,21 +496,15 @@
 /* A bitfield declared as `int' forces `int' alignment for the struct.  */
 #define PCC_BITFIELD_TYPE_MATTERS 1
 
-/* Align loop starts for optimal branching.  
-
-   ??? Kludge this and the next macro for the moment by not doing anything if
-   we don't optimize and also if we are writing ECOFF symbols to work around
-   a bug in DEC's assembler. */
-
-#define LOOP_ALIGN(LABEL) \
-  (optimize > 0 && write_symbols != SDB_DEBUG ? 4 : 0)
-
-/* This is how to align an instruction for optimal branching.  On
-   Alpha we'll get better performance by aligning on an octaword
-   boundary.  */
-
-#define LABEL_ALIGN_AFTER_BARRIER(FILE)	\
-  (optimize > 0 && write_symbols != SDB_DEBUG ? 4 : 0)
+/* Align loop starts for optimal branching.  */
+extern int alpha_align_loops;		/* power of two alignment for loops */
+#define LOOP_ALIGN(LABEL) (alpha_align_loops)
+
+/* This is how to align an instruction for optimal branching.
+   On Alpha, we should get better performance by aligning to an
+   octaword (16 byte) boundary.  */
+extern int alpha_align_jumps;		/* power of two alignment for jumps */
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL) (alpha_align_jumps)
 
 /* No data type wants to be aligned rounder than this.  */
 #define BIGGEST_ALIGNMENT 64
>Release-Note:
>Audit-Trail:
>Unformatted:

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200103252158.f2PLwLS88211>