Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 16 Jan 2013 23:05:16 -0800
From:      John-Mark Gurney <jmg@funkthat.com>
To:        toolchain@FreeBSD.org
Subject:   patch to add aes and pclmulqdq instructions to gcc
Message-ID:  <20130117070516.GI1410@funkthat.com>

next in thread | raw e-mail | index | archive | help

--IU5/I01NYhRvwH70
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Mike Belopuhov pointed me to the patch in OpenBSD:
http://freshbsd.org/commit/openbsd/0babc91a00b1f1953637bb39c8ec97aef704629e/diff.txt

While OpenBSD's binutils is quite different than FreeBSD's, I was able
to use his patch to teach binutils how to assemble and disassemble the
aes and pclmulqdq instructions.

I have done basic tests, such as verifying that it can assemble the aesni
module and get the same results, and that it can assemble a sample file
for pclmulqdq.  For each of these tests, I have verified that its output
matches (as closely as possible, as gcc/clang compile callq's differently)
that of clang on amd64.

I have attached the patch, and it is also available at:
http://people.freebsd.org/~jmg/gcc.aes.patch

Comments?

I have not passed it through a make universe yet, but will before
committing...

I am also working on basic intrinsics header files for these instructions
too...

-- 
  John-Mark Gurney				Voice: +1 415 225 5579

     "All that I will do, has been done, All that I have, has not."

--IU5/I01NYhRvwH70
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="gcc.aes.patch"

Index: contrib/binutils/gas/config/tc-i386.c
===================================================================
--- contrib/binutils/gas/config/tc-i386.c	(revision 245361)
+++ contrib/binutils/gas/config/tc-i386.c	(working copy)
@@ -3981,7 +3981,7 @@
 	 SSE4 instructions have 3 bytes.  We may use one more higher
 	 byte to specify a prefix the instruction requires.  Exclude
 	 instructions which are in both SSE4 and ABM.  */
-      if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0
+      if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4 | CpuAES | CpuPCLMUL)) != 0
 	  && (i.tm.cpu_flags & CpuABM) == 0)
 	{
 	  if (i.tm.base_opcode & 0xff000000)
@@ -4033,7 +4033,7 @@
 	}
       else
 	{
-	  if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0
+	  if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4 | CpuAES | CpuPCLMUL)) != 0
 	      && (i.tm.cpu_flags & CpuABM) == 0)
 	    {
 	      p = frag_more (3);
Index: contrib/binutils/opcodes/i386-dis.c
===================================================================
--- contrib/binutils/opcodes/i386-dis.c	(revision 245361)
+++ contrib/binutils/opcodes/i386-dis.c	(working copy)
@@ -543,6 +543,13 @@
 #define PREGRP97  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 97 } }
 #define PREGRP98  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 98 } }
 #define PREGRP99  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 99 } }
+#define PREGRP100 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 100 } }
+#define PREGRP101 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 101 } }
+#define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } }
+#define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } }
+#define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } }
+#define PREGRP105 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 105 } }
+#define PREGRP106 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 106 } }
 
 
 #define X86_64_0  NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } }
@@ -1319,7 +1326,7 @@
   /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */
   /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */
   /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
-  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
+  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */
   /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
   /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
   /*       -------------------------------        */
@@ -1382,7 +1389,7 @@
   /* 10 */ 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* 1f */
   /* 20 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 2f */
   /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */
-  /* 40 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */
+  /* 40 */ 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, /* 4f */
   /* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 5f */
   /* 60 */ 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, /* 6f */
   /* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 7f */
@@ -1391,7 +1398,7 @@
   /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */
   /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */
   /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
-  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
+  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, /* df */
   /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
   /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
   /*       -------------------------------        */
@@ -2605,6 +2612,62 @@
     { "invvpid",{ Gm, Mo } },
     { "(bad)",	{ XX } },
   },
+
+  /* PREGRP100 */
+  {
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "aesimc", { XM, EXx } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP101 */
+  {
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "aesenc",{ XM, EXx } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP102 */
+  {
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "aesenclast", { XM, EXx } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP103 */
+  {
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "aesdec", { XM, EXx } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP104 */
+  {
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "aesdeclast", { XM, EXx } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP105 */
+  {
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "aeskeygenassist", { XM, EXx, Ib } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP106 */
+  {
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "pclmulqdq", { XM, EXx, Ib } },
+    { "(bad)",	{ XX } },
+  },
 };
 
 static const struct dis386 x86_64_table[][2] = {
@@ -2876,11 +2939,11 @@
     { "(bad)", { XX } },
     { "(bad)", { XX } },
     { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
+    { PREGRP100 },
+    { PREGRP101 },
+    { PREGRP102 },
+    { PREGRP103 },
+    { PREGRP104 },
     /* e0 */
     { "(bad)", { XX } },
     { "(bad)", { XX } },
@@ -2997,10 +3060,10 @@
     { PREGRP84 },
     { PREGRP85 },
     { "(bad)", { XX } },
+    { PREGRP106 },
     { "(bad)", { XX } },
     { "(bad)", { XX } },
     { "(bad)", { XX } },
-    { "(bad)", { XX } },
     /* 48 */
     { "(bad)", { XX } },
     { "(bad)", { XX } },
@@ -3171,7 +3234,7 @@
     { "(bad)", { XX } },
     { "(bad)", { XX } },
     { "(bad)", { XX } },
-    { "(bad)", { XX } },
+    { PREGRP105 },
     /* e0 */
     { "(bad)", { XX } },
     { "(bad)", { XX } },
Index: contrib/binutils/opcodes/i386-opc.h
===================================================================
--- contrib/binutils/opcodes/i386-opc.h	(revision 245361)
+++ contrib/binutils/opcodes/i386-opc.h	(working copy)
@@ -72,6 +72,8 @@
 #define CpuSSE4_1    0x400000	/* SSE4.1 Instructions required */
 #define CpuSSE4_2    0x800000	/* SSE4.2 Instructions required */
 #define CpuXSAVE    0x1000000	/* XSAVE Instructions required */
+#define CpuAES      0x2000000	/* AES Instructions required */
+#define CpuPCLMUL   0x4000000	/* Carry-less Multiplication extensions */
 
 /* SSE4.1/4.2 Instructions required */
 #define CpuSSE4	     (CpuSSE4_1|CpuSSE4_2)
@@ -84,7 +86,7 @@
 #define CpuUnknownFlags (Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 \
 	|CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuVMX \
 	|Cpu3dnow|Cpu3dnowA|CpuK6|CpuPadLock|CpuSVME|CpuSSSE3|CpuSSE4_1 \
-	|CpuSSE4_2|CpuABM|CpuSSE4a|CpuXSAVE)
+	|CpuSSE4_2|CpuABM|CpuSSE4a|CpuXSAVE|CpuAES|CpuPCLMUL)
 
   /* the bits in opcode_modifier are used to generate the final opcode from
      the base_opcode.  These bits also are used to detect alternate forms of
@@ -126,6 +128,8 @@
 #define Rex64	    0x10000000  /* instruction require Rex64 prefix.  */
 #define Ugh	    0x20000000	/* deprecated fp insn, gets a warning */
 
+#define NoSuf		(No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf)
+
   /* operand_types[i] describes the type of operand i.  This is made
      by OR'ing together all of the possible type masks.  (e.g.
      'operand_types[i] = Reg|Imm' specifies that operand i can be
Index: contrib/binutils/opcodes/i386-tbl.h
===================================================================
--- contrib/binutils/opcodes/i386-tbl.h	(revision 245361)
+++ contrib/binutils/opcodes/i386-tbl.h	(working copy)
@@ -4319,6 +4319,54 @@
   { "xrstor", 1, 0xfae, 0x5, CpuXSAVE,
     Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_xSuf,
     { BaseIndex|Disp8|Disp16|Disp32|Disp32S } },
+  /* Intel AES extensions */
+  {"aesdec", 2, 0x660f38de, None, CpuAES,
+    Modrm|IgnoreSize|NoSuf,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"aesdeclast", 2, 0x660f38df, None, CpuAES,
+    Modrm|IgnoreSize|NoSuf,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"aesenc", 2, 0x660f38dc, None, CpuAES,
+    Modrm|IgnoreSize|NoSuf,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"aesenclast", 2, 0x660f38dd, None, CpuAES,
+    Modrm|IgnoreSize|NoSuf,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"aesimc", 2, 0x660f38db, None, CpuAES,
+    Modrm|IgnoreSize|NoSuf,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"aeskeygenassist", 3, 0x660f3adf, None, CpuAES,
+    Modrm|IgnoreSize|NoSuf,
+    { Imm8, RegXMM|LLongMem,
+      RegXMM } },
+  
+  /* Intel Carry-less Multiplication extensions */
+  {"pclmulqdq", 3, 0x660f3a44, None, CpuPCLMUL,
+    Modrm|IgnoreSize|NoSuf,
+    { Imm8, RegXMM|LLongMem,
+      RegXMM } },
+  {"pclmullqlqdq", 2, 0x660f3a44, 0x0, CpuPCLMUL,
+    Modrm|IgnoreSize|NoSuf|ImmExt,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"pclmulhqlqdq", 2, 0x660f3a44, 0x1, CpuPCLMUL,
+    Modrm|IgnoreSize|NoSuf|ImmExt,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"pclmullqhqdq", 2, 0x660f3a44, 0x10, CpuPCLMUL,
+    Modrm|IgnoreSize|NoSuf|ImmExt,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  {"pclmulhqhqdq", 2, 0x660f3a44, 0x11, CpuPCLMUL,
+    Modrm|IgnoreSize|NoSuf|ImmExt,
+    { RegXMM|LLongMem,
+      RegXMM } },
+  
   { NULL, 0, 0, 0, 0, 0, { 0 } }
 };
 

--IU5/I01NYhRvwH70--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130117070516.GI1410>