Date:      Thu, 15 Nov 2018 20:28:35 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r340464 - head/lib/libc/amd64/string
Message-ID:  <201811152028.wAFKSZne077132@repo.freebsd.org>

Author: mjg
Date: Thu Nov 15 20:28:35 2018
New Revision: 340464
URL: https://svnweb.freebsd.org/changeset/base/340464

Log:
  amd64: sync up libc memset with the kernel version
  
  - tidy up memset to have rax set earlier for small sizes
  - finish the tail in memset with an overlapping store
  - align memset buffers to 16 bytes before using rep stos
  
  Sponsored by:	The FreeBSD Foundation
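
[Illustrative aside, not part of the commit: the second log item refers to the tail
handling in the non-ERMS rep-stosq path (label 2: in the diff below). Once every full
8-byte word has been written, the remaining 1-7 bytes are finished with a single
unaligned 8-byte store that ends exactly at the end of the buffer, overlapping bytes
that were already set with the same pattern. A minimal C sketch of the idea, using a
hypothetical helper name and assuming len >= 8, which holds because the assembly only
takes this path for sizes of 256 and up:]

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Sketch of the overlapping-store tail: after all full 8-byte words
 * have been written, the 1-7 leftover bytes are covered by one
 * unaligned 8-byte store that ends exactly at the end of the buffer.
 * The bytes it overlaps were already filled with the same pattern,
 * so rewriting them is harmless.  Requires len >= 8.
 */
static void
memset_tail_sketch(unsigned char *dst, uint64_t pattern, size_t len)
{
        size_t rem = len & 7;   /* bytes not covered by whole words */

        if (rem != 0)
                memcpy(dst + len - 8, &pattern, 8);     /* last 8 bytes */
}

[The overlapping store trades a handful of redundant byte writes for the removal of
the byte-granularity tail loop that the old code jumped back into.]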

Modified:
  head/lib/libc/amd64/string/memset.S

Modified: head/lib/libc/amd64/string/memset.S
==============================================================================
--- head/lib/libc/amd64/string/memset.S	Thu Nov 15 20:20:39 2018	(r340463)
+++ head/lib/libc/amd64/string/memset.S	Thu Nov 15 20:28:35 2018	(r340464)
@@ -31,12 +31,14 @@
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
+#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
 .macro MEMSET erms
-	movq	%rdi,%r9
+	movq	%rdi,%rax
 	movq	%rdx,%rcx
 	movzbq	%sil,%r8
-	movabs	$0x0101010101010101,%rax
-	imulq	%r8,%rax
+	movabs	$0x0101010101010101,%r10
+	imulq	%r8,%r10
 
 	cmpq	$32,%rcx
 	jb	1016f
@@ -45,10 +47,10 @@ __FBSDID("$FreeBSD$");
 	ja	1256f
 
 1032:
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	movq	%rax,16(%rdi)
-	movq	%rax,24(%rdi)
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%r10,16(%rdi)
+	movq	%r10,24(%rdi)
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
@@ -58,54 +60,72 @@ __FBSDID("$FreeBSD$");
 1016:
 	cmpb	$16,%cl
 	jl	1008f
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
 	subb	$16,%cl
 	jz	1000f
 	leaq	16(%rdi),%rdi
 1008:
 	cmpb	$8,%cl
 	jl	1004f
-	movq	%rax,(%rdi)
+	movq	%r10,(%rdi)
 	subb	$8,%cl
 	jz	1000f
 	leaq	8(%rdi),%rdi
 1004:
 	cmpb	$4,%cl
 	jl	1002f
-	movl	%eax,(%rdi)
+	movl	%r10d,(%rdi)
 	subb	$4,%cl
 	jz	1000f
 	leaq	4(%rdi),%rdi
 1002:
 	cmpb	$2,%cl
 	jl	1001f
-	movw	%ax,(%rdi)
+	movw	%r10w,(%rdi)
 	subb	$2,%cl
 	jz	1000f
 	leaq	2(%rdi),%rdi
 1001:
 	cmpb	$1,%cl
 	jl	1000f
-	movb	%al,(%rdi)
+	movb	%r10b,(%rdi)
 1000:
-	movq	%r9,%rax
 	ret
-
+	ALIGN_TEXT
 1256:
+	movq	%rdi,%r9
+	movq	%r10,%rax
+	testl	$15,%edi
+	jnz	3f
+1:
 .if \erms == 1
 	rep
 	stosb
+	movq	%r9,%rax
 .else
+	movq	%rcx,%rdx
 	shrq	$3,%rcx
 	rep
 	stosq
-	movq	%rdx,%rcx
-	andb	$7,%cl
-	jne	1004b
-.endif
 	movq	%r9,%rax
+	andl	$7,%edx
+	jnz	2f
 	ret
+2:
+	movq	%r10,-8(%rdi,%rdx)
+.endif
+	ret
+	ALIGN_TEXT
+3:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%rdi,%r8
+	andq	$15,%r8
+	leaq	-16(%rcx,%r8),%rcx
+	neg	%r8
+	leaq	16(%rdi,%r8),%rdi
+	jmp	1b
 .endm
 
 ENTRY(memset)
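
[Illustrative aside, not part of the commit: the third log item corresponds to the new
3: label above. When the destination is not 16-byte aligned, the first 16 bytes are
stored unconditionally and the pointer is then rounded up to the next 16-byte boundary
before rep stos runs, with the count reduced by the number of bytes skipped. A rough C
sketch of that adjustment, with a hypothetical helper name, assuming the large-size
(>= 256 bytes) path so the unconditional 16-byte store is always in bounds:]

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Sketch of the alignment step: store the first 16 bytes regardless
 * of alignment, then round the destination up to the next 16-byte
 * boundary and shrink the count by the number of bytes skipped, so
 * that rep stos starts on an aligned address.
 */
static void
memset_align16_sketch(unsigned char **dstp, size_t *lenp, uint64_t pattern)
{
        unsigned char *dst = *dstp;
        size_t misalign = (uintptr_t)dst & 15;

        if (misalign != 0) {
                memcpy(dst, &pattern, 8);       /* fill the first 16 bytes */
                memcpy(dst + 8, &pattern, 8);   /* regardless of alignment  */
                *dstp = dst + (16 - misalign);  /* next 16-byte boundary    */
                *lenp -= 16 - misalign;         /* bytes left from there    */
        }
}

[Rounding the pointer up is safe only because the two unconditional 8-byte stores have
already filled the skipped prefix.]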


