Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 20 Nov 2018 18:14:30 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-releng@freebsd.org
Subject:   svn commit: r340688 - in releng/12.0: . lib/libc/amd64/string sys/amd64/amd64
Message-ID:  <201811201814.wAKIEUJe038566@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Tue Nov 20 18:14:30 2018
New Revision: 340688
URL: https://svnweb.freebsd.org/changeset/base/340688

Log:
  MFC r339531,r339579,r340252,r340463,r340464,r340472,r340587
  
  amd64: tidy up memset to have rax set earlier for small sizes
  amd64: finish the tail in memset with an overlapping store
  amd64: align memset buffers to 16 bytes before using rep stos
  amd64: convert libc bzero to a C func to avoid future bloat
  amd64: sync up libc memset with the kernel version
  amd64: handle small memset buffers with overlapping stores
  Fix -DNO_CLEAN amd64 build after r340463
  
  Approved by:	re (gjb)

Added:
  releng/12.0/lib/libc/amd64/string/bzero.c
     - copied unchanged from r340684, stable/12/lib/libc/amd64/string/bzero.c
Deleted:
  releng/12.0/lib/libc/amd64/string/bzero.S
Modified:
  releng/12.0/Makefile.inc1
  releng/12.0/lib/libc/amd64/string/Makefile.inc
  releng/12.0/lib/libc/amd64/string/memset.S
  releng/12.0/sys/amd64/amd64/support.S
Directory Properties:
  releng/12.0/   (props changed)

Modified: releng/12.0/Makefile.inc1
==============================================================================
--- releng/12.0/Makefile.inc1	Tue Nov 20 18:13:18 2018	(r340687)
+++ releng/12.0/Makefile.inc1	Tue Nov 20 18:14:30 2018	(r340688)
@@ -948,6 +948,13 @@ _cleanobj_fast_depend_hack: .PHONY
 		   ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \
 	fi
 .endfor
+# 20181115  r340463  bzero reimplemented as .c
+	@if [ -e "${OBJTOP}/lib/libc/.depend.bzero.o" ] && \
+	    egrep -qw 'bzero\.[sS]' ${OBJTOP}/lib/libc/.depend.bzero.o; then \
+		echo "Removing stale dependencies for bzero"; \
+		rm -f ${OBJTOP}/lib/libc/.depend.bzero.* \
+		   ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.bzero.*}; \
+	fi
 # 20181009 track migration from ntp's embedded libevent to updated one
 	@if [ -e "${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o" ] && \
 	    egrep -q 'contrib/ntp/sntp/libevent/bufferevent_openssl.c' \

Modified: releng/12.0/lib/libc/amd64/string/Makefile.inc
==============================================================================
--- releng/12.0/lib/libc/amd64/string/Makefile.inc	Tue Nov 20 18:13:18 2018	(r340687)
+++ releng/12.0/lib/libc/amd64/string/Makefile.inc	Tue Nov 20 18:14:30 2018	(r340688)
@@ -2,7 +2,6 @@
 
 MDSRCS+= \
 	bcmp.S \
-	bzero.S \
 	memcmp.S \
 	memcpy.S \
 	memmove.S \

Copied: releng/12.0/lib/libc/amd64/string/bzero.c (from r340684, stable/12/lib/libc/amd64/string/bzero.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ releng/12.0/lib/libc/amd64/string/bzero.c	Tue Nov 20 18:14:30 2018	(r340688, copy of r340684, stable/12/lib/libc/amd64/string/bzero.c)
@@ -0,0 +1,15 @@
+/*-
+ * Public domain.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <string.h>
+
+void
+bzero(void *b, size_t len)
+{
+
+	memset(b, 0, len);
+}

Modified: releng/12.0/lib/libc/amd64/string/memset.S
==============================================================================
--- releng/12.0/lib/libc/amd64/string/memset.S	Tue Nov 20 18:13:18 2018	(r340687)
+++ releng/12.0/lib/libc/amd64/string/memset.S	Tue Nov 20 18:14:30 2018	(r340688)
@@ -31,101 +31,112 @@
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
-.macro MEMSET bzero erms
-.if \bzero == 1
-	movq	%rsi,%rcx
-	movq	%rsi,%rdx
-	xorl	%eax,%eax
-.else
-	movq	%rdi,%r9
+#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+.macro MEMSET erms
+	movq	%rdi,%rax
 	movq	%rdx,%rcx
 	movzbq	%sil,%r8
-	movabs	$0x0101010101010101,%rax
-	imulq	%r8,%rax
-.endif
+	movabs	$0x0101010101010101,%r10
+	imulq	%r8,%r10
 
 	cmpq	$32,%rcx
-	jb	1016f
+	jbe	101632f
 
 	cmpq	$256,%rcx
 	ja	1256f
 
-1032:
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	movq	%rax,16(%rdi)
-	movq	%rax,24(%rdi)
+103200:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%r10,16(%rdi)
+	movq	%r10,24(%rdi)
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
-	jae	1032b
-	cmpb	$0,%cl
-	je	1000f
-1016:
+	ja	103200b
 	cmpb	$16,%cl
-	jl	1008f
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	subb	$16,%cl
-	jz	1000f
-	leaq	16(%rdi),%rdi
-1008:
+	ja	201632f
+	movq	%r10,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	ret
+	ALIGN_TEXT
+101632:
+	cmpb	$16,%cl
+	jl	100816f
+201632:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%r10,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	ret
+	ALIGN_TEXT
+100816:
 	cmpb	$8,%cl
-	jl	1004f
-	movq	%rax,(%rdi)
-	subb	$8,%cl
-	jz	1000f
-	leaq	8(%rdi),%rdi
-1004:
+	jl	100408f
+	movq	%r10,(%rdi)
+	movq	%r10,-8(%rdi,%rcx)
+	ret
+	ALIGN_TEXT
+100408:
 	cmpb	$4,%cl
-	jl	1002f
-	movl	%eax,(%rdi)
-	subb	$4,%cl
-	jz	1000f
-	leaq	4(%rdi),%rdi
-1002:
+	jl	100204f
+	movl	%r10d,(%rdi)
+	movl	%r10d,-4(%rdi,%rcx)
+	ret
+	ALIGN_TEXT
+100204:
 	cmpb	$2,%cl
-	jl	1001f
-	movw	%ax,(%rdi)
-	subb	$2,%cl
-	jz	1000f
-	leaq	2(%rdi),%rdi
-1001:
-	cmpb	$1,%cl
-	jl	1000f
-	movb	%al,(%rdi)
-1000:
-.if \bzero == 0
-	movq	%r9,%rax
-.endif
+	jl	100001f
+	movw	%r10w,(%rdi)
+	movw	%r10w,-2(%rdi,%rcx)
 	ret
-
+	ALIGN_TEXT
+100001:
+	cmpb	$0,%cl
+	je	100000f
+	movb	%r10b,(%rdi)
+100000:
+	ret
+	ALIGN_TEXT
 1256:
+	movq	%rdi,%r9
+	movq	%r10,%rax
+	testl	$15,%edi
+	jnz	3f
+1:
 .if \erms == 1
 	rep
 	stosb
+	movq	%r9,%rax
 .else
+	movq	%rcx,%rdx
 	shrq	$3,%rcx
 	rep
 	stosq
-	movq	%rdx,%rcx
-	andb	$7,%cl
-	jne	1004b
-.endif
-.if \bzero == 0
 	movq	%r9,%rax
+	andl	$7,%edx
+	jnz	2f
+	ret
+2:
+	movq	%r10,-8(%rdi,%rdx)
 .endif
 	ret
+	ALIGN_TEXT
+3:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%rdi,%r8
+	andq	$15,%r8
+	leaq	-16(%rcx,%r8),%rcx
+	neg	%r8
+	leaq	16(%rdi,%r8),%rdi
+	jmp	1b
 .endm
 
-#ifndef BZERO
+
 ENTRY(memset)
-	MEMSET bzero=0 erms=0
+	MEMSET erms=0
 END(memset)
-#else
-ENTRY(bzero)
-	MEMSET bzero=1 erms=0
-END(bzero)
-#endif
 
 	.section .note.GNU-stack,"",%progbits

Modified: releng/12.0/sys/amd64/amd64/support.S
==============================================================================
--- releng/12.0/sys/amd64/amd64/support.S	Tue Nov 20 18:13:18 2018	(r340687)
+++ releng/12.0/sys/amd64/amd64/support.S	Tue Nov 20 18:14:30 2018	(r340688)
@@ -452,82 +452,112 @@ END(memcpy_erms)
  */
 .macro MEMSET erms
 	PUSH_FRAME_POINTER
-	movq	%rdi,%r9
+	movq	%rdi,%rax
 	movq	%rdx,%rcx
 	movzbq	%sil,%r8
-	movabs	$0x0101010101010101,%rax
-	imulq	%r8,%rax
+	movabs	$0x0101010101010101,%r10
+	imulq	%r8,%r10
 
 	cmpq	$32,%rcx
-	jb	1016f
+	jbe	101632f
 
 	cmpq	$256,%rcx
 	ja	1256f
 
-1032:
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	movq	%rax,16(%rdi)
-	movq	%rax,24(%rdi)
+103200:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%r10,16(%rdi)
+	movq	%r10,24(%rdi)
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
-	jae	1032b
-	cmpb	$0,%cl
-	je	1000f
-1016:
+	ja	103200b
 	cmpb	$16,%cl
-	jl	1008f
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	subb	$16,%cl
-	jz	1000f
-	leaq	16(%rdi),%rdi
-1008:
+	ja	201632f
+	movq	%r10,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	POP_FRAME_POINTER
+	ret
+	ALIGN_TEXT
+101632:
+	cmpb	$16,%cl
+	jl	100816f
+201632:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%r10,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	POP_FRAME_POINTER
+	ret
+	ALIGN_TEXT
+100816:
 	cmpb	$8,%cl
-	jl	1004f
-	movq	%rax,(%rdi)
-	subb	$8,%cl
-	jz	1000f
-	leaq	8(%rdi),%rdi
-1004:
+	jl	100408f
+	movq	%r10,(%rdi)
+	movq	%r10,-8(%rdi,%rcx)
+	POP_FRAME_POINTER
+	ret
+	ALIGN_TEXT
+100408:
 	cmpb	$4,%cl
-	jl	1002f
-	movl	%eax,(%rdi)
-	subb	$4,%cl
-	jz	1000f
-	leaq	4(%rdi),%rdi
-1002:
+	jl	100204f
+	movl	%r10d,(%rdi)
+	movl	%r10d,-4(%rdi,%rcx)
+	POP_FRAME_POINTER
+	ret
+	ALIGN_TEXT
+100204:
 	cmpb	$2,%cl
-	jl	1001f
-	movw	%ax,(%rdi)
-	subb	$2,%cl
-	jz	1000f
-	leaq	2(%rdi),%rdi
-1001:
-	cmpb	$1,%cl
-	jl	1000f
-	movb	%al,(%rdi)
-1000:
-	movq	%r9,%rax
+	jl	100001f
+	movw	%r10w,(%rdi)
+	movw	%r10w,-2(%rdi,%rcx)
 	POP_FRAME_POINTER
 	ret
 	ALIGN_TEXT
+100001:
+	cmpb	$0,%cl
+	je	100000f
+	movb	%r10b,(%rdi)
+100000:
+	POP_FRAME_POINTER
+	ret
+	ALIGN_TEXT
 1256:
+	movq	%rdi,%r9
+	movq	%r10,%rax
+	testl	$15,%edi
+	jnz	3f
+1:
 .if \erms == 1
 	rep
 	stosb
+	movq	%r9,%rax
 .else
+	movq	%rcx,%rdx
 	shrq	$3,%rcx
 	rep
 	stosq
-	movq	%rdx,%rcx
-	andb	$7,%cl
-	jne	1004b
-.endif
 	movq	%r9,%rax
+	andl	$7,%edx
+	jnz	2f
 	POP_FRAME_POINTER
 	ret
+2:
+	movq	%r10,-8(%rdi,%rdx)
+.endif
+	POP_FRAME_POINTER
+	ret
+	ALIGN_TEXT
+3:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%rdi,%r8
+	andq	$15,%r8
+	leaq	-16(%rcx,%r8),%rcx
+	neg	%r8
+	leaq	16(%rdi,%r8),%rdi
+	jmp	1b
 .endm
 
 ENTRY(memset_std)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811201814.wAKIEUJe038566>