From owner-svn-src-all@freebsd.org Tue Nov 20 17:10:46 2018 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 6B4C91136BD3; Tue, 20 Nov 2018 17:10:46 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 0D91676CE4; Tue, 20 Nov 2018 17:10:46 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id E2E50142BF; Tue, 20 Nov 2018 17:10:45 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id wAKHAj07000805; Tue, 20 Nov 2018 17:10:45 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id wAKHAimp000800; Tue, 20 Nov 2018 17:10:44 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <201811201710.wAKHAimp000800@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Tue, 20 Nov 2018 17:10:44 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r340684 - in stable/12: . lib/libc/amd64/string sys/amd64/amd64 X-SVN-Group: stable-12 X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: in stable/12: . 
lib/libc/amd64/string sys/amd64/amd64 X-SVN-Commit-Revision: 340684 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Rspamd-Queue-Id: 0D91676CE4 X-Spamd-Result: default: False [0.37 / 15.00]; local_wl_from(0.00)[FreeBSD.org]; NEURAL_SPAM_LONG(0.01)[0.014,0]; NEURAL_SPAM_MEDIUM(0.21)[0.206,0]; NEURAL_SPAM_SHORT(0.15)[0.151,0] X-Rspamd-Server: mx1.freebsd.org X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 20 Nov 2018 17:10:46 -0000 Author: mjg Date: Tue Nov 20 17:10:44 2018 New Revision: 340684 URL: https://svnweb.freebsd.org/changeset/base/340684 Log: MFC r339531,r339579,r340252,r340463,r340464,340472,r340587 amd64: tidy up memset to have rax set earlier for small sizes amd64: finish the tail in memset with an overlapping store amd64: align memset buffers to 16 bytes before using rep stos amd64: convert libc bzero to a C func to avoid future bloat amd64: sync up libc memset with the kernel version amd64: handle small memset buffers with overlapping stores Fix -DNO_CLEAN amd64 build after r340463 Added: stable/12/lib/libc/amd64/string/bzero.c - copied unchanged from r340463, head/lib/libc/amd64/string/bzero.c Deleted: stable/12/lib/libc/amd64/string/bzero.S Modified: stable/12/Makefile.inc1 stable/12/lib/libc/amd64/string/Makefile.inc stable/12/lib/libc/amd64/string/memset.S stable/12/sys/amd64/amd64/support.S Directory Properties: stable/12/ (props changed) Modified: stable/12/Makefile.inc1 ============================================================================== --- stable/12/Makefile.inc1 Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/Makefile.inc1 Tue Nov 20 17:10:44 2018 (r340684) @@ -948,6 +948,13 @@ _cleanobj_fast_depend_hack: .PHONY 
${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \ fi .endfor +# 20181115 r340463 bzero reimplemented as .c + @if [ -e "${OBJTOP}/lib/libc/.depend.bzero.o" ] && \ + egrep -qw 'bzero\.[sS]' ${OBJTOP}/lib/libc/.depend.bzero.o; then \ + echo "Removing stale dependencies for bzero"; \ + rm -f ${OBJTOP}/lib/libc/.depend.bzero.* \ + ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.bzero.*}; \ + fi # 20181009 track migration from ntp's embedded libevent to updated one @if [ -e "${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o" ] && \ egrep -q 'contrib/ntp/sntp/libevent/bufferevent_openssl.c' \ Modified: stable/12/lib/libc/amd64/string/Makefile.inc ============================================================================== --- stable/12/lib/libc/amd64/string/Makefile.inc Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/lib/libc/amd64/string/Makefile.inc Tue Nov 20 17:10:44 2018 (r340684) @@ -2,7 +2,6 @@ MDSRCS+= \ bcmp.S \ - bzero.S \ memcmp.S \ memcpy.S \ memmove.S \ Copied: stable/12/lib/libc/amd64/string/bzero.c (from r340463, head/lib/libc/amd64/string/bzero.c) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ stable/12/lib/libc/amd64/string/bzero.c Tue Nov 20 17:10:44 2018 (r340684, copy of r340463, head/lib/libc/amd64/string/bzero.c) @@ -0,0 +1,15 @@ +/*- + * Public domain. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <string.h> + +void +bzero(void *b, size_t len) +{ + + memset(b, 0, len); +} Modified: stable/12/lib/libc/amd64/string/memset.S ============================================================================== --- stable/12/lib/libc/amd64/string/memset.S Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/lib/libc/amd64/string/memset.S Tue Nov 20 17:10:44 2018 (r340684) @@ -31,101 +31,112 @@ #include <machine/asm.h> __FBSDID("$FreeBSD$"); -.macro MEMSET bzero erms -.if \bzero == 1 - movq %rsi,%rcx - movq %rsi,%rdx - xorl %eax,%eax -.else - movq %rdi,%r9 +#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ + +.macro MEMSET erms + movq %rdi,%rax movq %rdx,%rcx movzbq %sil,%r8 - movabs $0x0101010101010101,%rax - imulq %r8,%rax -.endif + movabs $0x0101010101010101,%r10 + imulq %r8,%r10 cmpq $32,%rcx - jb 1016f + jbe 101632f cmpq $256,%rcx ja 1256f -1032: - movq %rax,(%rdi) - movq %rax,8(%rdi) - movq %rax,16(%rdi) - movq %rax,24(%rdi) +103200: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,16(%rdi) + movq %r10,24(%rdi) leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b - cmpb $0,%cl - je 1000f -1016: + ja 103200b cmpb $16,%cl - jl 1008f - movq %rax,(%rdi) - movq %rax,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rdi),%rdi -1008: + ja 201632f + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +101632: + cmpb $16,%cl + jl 100816f +201632: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f - movq %rax,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rdi),%rdi -1004: + jl 100408f + movq %r10,(%rdi) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f - movl %eax,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rdi),%rdi -1002: + jl 100204f + movl %r10d,(%rdi) + movl %r10d,-4(%rdi,%rcx) + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f - movw %ax,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rdi),%rdi -1001: 
- cmpb $1,%cl - jl 1000f - movb %al,(%rdi) -1000: -.if \bzero == 0 - movq %r9,%rax -.endif + jl 100001f + movw %r10w,(%rdi) + movw %r10w,-2(%rdi,%rcx) ret - + ALIGN_TEXT +100001: + cmpb $0,%cl + je 100000f + movb %r10b,(%rdi) +100000: + ret + ALIGN_TEXT 1256: + movq %rdi,%r9 + movq %r10,%rax + testl $15,%edi + jnz 3f +1: .if \erms == 1 rep stosb + movq %r9,%rax .else + movq %rcx,%rdx shrq $3,%rcx rep stosq - movq %rdx,%rcx - andb $7,%cl - jne 1004b -.endif -.if \bzero == 0 movq %r9,%rax + andl $7,%edx + jnz 2f + ret +2: + movq %r10,-8(%rdi,%rdx) .endif ret + ALIGN_TEXT +3: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %rdi,%r8 + andq $15,%r8 + leaq -16(%rcx,%r8),%rcx + neg %r8 + leaq 16(%rdi,%r8),%rdi + jmp 1b .endm -#ifndef BZERO + ENTRY(memset) - MEMSET bzero=0 erms=0 + MEMSET erms=0 END(memset) -#else -ENTRY(bzero) - MEMSET bzero=1 erms=0 -END(bzero) -#endif .section .note.GNU-stack,"",%progbits Modified: stable/12/sys/amd64/amd64/support.S ============================================================================== --- stable/12/sys/amd64/amd64/support.S Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/sys/amd64/amd64/support.S Tue Nov 20 17:10:44 2018 (r340684) @@ -452,82 +452,112 @@ END(memcpy_erms) */ .macro MEMSET erms PUSH_FRAME_POINTER - movq %rdi,%r9 + movq %rdi,%rax movq %rdx,%rcx movzbq %sil,%r8 - movabs $0x0101010101010101,%rax - imulq %r8,%rax + movabs $0x0101010101010101,%r10 + imulq %r8,%r10 cmpq $32,%rcx - jb 1016f + jbe 101632f cmpq $256,%rcx ja 1256f -1032: - movq %rax,(%rdi) - movq %rax,8(%rdi) - movq %rax,16(%rdi) - movq %rax,24(%rdi) +103200: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,16(%rdi) + movq %r10,24(%rdi) leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b - cmpb $0,%cl - je 1000f -1016: + ja 103200b cmpb $16,%cl - jl 1008f - movq %rax,(%rdi) - movq %rax,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rdi),%rdi -1008: + ja 201632f + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + 
ALIGN_TEXT +101632: + cmpb $16,%cl + jl 100816f +201632: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f - movq %rax,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rdi),%rdi -1004: + jl 100408f + movq %r10,(%rdi) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f - movl %eax,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rdi),%rdi -1002: + jl 100204f + movl %r10d,(%rdi) + movl %r10d,-4(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f - movw %ax,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rdi),%rdi -1001: - cmpb $1,%cl - jl 1000f - movb %al,(%rdi) -1000: - movq %r9,%rax + jl 100001f + movw %r10w,(%rdi) + movw %r10w,-2(%rdi,%rcx) POP_FRAME_POINTER ret ALIGN_TEXT +100001: + cmpb $0,%cl + je 100000f + movb %r10b,(%rdi) +100000: + POP_FRAME_POINTER + ret + ALIGN_TEXT 1256: + movq %rdi,%r9 + movq %r10,%rax + testl $15,%edi + jnz 3f +1: .if \erms == 1 rep stosb + movq %r9,%rax .else + movq %rcx,%rdx shrq $3,%rcx rep stosq - movq %rdx,%rcx - andb $7,%cl - jne 1004b -.endif movq %r9,%rax + andl $7,%edx + jnz 2f POP_FRAME_POINTER ret +2: + movq %r10,-8(%rdi,%rdx) +.endif + POP_FRAME_POINTER + ret + ALIGN_TEXT +3: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %rdi,%r8 + andq $15,%r8 + leaq -16(%rcx,%r8),%rcx + neg %r8 + leaq 16(%rdi,%r8),%rdi + jmp 1b .endm ENTRY(memset_std)