From owner-svn-src-stable-12@freebsd.org Sat Jul 25 00:24:12 2020 Return-Path: Delivered-To: svn-src-stable-12@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 60E3A378A8B; Sat, 25 Jul 2020 00:24:12 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4BD6JD1zrCz4Sxy; Sat, 25 Jul 2020 00:24:12 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 28AF612ED4; Sat, 25 Jul 2020 00:24:12 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 06P0OCiA053638; Sat, 25 Jul 2020 00:24:12 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 06P0OBOp053636; Sat, 25 Jul 2020 00:24:11 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <202007250024.06P0OBOp053636@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Sat, 25 Jul 2020 00:24:11 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r363505 - in stable/12: lib/libc/amd64/string sys/amd64/amd64 X-SVN-Group: stable-12 X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: in stable/12: lib/libc/amd64/string sys/amd64/amd64 X-SVN-Commit-Revision: 363505 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-12@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: SVN commit messages for only the 12-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 25 Jul 2020 00:24:12 -0000 Author: mjg Date: Sat Jul 25 00:24:11 2020 New Revision: 363505 URL: https://svnweb.freebsd.org/changeset/base/363505 Log: MFC r357208,r357309,r357239,r357310 amd64: revamp memcmp amd64: speed up failing case for memcmp amd64: sync up libc memcmp with the kernel version (r357208) amd64: sync up libc memcmp with the kernel version (r357309) Modified: stable/12/lib/libc/amd64/string/memcmp.S stable/12/sys/amd64/amd64/support.S Directory Properties: stable/12/ (props changed) Modified: stable/12/lib/libc/amd64/string/memcmp.S ============================================================================== --- stable/12/lib/libc/amd64/string/memcmp.S Sat Jul 25 00:03:23 2020 (r363504) +++ stable/12/lib/libc/amd64/string/memcmp.S Sat Jul 25 00:24:11 2020 (r363505) @@ -31,91 +31,176 @@ #include __FBSDID("$FreeBSD$"); +#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ + ENTRY(memcmp) - cmpq $16,%rdx - jae 5f -1: - testq %rdx,%rdx - je 3f - xorl %ecx,%ecx -2: - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jz 3f - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jz 3f - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jz 3f - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jne 2b -3: xorl %eax,%eax +10: + cmpq $16,%rdx + ja 101632f + +100816: + cmpb $8,%dl + jl 100408f + movq (%rdi),%r8 + movq (%rsi),%r9 + cmpq %r8,%r9 + jne 80f + movq -8(%rdi,%rdx),%r8 + movq -8(%rsi,%rdx),%r9 + cmpq %r8,%r9 + jne 10081608f ret -4: +100408: + cmpb $4,%dl + jl 100204f + movl (%rdi),%r8d + movl (%rsi),%r9d + cmpl %r8d,%r9d + jne 80f + movl -4(%rdi,%rdx),%r8d + movl -4(%rsi,%rdx),%r9d + cmpl %r8d,%r9d + jne 10040804f + ret +100204: + cmpb $2,%dl + jl 100001f + movzwl (%rdi),%r8d + movzwl (%rsi),%r9d + cmpl %r8d,%r9d + jne 1f + movzwl -2(%rdi,%rdx),%r8d + movzwl -2(%rsi,%rdx),%r9d + cmpl %r8d,%r9d + jne 1f + ret +100001: + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d subl %r8d,%eax +100000: ret -5: +ALIGN_TEXT +101632: cmpq $32,%rdx - jae 7f -6: - /* - * 8 bytes - */ + ja 103200f movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 - jne 1b - leaq 8(%rdi),%rdi - leaq 8(%rsi),%rsi - subq $8,%rdx - cmpq $8,%rdx - jae 6b - jl 1b - jmp 3b -7: - /* - * 32 bytes - */ - movq (%rsi),%r8 + jne 80f + movq 8(%rdi),%r8 movq 8(%rsi),%r9 - subq (%rdi),%r8 - subq 8(%rdi),%r9 - or %r8,%r9 - jnz 1b + cmpq %r8,%r9 + jne 10163208f + movq -16(%rdi,%rdx),%r8 + movq -16(%rsi,%rdx),%r9 + cmpq %r8,%r9 + jne 10163216f + movq -8(%rdi,%rdx),%r8 + movq -8(%rsi,%rdx),%r9 + cmpq %r8,%r9 + jne 10163224f + ret +ALIGN_TEXT +103200: + movq (%rdi),%r8 + movq 8(%rdi),%r9 + subq (%rsi),%r8 + subq 8(%rsi),%r9 + orq %r8,%r9 + jnz 10320000f - movq 16(%rsi),%r8 - movq 24(%rsi),%r9 - subq 16(%rdi),%r8 - subq 24(%rdi),%r9 - or %r8,%r9 - jnz 1b + movq 16(%rdi),%r8 + movq 24(%rdi),%r9 + subq 16(%rsi),%r8 + subq 24(%rsi),%r9 + orq %r8,%r9 + jnz 10320016f leaq 32(%rdi),%rdi leaq 32(%rsi),%rsi subq $32,%rdx cmpq $32,%rdx - jae 7b - jnz 1b - jmp 3b + jae 103200b + cmpb $0,%dl + jne 10b + ret + +/* + * Mismatch was found. + * + * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). + */ +ALIGN_TEXT +10320016: + leaq 16(%rdi),%rdi + leaq 16(%rsi),%rsi +10320000: + movq (%rdi),%r8 + movq (%rsi),%r9 + cmpq %r8,%r9 + jne 80f + leaq 8(%rdi),%rdi + leaq 8(%rsi),%rsi + jmp 80f +ALIGN_TEXT +10081608: +10163224: + leaq -8(%rdi,%rdx),%rdi + leaq -8(%rsi,%rdx),%rsi + jmp 80f +ALIGN_TEXT +10163216: + leaq -16(%rdi,%rdx),%rdi + leaq -16(%rsi,%rdx),%rsi + jmp 80f +ALIGN_TEXT +10163208: + leaq 8(%rdi),%rdi + leaq 8(%rsi),%rsi + jmp 80f +ALIGN_TEXT +10040804: + leaq -4(%rdi,%rdx),%rdi + leaq -4(%rsi,%rdx),%rsi + jmp 1f + +ALIGN_TEXT +80: + movl (%rdi),%r8d + movl (%rsi),%r9d + cmpl %r8d,%r9d + jne 1f + leaq 4(%rdi),%rdi + leaq 4(%rsi),%rsi + +/* + * We have up to 4 bytes to inspect. + */ +1: + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + cmpb %r8b,%al + jne 2f + + movzbl 1(%rdi),%eax + movzbl 1(%rsi),%r8d + cmpb %r8b,%al + jne 2f + + movzbl 2(%rdi),%eax + movzbl 2(%rsi),%r8d + cmpb %r8b,%al + jne 2f + + movzbl 3(%rdi),%eax + movzbl 3(%rsi),%r8d +2: + subl %r8d,%eax + ret END(memcmp) .section .note.GNU-stack,"",%progbits Modified: stable/12/sys/amd64/amd64/support.S ============================================================================== --- stable/12/sys/amd64/amd64/support.S Sat Jul 25 00:03:23 2020 (r363504) +++ stable/12/sys/amd64/amd64/support.S Sat Jul 25 00:24:11 2020 (r363505) @@ -107,96 +107,185 @@ END(sse2_pagezero) /* * memcmpy(b1, b2, len) - * rdi,rsi,len + * rdi,rsi,rdx */ ENTRY(memcmp) PUSH_FRAME_POINTER - cmpq $16,%rdx - jae 5f -1: - testq %rdx,%rdx - je 3f - xorl %ecx,%ecx -2: - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jz 3f - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jz 3f - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jz 3f - movzbl (%rdi,%rcx,1),%eax - movzbl (%rsi,%rcx,1),%r8d - cmpb %r8b,%al - jne 4f - addq $1,%rcx - cmpq %rcx,%rdx - jne 2b -3: + xorl %eax,%eax +10: + cmpq $16,%rdx + ja 101632f + +100816: + cmpb $8,%dl + jl 100408f + movq (%rdi),%r8 + movq (%rsi),%r9 + cmpq %r8,%r9 + jne 80f + movq -8(%rdi,%rdx),%r8 + movq -8(%rsi,%rdx),%r9 + cmpq %r8,%r9 + jne 10081608f POP_FRAME_POINTER ret -4: +100408: + cmpb $4,%dl + jl 100204f + movl (%rdi),%r8d + movl (%rsi),%r9d + cmpl %r8d,%r9d + jne 80f + movl -4(%rdi,%rdx),%r8d + movl -4(%rsi,%rdx),%r9d + cmpl %r8d,%r9d + jne 10040804f + POP_FRAME_POINTER + ret +100204: + cmpb $2,%dl + jl 100001f + movzwl (%rdi),%r8d + movzwl (%rsi),%r9d + cmpl %r8d,%r9d + jne 1f + movzwl -2(%rdi,%rdx),%r8d + movzwl -2(%rsi,%rdx),%r9d + cmpl %r8d,%r9d + jne 1f + POP_FRAME_POINTER + ret +100001: + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d subl %r8d,%eax +100000: POP_FRAME_POINTER ret -5: +ALIGN_TEXT +101632: cmpq $32,%rdx - jae 7f -6: - /* - * 8 bytes - */ - movq (%rdi),%r8 - movq (%rsi),%r9 - cmpq %r8,%r9 - jne 1b + ja 103200f + movq (%rdi),%r8 + movq (%rsi),%r9 + cmpq %r8,%r9 + jne 80f + movq 8(%rdi),%r8 + movq 8(%rsi),%r9 + cmpq %r8,%r9 + jne 10163208f + movq -16(%rdi,%rdx),%r8 + movq -16(%rsi,%rdx),%r9 + cmpq %r8,%r9 + jne 10163216f + movq -8(%rdi,%rdx),%r8 + movq -8(%rsi,%rdx),%r9 + cmpq %r8,%r9 + jne 10163224f + POP_FRAME_POINTER + ret +ALIGN_TEXT +103200: + movq (%rdi),%r8 + movq 8(%rdi),%r9 + subq (%rsi),%r8 + subq 8(%rsi),%r9 + orq %r8,%r9 + jnz 10320000f + + movq 16(%rdi),%r8 + movq 24(%rdi),%r9 + subq 16(%rsi),%r8 + subq 24(%rsi),%r9 + orq %r8,%r9 + jnz 10320016f + + leaq 32(%rdi),%rdi + leaq 32(%rsi),%rsi + subq $32,%rdx + cmpq $32,%rdx + jae 103200b + cmpb $0,%dl + jne 10b + POP_FRAME_POINTER + ret + +/* + * Mismatch was found. + * + * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). + */ +ALIGN_TEXT +10320016: + leaq 16(%rdi),%rdi + leaq 16(%rsi),%rsi +10320000: + movq (%rdi),%r8 + movq (%rsi),%r9 + cmpq %r8,%r9 + jne 80f leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi - subq $8,%rdx - cmpq $8,%rdx - jae 6b - jl 1b - jmp 3b -7: - /* - * 32 bytes - */ - movq (%rsi),%r8 - movq 8(%rsi),%r9 - subq (%rdi),%r8 - subq 8(%rdi),%r9 - or %r8,%r9 - jnz 1b + jmp 80f +ALIGN_TEXT +10081608: +10163224: + leaq -8(%rdi,%rdx),%rdi + leaq -8(%rsi,%rdx),%rsi + jmp 80f +ALIGN_TEXT +10163216: + leaq -16(%rdi,%rdx),%rdi + leaq -16(%rsi,%rdx),%rsi + jmp 80f +ALIGN_TEXT +10163208: + leaq 8(%rdi),%rdi + leaq 8(%rsi),%rsi + jmp 80f +ALIGN_TEXT +10040804: + leaq -4(%rdi,%rdx),%rdi + leaq -4(%rsi,%rdx),%rsi + jmp 1f - movq 16(%rsi),%r8 - movq 24(%rsi),%r9 - subq 16(%rdi),%r8 - subq 24(%rdi),%r9 - or %r8,%r9 - jnz 1b +ALIGN_TEXT +80: + movl (%rdi),%r8d + movl (%rsi),%r9d + cmpl %r8d,%r9d + jne 1f + leaq 4(%rdi),%rdi + leaq 4(%rsi),%rsi - leaq 32(%rdi),%rdi - leaq 32(%rsi),%rsi - subq $32,%rdx - cmpq $32,%rdx - jae 7b - jnz 1b - jmp 3b +/* + * We have up to 4 bytes to inspect. + */ +1: + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + cmpb %r8b,%al + jne 2f + + movzbl 1(%rdi),%eax + movzbl 1(%rsi),%r8d + cmpb %r8b,%al + jne 2f + + movzbl 2(%rdi),%eax + movzbl 2(%rsi),%r8d + cmpb %r8b,%al + jne 2f + + movzbl 3(%rdi),%eax + movzbl 3(%rsi),%r8d +2: + subl %r8d,%eax + POP_FRAME_POINTER + ret END(memcmp) /*