From: Konstantin Belousov <kib@FreeBSD.org>
Date: Sun, 2 Sep 2018 10:51:31 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject: svn commit: r338427 - in stable/11/sys/amd64: amd64 include vmm/intel
Message-Id: <201809021051.w82ApVE0012450@repo.freebsd.org>
List-Id: SVN commit messages for only the 11-stable src tree

Author: kib
Date: Sun Sep  2 10:51:31 2018
New Revision: 338427
URL: https://svnweb.freebsd.org/changeset/base/338427

Log:
  MFC r338068, r338113:
  Update L1TF workaround to sustain L1D pollution from NMI.
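Usage note (editorial sketch, not part of the committed change): the
machdep.nmi_flush_l1d_sw knob added below is CTLFLAG_RWTUN, so it can be
set both as a loader tunable and at runtime.  Assuming the semantics
documented in the trap.c hunk (0 = flush only when vmm.ko requires it,
>1 = always flush on NMI return), an administrator could force the
software flush like this, where the value 2 is just one example of ">1":

  # in /boot/loader.conf
  machdep.nmi_flush_l1d_sw=2

  # or at runtime
  sysctl machdep.nmi_flush_l1d_sw=2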
Modified:
  stable/11/sys/amd64/amd64/exception.S
  stable/11/sys/amd64/amd64/support.S
  stable/11/sys/amd64/amd64/trap.c
  stable/11/sys/amd64/include/md_var.h
  stable/11/sys/amd64/vmm/intel/vmx.c
  stable/11/sys/amd64/vmm/intel/vmx_support.S
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/amd64/amd64/exception.S
==============================================================================
--- stable/11/sys/amd64/amd64/exception.S	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/amd64/exception.S	Sun Sep  2 10:51:31 2018	(r338427)
@@ -848,7 +848,10 @@ nocallchain:
 	movl	%edx,%eax
 	shrq	$32,%rdx
 	wrmsr
-	movq	%r13,%cr3
+	cmpb	$0, nmi_flush_l1d_sw(%rip)
+	je	2f
+	call	flush_l1d_sw		/* bhyve L1TF assist */
+2:	movq	%r13,%cr3
 	RESTORE_REGS
 	addq	$TF_RIP,%rsp
 	jmp	doreti_iret

Modified: stable/11/sys/amd64/amd64/support.S
==============================================================================
--- stable/11/sys/amd64/amd64/support.S	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/amd64/support.S	Sun Sep  2 10:51:31 2018	(r338427)
@@ -892,3 +892,36 @@ ENTRY(handle_ibrs_exit_rs)
 END(handle_ibrs_exit_rs)
 
 	.noaltmacro
+
+/*
+ * Flush L1D cache.  Load enough of the data from the kernel text
+ * to flush existing L1D content.
+ *
+ * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
+ * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
+ * registers are clobbered.  The NMI handler caller only needs %r13 preserved.
+ */
+ENTRY(flush_l1d_sw)
+#define	L1D_FLUSH_SIZE	(64 * 1024)
+	movq	$KERNBASE, %r9
+	movq	$-L1D_FLUSH_SIZE, %rcx
+	/*
+	 * pass 1: Preload TLB.
+	 * Kernel text is mapped using superpages.  TLB preload is
+	 * done for the benefit of older CPUs which split 2M page
+	 * into 4k TLB entries.
+	 */
+1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
+	addq	$PAGE_SIZE, %rcx
+	jne	1b
+	xorl	%eax, %eax
+	cpuid
+	movq	$-L1D_FLUSH_SIZE, %rcx
+	/* pass 2: Read each cache line. */
+2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
+	addq	$64, %rcx
+	jne	2b
+	lfence
+	ret
+#undef	L1D_FLUSH_SIZE
+END(flush_l1d_sw)
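Reader's note on the loop structure above: %rcx starts at
-L1D_FLUSH_SIZE and counts up toward zero, so the effective address
L1D_FLUSH_SIZE(%r9, %rcx) walks from KERNBASE up through KERNBASE +
64KB, and the addq sets ZF exactly when the whole range has been
touched.  A hedged C sketch of the same two passes (not from the
commit; flush_l1d_sw_sketch, SKETCH_PAGE_SIZE, and serialize() are
illustrative stand-ins):

  #include <stddef.h>

  #define L1D_FLUSH_SIZE   (64 * 1024)
  #define SKETCH_PAGE_SIZE 4096         /* stands in for PAGE_SIZE */

  static void serialize(void);          /* stands in for the CPUID fence */

  static void
  flush_l1d_sw_sketch(const volatile char *kernbase)
  {
          size_t off;

          /* Pass 1: touch one byte per 4K page to preload the TLB. */
          for (off = 0; off < L1D_FLUSH_SIZE; off += SKETCH_PAGE_SIZE)
                  (void)kernbase[off];
          serialize();
          /* Pass 2: read one byte per 64-byte line, displacing the L1D. */
          for (off = 0; off < L1D_FLUSH_SIZE; off += 64)
                  (void)kernbase[off];
  }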
Modified: stable/11/sys/amd64/amd64/trap.c
==============================================================================
--- stable/11/sys/amd64/amd64/trap.c	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/amd64/trap.c	Sun Sep  2 10:51:31 2018	(r338427)
@@ -158,6 +158,20 @@ SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG
     "Print debugging information on trap signal to ctty");
 
 /*
+ * Control L1D flush on return from NMI.
+ *
+ * Tunable can be set to the following values:
+ * 0 - only enable flush on return from NMI if required by vmm.ko (default)
+ * >1 - always flush on return from NMI.
+ *
+ * Post-boot, the sysctl indicates if flushing is currently enabled.
+ */
+int nmi_flush_l1d_sw;
+SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN,
+    &nmi_flush_l1d_sw, 0,
+    "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist");
+
+/*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
  * routines that prepare a suitable stack frame, and restore this

Modified: stable/11/sys/amd64/include/md_var.h
==============================================================================
--- stable/11/sys/amd64/include/md_var.h	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/include/md_var.h	Sun Sep  2 10:51:31 2018	(r338427)
@@ -38,6 +38,7 @@ extern uint64_t *vm_page_dump;
 extern int hw_lower_amd64_sharedpage;
 extern int hw_ibrs_disable;
 extern int hw_ssb_disable;
+extern int nmi_flush_l1d_sw;
 
 /*
  * The file "conf/ldscript.amd64" defines the symbol "kernphys".  Its
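Note on the module load/unload handshake: in the vmx.c diff that
follows, vmx_init() raises nmi_flush_l1d_sw from 0 to 1 when the
software flush is selected, and vmx_cleanup() drops it back to 0 on
unload, while both sides leave an administrator-forced value (>1)
untouched.  A hedged C sketch of that protocol
(vmm_enable_nmi_flush/vmm_disable_nmi_flush are hypothetical names,
not from the commit):

  extern int nmi_flush_l1d_sw;          /* declared in md_var.h above */

  static void
  vmm_enable_nmi_flush(void)
  {
          if (nmi_flush_l1d_sw <= 1)    /* do not clobber a forced >1 */
                  nmi_flush_l1d_sw = 1;
  }

  static void
  vmm_disable_nmi_flush(void)
  {
          if (nmi_flush_l1d_sw == 1)    /* only undo our own enable */
                  nmi_flush_l1d_sw = 0;
  }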
Modified: stable/11/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx.c	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/vmm/intel/vmx.c	Sun Sep  2 10:51:31 2018	(r338427)
@@ -188,8 +188,11 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed,
 static int guest_l1d_flush;
 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD,
     &guest_l1d_flush, 0, NULL);
+static int guest_l1d_flush_sw;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
+    &guest_l1d_flush_sw, 0, NULL);
 
-uint64_t vmx_msr_flush_cmd;
+static struct msr_entry msr_load_list[1] __aligned(16);
 
 /*
  * Use the last page below 4GB as the APIC access address.  This address is

@@ -500,6 +503,9 @@ vmx_cleanup(void)
 		vpid_unr = NULL;
 	}
 
+	if (nmi_flush_l1d_sw == 1)
+		nmi_flush_l1d_sw = 0;
+
 	smp_rendezvous(NULL, vmx_disable, NULL, NULL);
 
 	return (0);

@@ -728,11 +734,30 @@ vmx_init(int ipinum)
 	guest_l1d_flush = (cpu_ia32_arch_caps &
 	    IA32_ARCH_CAP_RDCL_NO) == 0;
 	TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
-	if (guest_l1d_flush &&
-	    (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0)
-		vmx_msr_flush_cmd = IA32_FLUSH_CMD_L1D;
 
 	/*
+	 * L1D cache flush is enabled.  Use IA32_FLUSH_CMD MSR when
+	 * available.  Otherwise fall back to the software flush
+	 * method which loads enough data from the kernel text to
+	 * flush existing L1D content, both on VMX entry and on NMI
+	 * return.
+	 */
+	if (guest_l1d_flush) {
+		if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) {
+			guest_l1d_flush_sw = 1;
+			TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw",
+			    &guest_l1d_flush_sw);
+		}
+		if (guest_l1d_flush_sw) {
+			if (nmi_flush_l1d_sw <= 1)
+				nmi_flush_l1d_sw = 1;
+		} else {
+			msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
+			msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
+		}
+	}
+
+	/*
 	 * Stash the cr0 and cr4 bits that must be fixed to 0 or 1
 	 */
 	fixed0 = rdmsr(MSR_VMX_CR0_FIXED0);

@@ -920,6 +945,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 		error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
 		error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
 		error += vmwrite(VMCS_VPID, vpid[i]);
+
+		if (guest_l1d_flush && !guest_l1d_flush_sw) {
+			vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
+			    (vm_offset_t)&msr_load_list[0]));
+			vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT,
+			    nitems(msr_load_list));
+			vmcs_write(VMCS_EXIT_MSR_STORE, 0);
+			vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
+		}
 
 		/* exception bitmap */
 		if (vcpu_trace_exceptions(vm, i))

Modified: stable/11/sys/amd64/vmm/intel/vmx_support.S
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx_support.S	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/vmm/intel/vmx_support.S	Sun Sep  2 10:51:31 2018	(r338427)
@@ -176,44 +176,10 @@ ENTRY(vmx_enter_guest)
 	jbe	invept_error		/* Check invept instruction error */
 
 guest_restore:
-
-	/*
-	 * Flush L1D cache if requested.  Use IA32_FLUSH_CMD MSR if available,
-	 * otherwise load enough of the data from the zero_region to flush
-	 * existing L1D content.
-	 */
-#define	L1D_FLUSH_SIZE	(64 * 1024)
 	movl	%edx, %r8d
-	cmpb	$0, guest_l1d_flush(%rip)
+	cmpb	$0, guest_l1d_flush_sw(%rip)
 	je	after_l1d
-	movq	vmx_msr_flush_cmd(%rip), %rax
-	testq	%rax, %rax
-	jz	1f
-	movq	%rax, %rdx
-	shrq	$32, %rdx
-	movl	$MSR_IA32_FLUSH_CMD, %ecx
-	wrmsr
-	jmp	after_l1d
-1:	movq	$KERNBASE, %r9
-	movq	$-L1D_FLUSH_SIZE, %rcx
-	/*
-	 * pass 1: Preload TLB.
-	 * Kernel text is mapped using superpages.  TLB preload is
-	 * done for the benefit of older CPUs which split 2M page
-	 * into 4k TLB entries.
-	 */
-2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
-	addq	$PAGE_SIZE, %rcx
-	jne	2b
-	xorl	%eax, %eax
-	cpuid
-	movq	$-L1D_FLUSH_SIZE, %rcx
-	/* pass 2: Read each cache line */
-3:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
-	addq	$64, %rcx
-	jne	3b
-	lfence
-#undef	L1D_FLUSH_SIZE
+	call	flush_l1d_sw
 after_l1d:
 	cmpl	$0, %r8d
 	je	do_launch
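Observability note (editorial sketch): once vmm.ko is loaded, the
variant that was selected can be read back through the sysctls touched
by this change; hw.vmm.vmx.l1d_flush_sw reports whether the software
fallback is in use, and machdep.nmi_flush_l1d_sw reports whether the
NMI-return flush is currently active:

  sysctl hw.vmm.vmx.l1d_flush hw.vmm.vmx.l1d_flush_sw
  sysctl machdep.nmi_flush_l1d_sw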