Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 16 Aug 2018 09:02:02 +0000 (UTC)
From:      Roger Pau Monné <royger@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r477316 - in head: emulators/xen-kernel411 emulators/xen-kernel411/files sysutils/xen-tools411 sysutils/xen-tools411/files
Message-ID:  <201808160902.w7G922kJ047574@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: royger (src committer)
Date: Thu Aug 16 09:02:02 2018
New Revision: 477316
URL: https://svnweb.freebsd.org/changeset/ports/477316

Log:
  xen411: apply fixes for XSA-269, XSA-272 and XSA-273

Added:
  head/emulators/xen-kernel411/files/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0003-x86-spec-ctrl-command-line-handling-adjustments.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0006-allow-cpu_down-to-be-called-earlier.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0011-x86-possibly-bring-up-all-CPUs-even-if-not-all-are-s.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0012-x86-command-line-option-to-avoid-use-of-secondary-hy.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0013-x86-vmx-Don-t-clobber-dr6-while-debugging-state-is-l.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0014-x86-xstate-Use-a-guests-CPUID-policy-rather-than-all.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0015-x86-xstate-Make-errors-in-xstate-calculations-more-o.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0016-x86-hvm-Disallow-unknown-MSR_EFER-bits.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0017-x86-spec-ctrl-Fix-the-parsing-of-xpti-on-fixed-Intel.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0018-x86-spec-ctrl-Yet-more-fixes-for-xpti-parsing.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0019-x86-vmx-Fix-handing-of-MSR_DEBUGCTL-on-VMExit.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0020-x86-vmx-Defer-vmx_vmcs_exit-as-long-as-possible-in-c.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0021-x86-vmx-API-improvements-for-MSR-load-save-infrastru.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0022-x86-vmx-Internal-cleanup-for-MSR-load-save-infrastru.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0023-x86-vmx-Factor-locate_msr_entry-out-of-vmx_find_msr-.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0024-x86-vmx-Support-remote-access-to-the-MSR-lists.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0025-x86-vmx-Improvements-to-LBR-MSR-handling.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0026-x86-vmx-Pass-an-MSR-value-into-vmx_msr_add.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0027-x86-vmx-Support-load-only-guest-MSR-list-entries.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0028-VMX-fix-vmx_-find-del-_msr-build.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0029-ARM-disable-grant-table-v2.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0030-x86-vtx-Fix-the-checking-for-unknown-invalid-MSR_DEB.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0032-x86-spec-ctrl-Calculate-safe-PTE-addresses-for-L1TF-.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0033-x86-spec-ctrl-Introduce-an-option-to-control-L1TF-mi.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0034-x86-shadow-Infrastructure-to-force-a-PV-guest-into-s.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0035-x86-mm-Plumbing-to-allow-any-PTE-update-to-fail-with.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0036-x86-pv-Force-a-guest-into-shadow-mode-when-it-writes.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0037-x86-spec-ctrl-CPUID-MSR-definitions-for-L1D_FLUSH.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0038-x86-msr-Virtualise-MSR_FLUSH_CMD-for-guests.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0039-x86-spec-ctrl-Introduce-an-option-to-control-L1D_FLU.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0040-x86-Make-spec-ctrl-no-a-global-disable-of-all-mitiga.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0042-x86-write-to-correct-variable-in-parse_pv_l1tf.patch   (contents, props changed)
  head/sysutils/xen-tools411/files/0031-tools-oxenstored-Make-evaluation-order-explicit.patch   (contents, props changed)
  head/sysutils/xen-tools411/files/0041-xl.conf-Add-global-affinity-masks.patch   (contents, props changed)
Modified:
  head/emulators/xen-kernel411/Makefile
  head/sysutils/xen-tools411/Makefile

Modified: head/emulators/xen-kernel411/Makefile
==============================================================================
--- head/emulators/xen-kernel411/Makefile	Thu Aug 16 08:56:17 2018	(r477315)
+++ head/emulators/xen-kernel411/Makefile	Thu Aug 16 09:02:02 2018	(r477316)
@@ -2,7 +2,7 @@
 
 PORTNAME=	xen
 PORTVERSION=	4.11.0
-PORTREVISION=	0
+PORTREVISION=	1
 CATEGORIES=	emulators
 MASTER_SITES=	http://downloads.xenproject.org/release/xen/${PORTVERSION}/
 PKGNAMESUFFIX=	-kernel411
@@ -47,6 +47,49 @@ EXTRA_PATCHES+=	${FILESDIR}/0001-x86-replace-usage-in-
 		${FILESDIR}/0002-x86-efi-split-compiler-vs-linker-support.patch:-p1
 # Fix PVH Dom0 build with shadow paging
 EXTRA_PATCHES+= ${FILESDIR}/0001-x86-pvh-change-the-order-of-the-iommu-initialization.patch:-p1
+# XSA-269 (MSR_DEBUGCTL handling) and XSA-273 (L1TF)
+# Note that due to the high number of patches needed to fix L1TF the package is
+# brought up to the state of the staging-4.11 branch. This can be removed when
+# 4.11.1 is released.
+EXTRA_PATCHES+= ${FILESDIR}/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch:-p1 \
+		${FILESDIR}/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch:-p1 \
+		${FILESDIR}/0003-x86-spec-ctrl-command-line-handling-adjustments.patch:-p1 \
+		${FILESDIR}/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch:-p1 \
+		${FILESDIR}/0006-allow-cpu_down-to-be-called-earlier.patch:-p1 \
+		${FILESDIR}/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch:-p1 \
+		${FILESDIR}/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch:-p1 \
+		${FILESDIR}/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch:-p1 \
+		${FILESDIR}/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch:-p1 \
+		${FILESDIR}/0011-x86-possibly-bring-up-all-CPUs-even-if-not-all-are-s.patch:-p1 \
+		${FILESDIR}/0012-x86-command-line-option-to-avoid-use-of-secondary-hy.patch:-p1 \
+		${FILESDIR}/0013-x86-vmx-Don-t-clobber-dr6-while-debugging-state-is-l.patch:-p1 \
+		${FILESDIR}/0014-x86-xstate-Use-a-guests-CPUID-policy-rather-than-all.patch:-p1 \
+		${FILESDIR}/0015-x86-xstate-Make-errors-in-xstate-calculations-more-o.patch:-p1 \
+		${FILESDIR}/0016-x86-hvm-Disallow-unknown-MSR_EFER-bits.patch:-p1 \
+		${FILESDIR}/0017-x86-spec-ctrl-Fix-the-parsing-of-xpti-on-fixed-Intel.patch:-p1 \
+		${FILESDIR}/0018-x86-spec-ctrl-Yet-more-fixes-for-xpti-parsing.patch:-p1 \
+		${FILESDIR}/0019-x86-vmx-Fix-handing-of-MSR_DEBUGCTL-on-VMExit.patch:-p1 \
+		${FILESDIR}/0020-x86-vmx-Defer-vmx_vmcs_exit-as-long-as-possible-in-c.patch:-p1 \
+		${FILESDIR}/0021-x86-vmx-API-improvements-for-MSR-load-save-infrastru.patch:-p1 \
+		${FILESDIR}/0022-x86-vmx-Internal-cleanup-for-MSR-load-save-infrastru.patch:-p1 \
+		${FILESDIR}/0023-x86-vmx-Factor-locate_msr_entry-out-of-vmx_find_msr-.patch:-p1 \
+		${FILESDIR}/0024-x86-vmx-Support-remote-access-to-the-MSR-lists.patch:-p1 \
+		${FILESDIR}/0025-x86-vmx-Improvements-to-LBR-MSR-handling.patch:-p1 \
+		${FILESDIR}/0026-x86-vmx-Pass-an-MSR-value-into-vmx_msr_add.patch:-p1 \
+		${FILESDIR}/0027-x86-vmx-Support-load-only-guest-MSR-list-entries.patch:-p1 \
+		${FILESDIR}/0028-VMX-fix-vmx_-find-del-_msr-build.patch:-p1 \
+		${FILESDIR}/0029-ARM-disable-grant-table-v2.patch:-p1 \
+		${FILESDIR}/0030-x86-vtx-Fix-the-checking-for-unknown-invalid-MSR_DEB.patch:-p1 \
+		${FILESDIR}/0032-x86-spec-ctrl-Calculate-safe-PTE-addresses-for-L1TF-.patch:-p1 \
+		${FILESDIR}/0033-x86-spec-ctrl-Introduce-an-option-to-control-L1TF-mi.patch:-p1 \
+		${FILESDIR}/0034-x86-shadow-Infrastructure-to-force-a-PV-guest-into-s.patch:-p1 \
+		${FILESDIR}/0035-x86-mm-Plumbing-to-allow-any-PTE-update-to-fail-with.patch:-p1 \
+		${FILESDIR}/0036-x86-pv-Force-a-guest-into-shadow-mode-when-it-writes.patch:-p1 \
+		${FILESDIR}/0037-x86-spec-ctrl-CPUID-MSR-definitions-for-L1D_FLUSH.patch:-p1 \
+		${FILESDIR}/0038-x86-msr-Virtualise-MSR_FLUSH_CMD-for-guests.patch:-p1 \
+		${FILESDIR}/0039-x86-spec-ctrl-Introduce-an-option-to-control-L1D_FLU.patch:-p1 \
+		${FILESDIR}/0040-x86-Make-spec-ctrl-no-a-global-disable-of-all-mitiga.patch:-p1 \
+		${FILESDIR}/0042-x86-write-to-correct-variable-in-parse_pv_l1tf.patch:-p1
 
 .include <bsd.port.options.mk>
 

Added: head/emulators/xen-kernel411/files/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,213 @@
+From e932371d6ae0f69b89abb2dce725483c75356de2 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Mon, 30 Jul 2018 11:17:27 +0200
+Subject: [PATCH 01/42] xen: Port the array_index_nospec() infrastructure from
+ Linux
+
+This is as the infrastructure appeared in Linux 4.17, adapted slightly for
+Xen.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: 2ddfae51d8b1d7b8cd33a4f6ad4d16d27cb869ae
+master date: 2018-07-06 16:49:57 +0100
+---
+ xen/include/asm-arm/arm32/system.h | 18 ++++++++
+ xen/include/asm-arm/arm64/system.h | 22 ++++++++++
+ xen/include/asm-x86/system.h       | 24 ++++++++++
+ xen/include/xen/compiler.h         |  3 ++
+ xen/include/xen/nospec.h           | 70 ++++++++++++++++++++++++++++++
+ 5 files changed, 137 insertions(+)
+ create mode 100644 xen/include/xen/nospec.h
+
+diff --git a/xen/include/asm-arm/arm32/system.h b/xen/include/asm-arm/arm32/system.h
+index c617b40438..ab57abfbc5 100644
+--- a/xen/include/asm-arm/arm32/system.h
++++ b/xen/include/asm-arm/arm32/system.h
+@@ -48,6 +48,24 @@ static inline int local_fiq_is_enabled(void)
+     return !(flags & PSR_FIQ_MASK);
+ }
+ 
++#define CSDB    ".inst  0xe320f014"
++
++static inline unsigned long array_index_mask_nospec(unsigned long idx,
++                                                    unsigned long sz)
++{
++    unsigned long mask;
++
++    asm volatile( "cmp    %1, %2\n"
++                  "sbc    %0, %1, %1\n"
++                  CSDB
++                  : "=r" (mask)
++                  : "r" (idx), "Ir" (sz)
++                  : "cc" );
++
++    return mask;
++}
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #endif
+ /*
+  * Local variables:
+diff --git a/xen/include/asm-arm/arm64/system.h b/xen/include/asm-arm/arm64/system.h
+index 2e2ee212a1..2e36573ac6 100644
+--- a/xen/include/asm-arm/arm64/system.h
++++ b/xen/include/asm-arm/arm64/system.h
+@@ -58,6 +58,28 @@ static inline int local_fiq_is_enabled(void)
+     return !(flags & PSR_FIQ_MASK);
+ }
+ 
++#define csdb()  asm volatile ( "hint #20" : : : "memory" )
++
++/*
++ * Generate a mask for array_index_nospec() that is ~0UL when 0 <= idx < sz
++ * and 0 otherwise.
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long idx,
++                                                    unsigned long sz)
++{
++    unsigned long mask;
++
++    asm volatile ( "cmp     %1, %2\n"
++                   "sbc     %0, xzr, xzr\n"
++                   : "=r" (mask)
++                   : "r" (idx), "Ir" (sz)
++                   : "cc" );
++    csdb();
++
++    return mask;
++}
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #endif
+ /*
+  * Local variables:
+diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h
+index 43fb6fe489..483cd20afd 100644
+--- a/xen/include/asm-x86/system.h
++++ b/xen/include/asm-x86/system.h
+@@ -221,6 +221,30 @@ static always_inline unsigned long __xadd(
+ #define set_mb(var, value) do { xchg(&var, value); } while (0)
+ #define set_wmb(var, value) do { var = value; smp_wmb(); } while (0)
+ 
++/**
++ * array_index_mask_nospec() - generate a mask that is ~0UL when the
++ *      bounds check succeeds and 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * Returns:
++ *     0 - (index < size)
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++                                                    unsigned long size)
++{
++    unsigned long mask;
++
++    asm volatile ( "cmp %[size], %[index]; sbb %[mask], %[mask];"
++                   : [mask] "=r" (mask)
++                   : [size] "g" (size), [index] "r" (index) );
++
++    return mask;
++}
++
++/* Override default implementation in nospec.h. */
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #define local_irq_disable()     asm volatile ( "cli" : : : "memory" )
+ #define local_irq_enable()      asm volatile ( "sti" : : : "memory" )
+ 
+diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
+index 533a8ea0f3..a7e05681c9 100644
+--- a/xen/include/xen/compiler.h
++++ b/xen/include/xen/compiler.h
+@@ -81,6 +81,9 @@
+ #pragma GCC visibility push(hidden)
+ #endif
+ 
++/* Make the optimizer believe the variable can be manipulated arbitrarily. */
++#define OPTIMIZER_HIDE_VAR(var) __asm__ ( "" : "+g" (var) )
++
+ /* This macro obfuscates arithmetic on a variable address so that gcc
+    shouldn't recognize the original var, and make assumptions about it */
+ /*
+diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h
+new file mode 100644
+index 0000000000..48793996e8
+--- /dev/null
++++ b/xen/include/xen/nospec.h
+@@ -0,0 +1,70 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/* Copyright(c) 2018 Linus Torvalds. All rights reserved. */
++/* Copyright(c) 2018 Alexei Starovoitov. All rights reserved. */
++/* Copyright(c) 2018 Intel Corporation. All rights reserved. */
++/* Copyright(c) 2018 Citrix Systems R&D Ltd. All rights reserved. */
++
++#ifndef XEN_NOSPEC_H
++#define XEN_NOSPEC_H
++
++#include <asm/system.h>
++
++/**
++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * When @index is out of bounds (@index >= @size), the sign bit will be
++ * set.  Extend the sign bit to all bits and invert, giving a result of
++ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
++ */
++#ifndef array_index_mask_nospec
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++                                                    unsigned long size)
++{
++    /*
++     * Always calculate and emit the mask even if the compiler
++     * thinks the mask is not needed. The compiler does not take
++     * into account the value of @index under speculation.
++     */
++    OPTIMIZER_HIDE_VAR(index);
++    return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
++}
++#endif
++
++/*
++ * array_index_nospec - sanitize an array index after a bounds check
++ *
++ * For a code sequence like:
++ *
++ *     if (index < size) {
++ *         index = array_index_nospec(index, size);
++ *         val = array[index];
++ *     }
++ *
++ * ...if the CPU speculates past the bounds check then
++ * array_index_nospec() will clamp the index within the range of [0,
++ * size).
++ */
++#define array_index_nospec(index, size)                                 \
++({                                                                      \
++    typeof(index) _i = (index);                                         \
++    typeof(size) _s = (size);                                           \
++    unsigned long _mask = array_index_mask_nospec(_i, _s);              \
++                                                                        \
++    BUILD_BUG_ON(sizeof(_i) > sizeof(long));                            \
++    BUILD_BUG_ON(sizeof(_s) > sizeof(long));                            \
++                                                                        \
++    (typeof(_i)) (_i & _mask);                                          \
++})
++
++#endif /* XEN_NOSPEC_H */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,51 @@
+From da33530ab393dcc04d3e35424956277669b8d8ce Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 30 Jul 2018 11:18:54 +0200
+Subject: [PATCH 02/42] x86: correctly set nonlazy_xstate_used when loading
+ full state
+
+In this case, just like xcr0_accum, nonlazy_xstate_used should always be
+set to the intended new value, rather than possibly leaving the flag set
+from a prior state load.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: f46bf0e101ca63118b9db2616e8f51e972d7f563
+master date: 2018-07-09 10:51:02 +0200
+---
+ xen/arch/x86/domctl.c  | 3 +--
+ xen/arch/x86/hvm/hvm.c | 3 +--
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
+index 8fbbf3aeb3..b04388d663 100644
+--- a/xen/arch/x86/domctl.c
++++ b/xen/arch/x86/domctl.c
+@@ -1187,8 +1187,7 @@ long arch_do_domctl(
+                 vcpu_pause(v);
+                 v->arch.xcr0 = _xcr0;
+                 v->arch.xcr0_accum = _xcr0_accum;
+-                if ( _xcr0_accum & XSTATE_NONLAZY )
+-                    v->arch.nonlazy_xstate_used = 1;
++                v->arch.nonlazy_xstate_used = _xcr0_accum & XSTATE_NONLAZY;
+                 compress_xsave_states(v, _xsave_area,
+                                       evc->size - PV_XSAVE_HDR_SIZE);
+                 vcpu_unpause(v);
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index c23983cdff..279cb88e45 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -1324,8 +1324,7 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
+ 
+     v->arch.xcr0 = ctxt->xcr0;
+     v->arch.xcr0_accum = ctxt->xcr0_accum;
+-    if ( ctxt->xcr0_accum & XSTATE_NONLAZY )
+-        v->arch.nonlazy_xstate_used = 1;
++    v->arch.nonlazy_xstate_used = ctxt->xcr0_accum & XSTATE_NONLAZY;
+     compress_xsave_states(v, &ctxt->save_area,
+                           size - offsetof(struct hvm_hw_cpu_xsave, save_area));
+ 
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0003-x86-spec-ctrl-command-line-handling-adjustments.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0003-x86-spec-ctrl-command-line-handling-adjustments.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,45 @@
+From 4bdeedbd611c59f07878eb22955f655a81452835 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 30 Jul 2018 11:19:41 +0200
+Subject: [PATCH 03/42] x86/spec-ctrl: command line handling adjustments
+
+For one, "no-xen" should not imply "no-eager-fpu", as "eager FPU" mode
+is to guard guests, not Xen itself, which is also expressed so by
+print_details().
+
+And then opt_ssbd, despite being off by default, should also be cleared
+by the "no" and "no-xen" sub-options.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: ac3f9a72141a48d40fabfff561d5a7dc0e1b810d
+master date: 2018-07-10 12:22:31 +0200
+---
+ xen/arch/x86/spec_ctrl.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 08e6784c4c..73dc7170c7 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -124,6 +124,8 @@ static int __init parse_spec_ctrl(const char *s)
+             opt_msr_sc_pv = false;
+             opt_msr_sc_hvm = false;
+ 
++            opt_eager_fpu = 0;
++
+         disable_common:
+             opt_rsb_pv = false;
+             opt_rsb_hvm = false;
+@@ -131,7 +133,7 @@ static int __init parse_spec_ctrl(const char *s)
+             opt_thunk = THUNK_JMP;
+             opt_ibrs = 0;
+             opt_ibpb = false;
+-            opt_eager_fpu = 0;
++            opt_ssbd = false;
+         }
+         else if ( val > 0 )
+             rc = -EINVAL;
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,66 @@
+From ac35e050b64a565fe234dd42e8dac163e946e58d Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli@citrix.com>
+Date: Mon, 30 Jul 2018 11:21:28 +0200
+Subject: [PATCH 05/42] mm/page_alloc: correct first_dirty calculations during
+ block merging
+
+Currently it's possible to hit an assertion in alloc_heap_pages():
+
+Assertion 'first_dirty != INVALID_DIRTY_IDX || !(pg[i].count_info & PGC_need_scrub)' failed at page_alloc.c:988
+
+This can happen because a piece of logic to calculate first_dirty
+during block merging in free_heap_pages() is missing for the following
+scenario:
+
+1. Current block's first_dirty equals INVALID_DIRTY_IDX
+2. Successor block is free but its first_dirty != INVALID_DIRTY_IDX
+3. The successor is merged into current block
+4. Current block's first_dirty still equals INVALID_DIRTY_IDX
+
+This will trigger the assertion during allocation of such block in
+alloc_heap_pages() because there will be pages with PGC_need_scrub
+bit set despite the claim of first_dirty that the block is scrubbed.
+
+Add the missing piece of logic and slightly update the comment for
+the predecessor case to better capture the code's intent.
+
+Fixes 1a37f33ea613 ("mm: Place unscrubbed pages at the end of pagelist")
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+master commit: 1e2df9608857b5355f2ec3b1a34b87a2007dcd16
+master date: 2018-07-12 10:45:11 +0200
+---
+ xen/common/page_alloc.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
+index 20ee1e4897..02aeed7c47 100644
+--- a/xen/common/page_alloc.c
++++ b/xen/common/page_alloc.c
+@@ -1426,7 +1426,7 @@ static void free_heap_pages(
+ 
+             page_list_del(predecessor, &heap(node, zone, order));
+ 
+-            /* Keep predecessor's first_dirty if it is already set. */
++            /* Update predecessor's first_dirty if necessary. */
+             if ( predecessor->u.free.first_dirty == INVALID_DIRTY_IDX &&
+                  pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+                 predecessor->u.free.first_dirty = (1U << order) +
+@@ -1447,6 +1447,12 @@ static void free_heap_pages(
+ 
+             check_and_stop_scrub(successor);
+ 
++            /* Update pg's first_dirty if necessary. */
++            if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX &&
++                 successor->u.free.first_dirty != INVALID_DIRTY_IDX )
++                pg->u.free.first_dirty = (1U << order) +
++                                         successor->u.free.first_dirty;
++
+             page_list_del(successor, &heap(node, zone, order));
+         }
+ 
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0006-allow-cpu_down-to-be-called-earlier.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0006-allow-cpu_down-to-be-called-earlier.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,58 @@
+From a44cf0c8728e08858638170a057675ca5479fdc7 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 30 Jul 2018 11:22:06 +0200
+Subject: [PATCH 06/42] allow cpu_down() to be called earlier
+
+The function's use of the stop-machine logic has so far prevented its
+use ahead of the processing of the "ordinary" initcalls. Since at this
+early time we're in a controlled environment anyway, there's no need for
+such a heavy tool. Additionally this ought to have less of a performance
+impact especially on large systems, compared to the alternative of
+making stop-machine functionality available earlier.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 5894c0a2da66243a89088d309c7e1ea212ab28d6
+master date: 2018-07-16 15:15:12 +0200
+---
+ xen/common/cpu.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/xen/common/cpu.c b/xen/common/cpu.c
+index 6350f150bd..653a56b840 100644
+--- a/xen/common/cpu.c
++++ b/xen/common/cpu.c
+@@ -67,12 +67,17 @@ void __init register_cpu_notifier(struct notifier_block *nb)
+     spin_unlock(&cpu_add_remove_lock);
+ }
+ 
+-static int take_cpu_down(void *unused)
++static void _take_cpu_down(void *unused)
+ {
+     void *hcpu = (void *)(long)smp_processor_id();
+     int notifier_rc = notifier_call_chain(&cpu_chain, CPU_DYING, hcpu, NULL);
+     BUG_ON(notifier_rc != NOTIFY_DONE);
+     __cpu_disable();
++}
++
++static int take_cpu_down(void *arg)
++{
++    _take_cpu_down(arg);
+     return 0;
+ }
+ 
+@@ -98,7 +103,9 @@ int cpu_down(unsigned int cpu)
+         goto fail;
+     }
+ 
+-    if ( (err = stop_machine_run(take_cpu_down, NULL, cpu)) < 0 )
++    if ( unlikely(system_state < SYS_STATE_active) )
++        on_selected_cpus(cpumask_of(cpu), _take_cpu_down, NULL, true);
++    else if ( (err = stop_machine_run(take_cpu_down, NULL, cpu)) < 0 )
+         goto fail;
+ 
+     __cpu_die(cpu);
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,109 @@
+From b53e0defcea1400c03f83d1d5cc30a3b237c8cfe Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Mon, 30 Jul 2018 11:22:42 +0200
+Subject: [PATCH 07/42] x86/svm Fixes and cleanup to svm_inject_event()
+
+ * State adjustments (and debug tracing) for #DB/#BP/#PF should not be done
+   for `int $n` instructions.  Updates to %cr2 occur even if the exception
+   combines to #DF.
+ * Don't opencode DR_STEP when updating %dr6.
+ * Simplify the logic for calling svm_emul_swint_injection() as in the common
+   case, every condition needs checking.
+ * Fix comments which have become stale as code has moved between components.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+master commit: 8dab867c81ede455009028a9a88edc4ff3b9da88
+master date: 2018-07-17 10:12:40 +0100
+---
+ xen/arch/x86/hvm/svm/svm.c | 41 ++++++++++++++++----------------------
+ 1 file changed, 17 insertions(+), 24 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index 165500e3f2..b964c59dad 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -1432,24 +1432,18 @@ static void svm_inject_event(const struct x86_event *event)
+      * Xen must emulate enough of the event injection to be sure that a
+      * further fault shouldn't occur during delivery.  This covers the fact
+      * that hardware doesn't perform DPL checking on injection.
+-     *
+-     * Also, it accounts for proper positioning of %rip for an event with trap
+-     * semantics (where %rip should point after the instruction) which suffers
+-     * a fault during injection (at which point %rip should point at the
+-     * instruction).
+      */
+     if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION ||
+-         (!cpu_has_svm_nrips && (event->type == X86_EVENTTYPE_SW_INTERRUPT ||
+-                                 event->type == X86_EVENTTYPE_SW_EXCEPTION)) )
++         (!cpu_has_svm_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) )
+         svm_emul_swint_injection(&_event);
+ 
+-    switch ( _event.vector )
++    switch ( _event.vector | -(_event.type == X86_EVENTTYPE_SW_INTERRUPT) )
+     {
+     case TRAP_debug:
+         if ( regs->eflags & X86_EFLAGS_TF )
+         {
+             __restore_debug_registers(vmcb, curr);
+-            vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | 0x4000);
++            vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | DR_STEP);
+         }
+         /* fall through */
+     case TRAP_int3:
+@@ -1459,6 +1453,13 @@ static void svm_inject_event(const struct x86_event *event)
+             domain_pause_for_debugger();
+             return;
+         }
++        break;
++
++    case TRAP_page_fault:
++        ASSERT(_event.type == X86_EVENTTYPE_HW_EXCEPTION);
++        curr->arch.hvm_vcpu.guest_cr[2] = _event.cr2;
++        vmcb_set_cr2(vmcb, _event.cr2);
++        break;
+     }
+ 
+     if ( unlikely(eventinj.fields.v) &&
+@@ -1481,13 +1482,9 @@ static void svm_inject_event(const struct x86_event *event)
+      * icebp, software events with trap semantics need emulating, so %rip in
+      * the trap frame points after the instruction.
+      *
+-     * The x86 emulator (if requested by the x86_swint_emulate_* choice) will
+-     * have performed checks such as presence/dpl/etc and believes that the
+-     * event injection will succeed without faulting.
+-     *
+-     * The x86 emulator will always provide fault semantics for software
+-     * events, with _trap.insn_len set appropriately.  If the injection
+-     * requires emulation, move %rip forwards at this point.
++     * svm_emul_swint_injection() has already confirmed that events with trap
++     * semantics won't fault on injection.  Position %rip/NextRIP suitably,
++     * and restrict the event type to what hardware will tolerate.
+      */
+     switch ( _event.type )
+     {
+@@ -1544,16 +1541,12 @@ static void svm_inject_event(const struct x86_event *event)
+            eventinj.fields.errorcode == (uint16_t)eventinj.fields.errorcode);
+     vmcb->eventinj = eventinj;
+ 
+-    if ( _event.vector == TRAP_page_fault )
+-    {
+-        curr->arch.hvm_vcpu.guest_cr[2] = _event.cr2;
+-        vmcb_set_cr2(vmcb, _event.cr2);
+-        HVMTRACE_LONG_2D(PF_INJECT, _event.error_code, TRC_PAR_LONG(_event.cr2));
+-    }
++    if ( _event.vector == TRAP_page_fault &&
++         _event.type == X86_EVENTTYPE_HW_EXCEPTION )
++        HVMTRACE_LONG_2D(PF_INJECT, _event.error_code,
++                         TRC_PAR_LONG(_event.cr2));
+     else
+-    {
+         HVMTRACE_2D(INJ_EXC, _event.vector, _event.error_code);
+-    }
+ }
+ 
+ static int svm_event_pending(struct vcpu *v)
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,55 @@
+From 0a2016ca2fabfe674c311dcfd8e15fec0ba3f7b6 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 30 Jul 2018 11:23:22 +0200
+Subject: [PATCH 08/42] cpupools: fix state when downing a CPU failed
+
+While I've run into the issue with further patches in place which no
+longer guarantee the per-CPU area to start out as all zeros, the
+CPU_DOWN_FAILED processing looks to have the same issue: By not zapping
+the per-CPU cpupool pointer, cpupool_cpu_add()'s (indirect) invocation
+of schedule_cpu_switch() will trigger the "c != old_pool" assertion
+there.
+
+Clearing the field during CPU_DOWN_PREPARE is too early (afaict this
+should not happen before cpu_disable_scheduler()). Clearing it in
+CPU_DEAD and CPU_DOWN_FAILED would be an option, but would take the same
+piece of code twice. Since the field's value shouldn't matter while the
+CPU is offline, simply clear it (implicitly) for CPU_ONLINE and
+CPU_DOWN_FAILED, but only for other than the suspend/resume case (which
+gets specially handled in cpupool_cpu_remove()).
+
+By adjusting the conditional in cpupool_cpu_add() CPU_DOWN_FAILED
+handling in the suspend case should now also be handled better.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+master commit: cb1ae9a27819cea0c5008773c68a7be6f37eb0e5
+master date: 2018-07-19 09:41:55 +0200
+---
+ xen/common/cpupool.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
+index 999839444e..1e8edcbd57 100644
+--- a/xen/common/cpupool.c
++++ b/xen/common/cpupool.c
+@@ -490,7 +490,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+     cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
+     cpumask_set_cpu(cpu, &cpupool_free_cpus);
+ 
+-    if ( system_state == SYS_STATE_resume )
++    if ( system_state == SYS_STATE_suspend || system_state == SYS_STATE_resume )
+     {
+         struct cpupool **c;
+ 
+@@ -522,6 +522,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+          * (or unplugging would have failed) and that is the default behavior
+          * anyway.
+          */
++        per_cpu(cpupool, cpu) = NULL;
+         ret = cpupool_assign_cpu_locked(cpupool0, cpu);
+     }
+  out:
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,121 @@
+From bd51a6424202a5f1cd13dee6614bcb69ecbd2458 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 30 Jul 2018 11:24:01 +0200
+Subject: [PATCH 09/42] x86/AMD: distinguish compute units from hyper-threads
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fam17 replaces CUs by HTs, which we should reflect accordingly, even if
+the difference is not very big. The most relevant change (requiring some
+code restructuring) is that the topoext feature no longer means there is
+a valid CU ID.
+
+Take the opportunity and convert wrongly plain int variables in
+set_cpu_sibling_map() to unsigned int.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Brian Woods <brian.woods@amd.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 9429b07a0af7f92a5f25e4068e11db881e157495
+master date: 2018-07-19 09:42:42 +0200
+---
+ xen/arch/x86/cpu/amd.c | 16 +++++++++++-----
+ xen/arch/x86/smpboot.c | 32 ++++++++++++++++++++------------
+ 2 files changed, 31 insertions(+), 17 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 458a3fe60c..76078b55b2 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -505,17 +505,23 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
+                 u32 eax, ebx, ecx, edx;
+ 
+                 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+-                c->compute_unit_id = ebx & 0xFF;
+                 c->x86_num_siblings = ((ebx >> 8) & 0x3) + 1;
++
++                if (c->x86 < 0x17)
++                        c->compute_unit_id = ebx & 0xFF;
++                else {
++                        c->cpu_core_id = ebx & 0xFF;
++                        c->x86_max_cores /= c->x86_num_siblings;
++                }
+         }
+         
+         if (opt_cpu_info)
+                 printk("CPU %d(%d) -> Processor %d, %s %d\n",
+                        cpu, c->x86_max_cores, c->phys_proc_id,
+-                       cpu_has(c, X86_FEATURE_TOPOEXT) ? "Compute Unit" : 
+-                                                         "Core",
+-                       cpu_has(c, X86_FEATURE_TOPOEXT) ? c->compute_unit_id :
+-                                                         c->cpu_core_id);
++                       c->compute_unit_id != INVALID_CUID ? "Compute Unit"
++                                                          : "Core",
++                       c->compute_unit_id != INVALID_CUID ? c->compute_unit_id
++                                                          : c->cpu_core_id);
+ }
+ 
+ static void early_init_amd(struct cpuinfo_x86 *c)
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index d4478e6132..78ba73578a 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -234,33 +234,41 @@ static void link_thread_siblings(int cpu1, int cpu2)
+     cpumask_set_cpu(cpu2, per_cpu(cpu_core_mask, cpu1));
+ }
+ 
+-static void set_cpu_sibling_map(int cpu)
++static void set_cpu_sibling_map(unsigned int cpu)
+ {
+-    int i;
++    unsigned int i;
+     struct cpuinfo_x86 *c = cpu_data;
+ 
+     cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+ 
+     cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
++    cpumask_set_cpu(cpu, per_cpu(cpu_core_mask, cpu));
++    cpumask_set_cpu(cpu, per_cpu(cpu_sibling_mask, cpu));
+ 
+     if ( c[cpu].x86_num_siblings > 1 )
+     {
+         for_each_cpu ( i, &cpu_sibling_setup_map )
+         {
+-            if ( cpu_has(c, X86_FEATURE_TOPOEXT) ) {
+-                if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+-                     (c[cpu].compute_unit_id == c[i].compute_unit_id) )
++            if ( cpu == i || c[cpu].phys_proc_id != c[i].phys_proc_id )
++                continue;
++            if ( c[cpu].compute_unit_id != INVALID_CUID &&
++                 c[i].compute_unit_id != INVALID_CUID )
++            {
++                if ( c[cpu].compute_unit_id == c[i].compute_unit_id )
++                    link_thread_siblings(cpu, i);
++            }
++            else if ( c[cpu].cpu_core_id != XEN_INVALID_CORE_ID &&
++                      c[i].cpu_core_id != XEN_INVALID_CORE_ID )
++            {
++                if ( c[cpu].cpu_core_id == c[i].cpu_core_id )
+                     link_thread_siblings(cpu, i);
+-            } else if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+-                        (c[cpu].cpu_core_id == c[i].cpu_core_id) ) {
+-                link_thread_siblings(cpu, i);
+             }
++            else
++                printk(XENLOG_WARNING
++                       "CPU%u: unclear relationship with CPU%u\n",
++                       cpu, i);
+         }
+     }
+-    else
+-    {
+-        cpumask_set_cpu(cpu, per_cpu(cpu_sibling_mask, cpu));
+-    }
+ 
+     if ( c[cpu].x86_max_cores == 1 )
+     {
+-- 
+2.18.0
+

Added: head/emulators/xen-kernel411/files/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch	Thu Aug 16 09:02:02 2018	(r477316)
@@ -0,0 +1,423 @@
+From 5908b4866b682d9189c36eddf7c898fd95b27ec1 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 30 Jul 2018 11:24:53 +0200
+Subject: [PATCH 10/42] x86: distinguish CPU offlining from CPU removal
+
+In order to be able to service #MC on offlined CPUs, the GDT, IDT,
+stack, and per-CPU data (which includes the TSS) need to be kept
+allocated. They should only be freed upon CPU removal (which we
+currently don't support, so some code is becoming effectively dead for
+the moment).
+
+Note that for now park_offline_cpus doesn't get set to true anywhere -
+this is going to be the subject of a subsequent patch.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 2e6c8f182c9c50129b1c7a620242861e6ad6a9fb
+master date: 2018-07-19 13:43:33 +0100
+---
+ xen/arch/x86/cpu/mcheck/mce.c | 15 ++++++--
+ xen/arch/x86/domain.c         |  9 +++--
+ xen/arch/x86/genapic/x2apic.c |  9 +++--
+ xen/arch/x86/percpu.c         |  9 +++--
+ xen/arch/x86/smpboot.c        | 71 ++++++++++++++++++++++-------------
+ xen/include/asm-x86/smp.h     |  2 +
+ xen/include/xen/cpu.h         |  2 +
+ xen/include/xen/cpumask.h     | 23 ++++++++++++
+ xen/include/xen/mm.h          |  8 ++++
+ xen/include/xen/xmalloc.h     |  6 +++
+ 10 files changed, 115 insertions(+), 39 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
+index a8c287d124..32273d9208 100644
+--- a/xen/arch/x86/cpu/mcheck/mce.c
++++ b/xen/arch/x86/cpu/mcheck/mce.c
+@@ -692,12 +692,15 @@ static void cpu_bank_free(unsigned int cpu)
+ 
+     mcabanks_free(poll);
+     mcabanks_free(clr);
++
++    per_cpu(poll_bankmask, cpu) = NULL;
++    per_cpu(mce_clear_banks, cpu) = NULL;
+ }
+ 
+ static int cpu_bank_alloc(unsigned int cpu)
+ {
+-    struct mca_banks *poll = mcabanks_alloc();
+-    struct mca_banks *clr = mcabanks_alloc();
++    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc();
++    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc();
+ 
+     if ( !poll || !clr )
+     {
+@@ -725,7 +728,13 @@ static int cpu_callback(
+ 
+     case CPU_UP_CANCELED:
+     case CPU_DEAD:
+-        cpu_bank_free(cpu);
++        if ( !park_offline_cpus )
++            cpu_bank_free(cpu);
++        break;
++
++    case CPU_REMOVE:
++        if ( park_offline_cpus )
++            cpu_bank_free(cpu);
+         break;
+     }
+ 
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 9850a782ec..c39cf2c6e5 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -107,10 +107,11 @@ static void play_dead(void)
+     local_irq_disable();
+ 
+     /*
+-     * NOTE: After cpu_exit_clear, per-cpu variables are no longer accessible,
+-     * as they may be freed at any time. In this case, heap corruption or
+-     * #PF can occur (when heap debugging is enabled). For example, even
+-     * printk() can involve tasklet scheduling, which touches per-cpu vars.
++     * NOTE: After cpu_exit_clear, per-cpu variables may no longer be
++     * accessible, as they may be freed at any time if offline CPUs don't get
++     * parked. In this case, heap corruption or #PF can occur (when heap
++     * debugging is enabled). For example, even printk() can involve tasklet
++     * scheduling, which touches per-cpu vars.
+      * 
+      * Consider very carefully when adding code to *dead_idle. Most hypervisor
+      * subsystems are unsafe to call.
+diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c
+index 4779b0d0d5..d997806272 100644
+--- a/xen/arch/x86/genapic/x2apic.c
++++ b/xen/arch/x86/genapic/x2apic.c
+@@ -201,18 +201,21 @@ static int update_clusterinfo(
+         if ( !cluster_cpus_spare )
+             cluster_cpus_spare = xzalloc(cpumask_t);
+         if ( !cluster_cpus_spare ||
+-             !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
++             !cond_alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
+             err = -ENOMEM;
+         break;
+     case CPU_UP_CANCELED:
+     case CPU_DEAD:
++    case CPU_REMOVE:
++        if ( park_offline_cpus == (action != CPU_REMOVE) )
++            break;
+         if ( per_cpu(cluster_cpus, cpu) )
+         {
+             cpumask_clear_cpu(cpu, per_cpu(cluster_cpus, cpu));
+             if ( cpumask_empty(per_cpu(cluster_cpus, cpu)) )
+-                xfree(per_cpu(cluster_cpus, cpu));
++                XFREE(per_cpu(cluster_cpus, cpu));
+         }
+-        free_cpumask_var(per_cpu(scratch_mask, cpu));
++        FREE_CPUMASK_VAR(per_cpu(scratch_mask, cpu));
+         break;
+     }
+ 
+diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c
+index c9997b7937..8be4ebddf4 100644
+--- a/xen/arch/x86/percpu.c
++++ b/xen/arch/x86/percpu.c
+@@ -28,7 +28,7 @@ static int init_percpu_area(unsigned int cpu)
+     char *p;
+ 
+     if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA )
+-        return -EBUSY;
++        return 0;
+ 
+     if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL )
+         return -ENOMEM;
+@@ -76,9 +76,12 @@ static int cpu_percpu_callback(
+         break;
+     case CPU_UP_CANCELED:
+     case CPU_DEAD:
+-        free_percpu_area(cpu);
++        if ( !park_offline_cpus )
++            free_percpu_area(cpu);
+         break;
+-    default:
++    case CPU_REMOVE:
++        if ( park_offline_cpus )
++            free_percpu_area(cpu);
+         break;
+     }
+ 
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 78ba73578a..7e76cc3d68 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -63,6 +63,8 @@ static cpumask_t scratch_cpu0mask;
+ cpumask_t cpu_online_map __read_mostly;
+ EXPORT_SYMBOL(cpu_online_map);
+ 
++bool __read_mostly park_offline_cpus;
++
+ unsigned int __read_mostly nr_sockets;
+ cpumask_t **__read_mostly socket_cpumask;
+ static cpumask_t *secondary_socket_cpumask;
+@@ -895,7 +897,14 @@ static void cleanup_cpu_root_pgt(unsigned int cpu)
+     }
+ }
+ 
+-static void cpu_smpboot_free(unsigned int cpu)
++/*
++ * The 'remove' boolean controls whether a CPU is just getting offlined (and
++ * parked), or outright removed / offlined without parking. Parked CPUs need
++ * things like their stack, GDT, IDT, TSS, and per-CPU data still available.
++ * A few other items, in particular CPU masks, are also retained, as it's
++ * difficult to prove that they're entirely unreferenced from parked CPUs.
++ */
++static void cpu_smpboot_free(unsigned int cpu, bool remove)
+ {
+     unsigned int order, socket = cpu_to_socket(cpu);
+     struct cpuinfo_x86 *c = cpu_data;
+@@ -906,15 +915,19 @@ static void cpu_smpboot_free(unsigned int cpu)
+         socket_cpumask[socket] = NULL;
+     }
+ 
+-    c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
+-    c[cpu].cpu_core_id = XEN_INVALID_CORE_ID;
+-    c[cpu].compute_unit_id = INVALID_CUID;
+     cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);
+ 
+-    free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
+-    free_cpumask_var(per_cpu(cpu_core_mask, cpu));
+-    if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask )
+-        free_cpumask_var(per_cpu(scratch_cpumask, cpu));
++    if ( remove )
++    {
++        c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
++        c[cpu].cpu_core_id = XEN_INVALID_CORE_ID;
++        c[cpu].compute_unit_id = INVALID_CUID;
++

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201808160902.w7G922kJ047574>