From: Alan Cox
Date: Mon, 5 Apr 2010 16:11:42 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject: svn commit: r206183 - in stable/8/sys: amd64/amd64 amd64/include i386/i386 i386/include

Author: alc
Date: Mon Apr  5 16:11:42 2010
New Revision: 206183
URL: http://svn.freebsd.org/changeset/base/206183

Log:
  MFC r204907, r204913, r205402, r205573

  Implement AMD's recommended workaround for Erratum 383 on Family 10h
  processors.

  Enable machine check exceptions by default.
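The "Family 10h" test that recurs throughout this change combines
cpu_vendor_id with CPUID_TO_FAMILY(cpu_id).  As a rough user-space sketch of
what that macro computes -- illustrative only, not part of the commit; the
function name and the sample EAX value are invented:

#include <stdint.h>
#include <stdio.h>

/*
 * Approximates the kernel's CPUID_TO_FAMILY(): the family is the base
 * family field of CPUID leaf 1 EAX, plus the extended family field when
 * the base field is saturated at 0xf.
 */
static uint32_t
cpuid_to_family(uint32_t eax)
{
        uint32_t base, ext;

        base = (eax >> 8) & 0xf;        /* base family, EAX bits 11:8 */
        ext = (eax >> 20) & 0xff;       /* extended family, EAX bits 27:20 */
        return (base == 0xf ? base + ext : base);
}

int
main(void)
{

        /* A hypothetical Family 10h part: base 0xf plus extended 0x1. */
        printf("family %#x\n", cpuid_to_family(0x00100f42));
        return (0);
}

On AMD parts the extended family field only contributes when the base field
is saturated, which is why a Family 10h machine reports base 0xf plus
extended 0x1.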
Modified:
  stable/8/sys/amd64/amd64/mca.c
  stable/8/sys/amd64/amd64/pmap.c
  stable/8/sys/amd64/include/md_var.h
  stable/8/sys/amd64/include/specialreg.h
  stable/8/sys/i386/i386/mca.c
  stable/8/sys/i386/i386/pmap.c
  stable/8/sys/i386/include/md_var.h
  stable/8/sys/i386/include/specialreg.h
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/sys/amd64/amd64/mca.c
==============================================================================
--- stable/8/sys/amd64/amd64/mca.c	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/amd64/amd64/mca.c	Mon Apr  5 16:11:42 2010	(r206183)
@@ -60,11 +60,20 @@ static int mca_count;	/* Number of reco
 SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL,
     "Machine Check Architecture");
 
-static int mca_enabled = 0;
+static int mca_enabled = 1;
 TUNABLE_INT("hw.mca.enabled", &mca_enabled);
 SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
     "Administrative toggle for machine check support");
 
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+    "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
 static STAILQ_HEAD(, mca_internal) mca_records;
 static struct callout mca_timer;
 static int mca_ticks = 3600;	/* Check hourly by default. */
@@ -527,7 +536,7 @@ void
 mca_init(void)
 {
 	uint64_t mcg_cap;
-	uint64_t ctl;
+	uint64_t ctl, mask;
 	int skip;
 	int i;
 
@@ -535,6 +544,15 @@ mca_init(void)
 	if (!mca_enabled || !(cpu_feature & CPUID_MCE))
 		return;
 
+	/*
+	 * On AMD Family 10h processors, unless logging of level one TLB
+	 * parity (L1TP) errors is disabled, enable the recommended workaround
+	 * for Erratum 383.
+	 */
+	if (cpu_vendor_id == CPU_VENDOR_AMD &&
+	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+		workaround_erratum383 = 1;
+
 	if (cpu_feature & CPUID_MCA) {
 		if (PCPU_GET(cpuid) == 0)
 			mca_setup();
@@ -545,6 +563,19 @@ mca_init(void)
 		/* Enable MCA features. */
 		wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
 
+		/*
+		 * Disable logging of level one TLB parity (L1TP) errors by
+		 * the data cache as an alternative workaround for AMD Family
+		 * 10h Erratum 383.  Unlike the recommended workaround, there
+		 * is no performance penalty to this workaround.  However,
+		 * L1TP errors will go unreported.
+		 */
+		if (cpu_vendor_id == CPU_VENDOR_AMD &&
+		    CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+			mask = rdmsr(MSR_MC0_CTL_MASK);
+			if ((mask & (1UL << 5)) == 0)
+				wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+		}
 		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
 			/* By default enable logging of all errors. */
 			ctl = 0xffffffffffffffffUL;
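The alternative workaround in mca_init() above comes down to one bit: setting
bit 5 of MSR_MC0_CTL_MASK stops the data cache from reporting level one TLB
parity errors, trading lost error reports for zero runtime cost.  A minimal
sketch of that read-modify-write, assuming the kernel's rdmsr()/wrmsr()
primitives; the L1TP_MASK_BIT name is invented here, not taken from the
commit:

#include <sys/types.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>

#define L1TP_MASK_BIT   (1UL << 5)      /* masks L1TP error reporting */

static void
amd10h_mask_l1tp(void)
{
        uint64_t mask;

        mask = rdmsr(MSR_MC0_CTL_MASK);
        if ((mask & L1TP_MASK_BIT) == 0)        /* skip a redundant wrmsr */
                wrmsr(MSR_MC0_CTL_MASK, mask | L1TP_MASK_BIT);
}

Because hw.mca.enabled and hw.mca.amd10h_L1TP are CTLFLAG_RDTUN, they can
only be set as loader tunables (e.g. hw.mca.amd10h_L1TP="0" in
loader.conf(5)), not changed at runtime.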
Modified: stable/8/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/8/sys/amd64/amd64/pmap.c	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/amd64/amd64/pmap.c	Mon Apr  5 16:11:42 2010	(r206183)
@@ -7,7 +7,7 @@
  * All rights reserved.
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox
+ * Copyright (c) 2005-2010 Alan L. Cox
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -252,6 +252,9 @@ static void pmap_remove_entry(struct pma
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+    pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -654,13 +657,13 @@ pmap_init(void)
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
-	 * Disable large page mappings by default if the kernel is running in
-	 * a virtual machine on an AMD Family 10h processor.  This is a work-
-	 * around for Erratum 383.
+	 * If the kernel is running in a virtual machine on an AMD Family 10h
+	 * processor, then it must assume that MCA is enabled by the virtual
+	 * machine monitor.
 	 */
 	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x10)
-		pg_ps_enabled = 0;
+		workaround_erratum383 = 1;
 
 	/*
 	 * Are large page mappings enabled?
@@ -795,6 +798,45 @@ pmap_cache_bits(int mode, boolean_t is_p
 		cache_bits |= PG_NC_PWT;
 	return (cache_bits);
 }
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB.  Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+	u_long cr4;
+
+	if ((newpde & PG_PS) == 0)
+		/* Demotion: flush a specific 2MB page mapping. */
+		invlpg(va);
+	else if ((newpde & PG_G) == 0)
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB
+		 * because there are too many to flush individually.
+		 */
+		invltlb();
+	else {
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB,
+		 * including any global (PG_G) mappings.
+		 */
+		cr4 = rcr4();
+		load_cr4(cr4 & ~CR4_PGE);
+		/*
+		 * Although preemption at this point could be detrimental to
+		 * performance, it would not lead to an error.  PG_G is simply
+		 * ignored if CR4.PGE is clear.  Moreover, in case this block
+		 * is re-entered, the load_cr4() either above or below will
+		 * modify CR4.PGE flushing the TLB.
+		 */
+		load_cr4(cr4 | CR4_PGE);
+	}
+}
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
@@ -891,6 +933,69 @@ pmap_invalidate_cache(void)
 	smp_cache_flush();
 	sched_unpin();
 }
+
+struct pde_action {
+	cpumask_t store;	/* processor that updates the PDE */
+	cpumask_t invalidate;	/* processors that invalidate their TLB */
+	vm_offset_t va;
+	pd_entry_t *pde;
+	pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_action(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if (act->store == PCPU_GET(cpumask))
+		pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+		pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes.  This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+	struct pde_action act;
+	cpumask_t active, cpumask;
+
+	sched_pin();
+	cpumask = PCPU_GET(cpumask);
+	if (pmap == kernel_pmap)
+		active = all_cpus;
+	else
+		active = pmap->pm_active;
+	if ((active & PCPU_GET(other_cpus)) != 0) {
+		act.store = cpumask;
+		act.invalidate = active;
+		act.va = va;
+		act.pde = pde;
+		act.newpde = newpde;
+		smp_rendezvous_cpus(cpumask | active,
+		    smp_no_rendevous_barrier, pmap_update_pde_action,
+		    pmap_update_pde_teardown, &act);
+	} else {
+		pde_store(pde, newpde);
+		if ((active & cpumask) != 0)
+			pmap_update_pde_invalidate(va, newpde);
+	}
+	sched_unpin();
+}
 #else /* !SMP */
 /*
  * Normal, non-SMP, invalidation functions.
@@ -928,6 +1033,15 @@ pmap_invalidate_cache(void)
 
 	wbinvd();
 }
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+	pde_store(pde, newpde);
+	if (pmap == kernel_pmap || pmap->pm_active)
+		pmap_update_pde_invalidate(va, newpde);
+}
 #endif /* !SMP */
 
 static void
@@ -2310,7 +2424,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
 	 * processor changing the setting of PG_A and/or PG_M between
 	 * the read above and the store below.
	 */
-	pde_store(pde, newpde);
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, newpde);
+	else
+		pde_store(pde, newpde);
 
 	/*
 	 * Invalidate a stale recursive mapping of the page table page.
@@ -2926,7 +3043,10 @@ setpte:
 	/*
 	 * Map the superpage.
 	 */
-	pde_store(pde, PG_PS | newpde);
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+	else
+		pde_store(pde, PG_PS | newpde);
 
 	pmap_pde_promotions++;
 	CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
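The subtle case in pmap_update_pde_invalidate() above is a promotion that
involves global pages: INVLPG and an ordinary CR3 reload do not evict PG_G
entries, so the function toggles CR4.PGE, which flushes the entire TLB
including global mappings.  That idiom as a standalone helper -- a sketch
assuming rcr4()/load_cr4() from <machine/cpufunc.h>; the name invltlb_global
is invented:

#include <machine/cpufunc.h>
#include <machine/specialreg.h>

static void
invltlb_global(void)
{
        u_long cr4;

        cr4 = rcr4();
        load_cr4(cr4 & ~CR4_PGE);       /* clearing PGE flushes all entries */
        load_cr4(cr4 | CR4_PGE);        /* restore global-page support */
}

As the committed comment explains, preemption between the two writes is
harmless: PG_G is simply ignored while CR4.PGE is clear, and any re-entry
still ends with a CR4.PGE write that flushes the TLB.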
Modified: stable/8/sys/amd64/include/md_var.h
==============================================================================
--- stable/8/sys/amd64/include/md_var.h	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/amd64/include/md_var.h	Mon Apr  5 16:11:42 2010	(r206183)
@@ -61,6 +61,7 @@ extern	char	sigcode[];
 extern	int	szsigcode;
 extern	uint64_t *vm_page_dump;
 extern	int	vm_page_dump_size;
+extern	int	workaround_erratum383;
 extern	int	_udatasel;
 extern	int	_ucodesel;
 extern	int	_ucode32sel;

Modified: stable/8/sys/amd64/include/specialreg.h
==============================================================================
--- stable/8/sys/amd64/include/specialreg.h	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/amd64/include/specialreg.h	Mon Apr  5 16:11:42 2010	(r206183)
@@ -506,6 +506,7 @@
 #define	MSR_TOP_MEM	0xc001001a	/* boundary for ram below 4G */
 #define	MSR_TOP_MEM2	0xc001001d	/* boundary for ram above 4G */
 #define	MSR_K8_UCODE_UPDATE	0xc0010020	/* update microcode */
+#define	MSR_MC0_CTL_MASK	0xc0010044
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG	1	/* cpu has RNG */
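pmap_update_pde() leans on the smp_rendezvous_cpus() contract: every CPU in
the map runs the setup, action, and teardown callbacks in lock step,
separated by barriers, and passing smp_no_rendevous_barrier for a phase skips
that phase and its barrier -- which is how exactly one CPU (act->store)
performs the PDE store while every interested CPU runs the TLB-flush
teardown.  A tiny self-contained sketch of the primitive itself; the counter
and function names are invented, and NULL callbacks still wait at the
barriers:

#include <sys/param.h>
#include <sys/smp.h>
#include <machine/atomic.h>

static volatile u_int rendezvous_hits;

static void
count_cpu(void *arg)
{

        atomic_add_int(&rendezvous_hits, 1);
}

static void
count_all_cpus(void)
{

        rendezvous_hits = 0;
        smp_rendezvous_cpus(all_cpus, NULL, count_cpu, NULL, NULL);
        /* Every CPU ran count_cpu before the exit barrier released us. */
}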
Modified: stable/8/sys/i386/i386/mca.c
==============================================================================
--- stable/8/sys/i386/i386/mca.c	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/i386/i386/mca.c	Mon Apr  5 16:11:42 2010	(r206183)
@@ -60,11 +60,20 @@ static int mca_count;	/* Number of reco
 SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL,
     "Machine Check Architecture");
 
-static int mca_enabled = 0;
+static int mca_enabled = 1;
 TUNABLE_INT("hw.mca.enabled", &mca_enabled);
 SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
     "Administrative toggle for machine check support");
 
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+    "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
 static STAILQ_HEAD(, mca_internal) mca_records;
 static struct callout mca_timer;
 static int mca_ticks = 3600;	/* Check hourly by default. */
@@ -527,7 +536,7 @@ void
 mca_init(void)
 {
 	uint64_t mcg_cap;
-	uint64_t ctl;
+	uint64_t ctl, mask;
 	int skip;
 	int i;
 
@@ -535,6 +544,15 @@ mca_init(void)
 	if (!mca_enabled || !(cpu_feature & CPUID_MCE))
 		return;
 
+	/*
+	 * On AMD Family 10h processors, unless logging of level one TLB
+	 * parity (L1TP) errors is disabled, enable the recommended workaround
+	 * for Erratum 383.
+	 */
+	if (cpu_vendor_id == CPU_VENDOR_AMD &&
+	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+		workaround_erratum383 = 1;
+
 	if (cpu_feature & CPUID_MCA) {
 		if (PCPU_GET(cpuid) == 0)
 			mca_setup();
@@ -545,6 +563,19 @@ mca_init(void)
 		/* Enable MCA features. */
 		wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
 
+		/*
+		 * Disable logging of level one TLB parity (L1TP) errors by
+		 * the data cache as an alternative workaround for AMD Family
+		 * 10h Erratum 383.  Unlike the recommended workaround, there
+		 * is no performance penalty to this workaround.  However,
+		 * L1TP errors will go unreported.
+		 */
+		if (cpu_vendor_id == CPU_VENDOR_AMD &&
+		    CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+			mask = rdmsr(MSR_MC0_CTL_MASK);
+			if ((mask & (1UL << 5)) == 0)
+				wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+		}
 		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
 			/* By default enable logging of all errors. */
 			ctl = 0xffffffffffffffffUL;
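The i386 pmap that follows differs structurally from amd64: every process's
page directory carries its own copy of the kernel PDEs, so changing a kernel
PDE means storing the new value into each pmap on the allpmaps list.  The
diff below centralizes the loop that pmap_growkernel(), pmap_demote_pde(),
and pmap_promote_pde() previously open-coded as pmap_kenter_pde(); in
outline -- kernel_pde_replicate is an invented name for this sketch, and the
committed version also asserts that the current page table was covered:

static void
kernel_pde_replicate(vm_offset_t va, pd_entry_t newpde)
{
        pd_entry_t *pde;
        pmap_t pmap;

        mtx_lock_spin(&allpmaps_lock);  /* also excludes the rendezvous */
        LIST_FOREACH(pmap, &allpmaps, pm_list) {
                pde = pmap_pde(pmap, va);
                pde_store(pde, newpde);
        }
        mtx_unlock_spin(&allpmaps_lock);
}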
Modified: stable/8/sys/i386/i386/pmap.c
==============================================================================
--- stable/8/sys/i386/i386/pmap.c	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/i386/i386/pmap.c	Mon Apr  5 16:11:42 2010	(r206183)
@@ -5,7 +5,7 @@
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox
+ * Copyright (c) 2005-2010 Alan L. Cox
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -293,6 +293,7 @@ static void pmap_insert_pt_page(pmap_t p
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
@@ -311,6 +312,9 @@ static void pmap_remove_entry(struct pma
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+    pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
@@ -395,6 +399,13 @@ pmap_bootstrap(vm_paddr_t firstaddr)
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	LIST_INIT(&allpmaps);
+
+	/*
+	 * Request a spin mutex so that changes to allpmaps cannot be
+	 * preempted by smp_rendezvous_cpus().  Otherwise,
+	 * pmap_update_pde_kernel() could access allpmaps while it is
+	 * being changed.
+	 */
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
@@ -678,19 +689,21 @@ pmap_init(void)
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
-	 * Disable large page mappings by default if the kernel is running in
-	 * a virtual machine on an AMD Family 10h processor.  This is a work-
-	 * around for Erratum 383.
+	 * If the kernel is running in a virtual machine on an AMD Family 10h
+	 * processor, then it must assume that MCA is enabled by the virtual
+	 * machine monitor.
	 */
 	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x10)
-		pg_ps_enabled = 0;
+		workaround_erratum383 = 1;
 
 	/*
-	 * Are large page mappings enabled?
+	 * Are large page mappings supported and enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
-	if (pg_ps_enabled) {
+	if (pseflag == 0)
+		pg_ps_enabled = 0;
+	else if (pg_ps_enabled) {
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("pmap_init: can't assign to pagesizes[1]"));
 		pagesizes[1] = NBPDR;
@@ -836,6 +849,69 @@ pmap_cache_bits(int mode, boolean_t is_p
 		cache_bits |= PG_NC_PWT;
 	return (cache_bits);
 }
+
+/*
+ * The caller is responsible for maintaining TLB consistency.
+ */
+static void
+pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
+{
+	pd_entry_t *pde;
+	pmap_t pmap;
+	boolean_t PTD_updated;
+
+	PTD_updated = FALSE;
+	mtx_lock_spin(&allpmaps_lock);
+	LIST_FOREACH(pmap, &allpmaps, pm_list) {
+		if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
+		    PG_FRAME))
+			PTD_updated = TRUE;
+		pde = pmap_pde(pmap, va);
+		pde_store(pde, newpde);
+	}
+	mtx_unlock_spin(&allpmaps_lock);
+	KASSERT(PTD_updated,
+	    ("pmap_kenter_pde: current page table is not in allpmaps"));
+}
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB.  Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+	u_long cr4;
+
+	if ((newpde & PG_PS) == 0)
+		/* Demotion: flush a specific 2MB page mapping. */
+		invlpg(va);
+	else if ((newpde & PG_G) == 0)
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB
+		 * because there are too many to flush individually.
+		 */
+		invltlb();
+	else {
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB,
+		 * including any global (PG_G) mappings.
+		 */
+		cr4 = rcr4();
+		load_cr4(cr4 & ~CR4_PGE);
+		/*
+		 * Although preemption at this point could be detrimental to
+		 * performance, it would not lead to an error.  PG_G is simply
+		 * ignored if CR4.PGE is clear.  Moreover, in case this block
+		 * is re-entered, the load_cr4() either above or below will
+		 * modify CR4.PGE flushing the TLB.
+		 */
+		load_cr4(cr4 | CR4_PGE);
+	}
+}
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
@@ -932,6 +1008,92 @@ pmap_invalidate_cache(void)
 	smp_cache_flush();
 	sched_unpin();
 }
+
+struct pde_action {
+	cpumask_t store;	/* processor that updates the PDE */
+	cpumask_t invalidate;	/* processors that invalidate their TLB */
+	vm_offset_t va;
+	pd_entry_t *pde;
+	pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_kernel(void *arg)
+{
+	struct pde_action *act = arg;
+	pd_entry_t *pde;
+	pmap_t pmap;
+
+	if (act->store == PCPU_GET(cpumask))
+		/*
+		 * Elsewhere, this operation requires allpmaps_lock for
+		 * synchronization.  Here, it does not because it is being
+		 * performed in the context of an all_cpus rendezvous.
+		 */
+		LIST_FOREACH(pmap, &allpmaps, pm_list) {
+			pde = pmap_pde(pmap, act->va);
+			pde_store(pde, act->newpde);
+		}
+}
+
+static void
+pmap_update_pde_user(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if (act->store == PCPU_GET(cpumask))
+		pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+		pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes.  This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+	struct pde_action act;
+	cpumask_t active, cpumask;
+
+	sched_pin();
+	cpumask = PCPU_GET(cpumask);
+	if (pmap == kernel_pmap)
+		active = all_cpus;
+	else
+		active = pmap->pm_active;
+	if ((active & PCPU_GET(other_cpus)) != 0) {
+		act.store = cpumask;
+		act.invalidate = active;
+		act.va = va;
+		act.pde = pde;
+		act.newpde = newpde;
+		smp_rendezvous_cpus(cpumask | active,
+		    smp_no_rendevous_barrier, pmap == kernel_pmap ?
+		    pmap_update_pde_kernel : pmap_update_pde_user,
+		    pmap_update_pde_teardown, &act);
+	} else {
+		if (pmap == kernel_pmap)
+			pmap_kenter_pde(va, newpde);
+		else
+			pde_store(pde, newpde);
+		if ((active & cpumask) != 0)
+			pmap_update_pde_invalidate(va, newpde);
+	}
+	sched_unpin();
+}
 #else /* !SMP */
 /*
  * Normal, non-SMP, 486+ invalidation functions.
@@ -969,6 +1131,18 @@ pmap_invalidate_cache(void)
 
 	wbinvd();
 }
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+	if (pmap == kernel_pmap)
+		pmap_kenter_pde(va, newpde);
+	else
+		pde_store(pde, newpde);
+	if (pmap == kernel_pmap || pmap->pm_active)
+		pmap_update_pde_invalidate(va, newpde);
+}
 #endif /* !SMP */
 
 void
@@ -1842,12 +2016,9 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTL
 void
 pmap_growkernel(vm_offset_t addr)
 {
-	struct pmap *pmap;
 	vm_paddr_t ptppaddr;
 	vm_page_t nkpg;
 	pd_entry_t newpdir;
-	pt_entry_t *pde;
-	boolean_t updated_PTD;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 	if (kernel_vm_end == 0) {
@@ -1889,18 +2060,7 @@ pmap_growkernel(vm_offset_t addr)
 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 		pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
 
-		updated_PTD = FALSE;
-		mtx_lock_spin(&allpmaps_lock);
-		LIST_FOREACH(pmap, &allpmaps, pm_list) {
-			if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
-			    PG_FRAME))
-				updated_PTD = TRUE;
-			pde = pmap_pde(pmap, kernel_vm_end);
-			pde_store(pde, newpdir);
-		}
-		mtx_unlock_spin(&allpmaps_lock);
-		KASSERT(updated_PTD,
-		    ("pmap_growkernel: current page table is not in allpmaps"));
+		pmap_kenter_pde(kernel_vm_end, newpdir);
 		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 			kernel_vm_end = kernel_map->max_offset;
@@ -2344,7 +2504,6 @@ static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde, oldpde;
-	pmap_t allpmaps_entry;
 	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
 	vm_page_t free, mpte;
@@ -2450,25 +2609,11 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
 	 * processor changing the setting of PG_A and/or PG_M between
 	 * the read above and the store below.
 	 */
-	if (pmap == kernel_pmap) {
-		/*
-		 * A harmless race exists between this loop and the bcopy()
-		 * in pmap_pinit() that initializes the kernel segment of
-		 * the new page table directory.  Specifically, that bcopy()
-		 * may copy the new PDE from the PTD to the new page table
-		 * before this loop updates that new page table.
-		 */
-		mtx_lock_spin(&allpmaps_lock);
-		LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
-			pde = pmap_pde(allpmaps_entry, va);
-			KASSERT(*pde == newpde || (*pde & PG_PTE_PROMOTE) ==
-			    (oldpde & PG_PTE_PROMOTE),
-			    ("pmap_demote_pde: pde was %#jx, expected %#jx",
-			    (uintmax_t)*pde, (uintmax_t)oldpde));
-			pde_store(pde, newpde);
-		}
-		mtx_unlock_spin(&allpmaps_lock);
-	} else
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, newpde);
+	else if (pmap == kernel_pmap)
+		pmap_kenter_pde(va, newpde);
+	else
 		pde_store(pde, newpde);
 
 	if (firstpte == PADDR2)
 		mtx_unlock(&PMAP2mutex);
@@ -2987,7 +3132,6 @@ static void
 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde;
-	pmap_t allpmaps_entry;
 	pt_entry_t *firstpte, oldpte, pa, *pte;
 	vm_offset_t oldpteva;
 	vm_page_t mpte;
@@ -3091,14 +3235,11 @@ setpte:
 	/*
 	 * Map the superpage.
 	 */
-	if (pmap == kernel_pmap) {
-		mtx_lock_spin(&allpmaps_lock);
-		LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
-			pde = pmap_pde(allpmaps_entry, va);
-			pde_store(pde, PG_PS | newpde);
-		}
-		mtx_unlock_spin(&allpmaps_lock);
-	} else
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+	else if (pmap == kernel_pmap)
+		pmap_kenter_pde(va, PG_PS | newpde);
+	else
 		pde_store(pde, PG_PS | newpde);
 
 	pmap_pde_promotions++;
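For context on the demotion path above: before pmap_demote_pde() switches the
PDE (now via pmap_update_pde() when the workaround is active), it must first
populate a page table so that every 4KB page inherits the superpage's
attributes.  That is the job of pmap_fill_ptp(), whose declaration appears in
the diff; a condensed sketch of what it does, not the committed source:

static void
fill_ptp_sketch(pt_entry_t *firstpte, pt_entry_t newpte)
{
        pt_entry_t *pte;

        /* newpte is derived by the caller from the old superpage PDE. */
        for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
                *pte = newpte;
                newpte += PAGE_SIZE;    /* step to the next 4KB frame */
        }
}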
Modified: stable/8/sys/i386/include/md_var.h
==============================================================================
--- stable/8/sys/i386/include/md_var.h	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/i386/include/md_var.h	Mon Apr  5 16:11:42 2010	(r206183)
@@ -73,6 +73,7 @@ extern	int	szosigcode;
 #endif
 extern	uint32_t *vm_page_dump;
 extern	int	vm_page_dump_size;
+extern	int	workaround_erratum383;
 
 typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
 struct	thread;

Modified: stable/8/sys/i386/include/specialreg.h
==============================================================================
--- stable/8/sys/i386/include/specialreg.h	Mon Apr  5 14:15:51 2010	(r206182)
+++ stable/8/sys/i386/include/specialreg.h	Mon Apr  5 16:11:42 2010	(r206183)
@@ -551,6 +551,7 @@
 /* AMD64 MSR's */
 #define	MSR_EFER	0xc0000080	/* extended features */
 #define	MSR_K8_UCODE_UPDATE	0xc0010020	/* update microcode */
+#define	MSR_MC0_CTL_MASK	0xc0010044
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG	1	/* cpu has RNG */
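Finally, the net effect is observable from userland through the new read-only
sysctl.  A small test program -- illustrative; it assumes a kernel built with
this change, since hw.mca.erratum383 does not exist otherwise:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int enabled;
        size_t len = sizeof(enabled);

        if (sysctlbyname("hw.mca.erratum383", &enabled, &len, NULL, 0) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("Erratum 383 workaround %s\n", enabled ? "active" : "inactive");
        return (0);
}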