Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 18 Jul 2014 19:07:08 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        arch@freebsd.org
Cc:        amd64@freebsd.org
Subject:   KDB entry on NMI
Message-ID:  <20140718160708.GO93733@kib.kiev.ua>

next in thread | raw e-mail | index | archive | help

--IOwL3FhNvW0Xz3At
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

It was mentioned somewhere recently, that typical BIOS today configures
NMI delivery on the hardware events as broadcast.  When I developerd
the dmar(4) busdma backend, I indeed met the problem, and wrote a
prototype which avoided startup of ddb on all cores.  Instead, the patch
implements custom spinlock, which allows only one core to win, other
cores ignore the NMI, by spinning on lock.

The issue which I see on at least two different machines with different
Intel chipsets, is that NMI is somehow sticky, i.e. it is re-delivered
after the handler executes iret.  I am not sure what the problem is,
whether it is due to hardware needing some ACK, or a bug in code.

Anyway, even on two-cores machine, having both cores simultaneously
enter NMI makes the use of ddb impossible, so I believe the patch is
improvement.  I make measures to ensure that reboot from ddb prompt
works.

Thought ?

diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 9b12449..76b992a 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -32,14 +32,18 @@ __FBSDID("$FreeBSD$");
 #include "opt_kstack_pages.h"
 #include "opt_sched.h"
 #include "opt_smp.h"
+#include "opt_isa.h"
+#include "opt_kdb.h"
=20
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
+#include <sys/cpu.h>
 #include <sys/cpuset.h>
 #ifdef GPROF=20
 #include <sys/gmon.h>
 #endif
+#include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -60,6 +64,7 @@ __FBSDID("$FreeBSD$");
=20
 #include <x86/apicreg.h>
 #include <machine/clock.h>
+#include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/cpufunc.h>
 #include <x86/mca.h>
@@ -164,6 +169,7 @@ static int cpu_logical;			/* logical cpus per core */
 static int cpu_cores;			/* cores per package */
=20
 static void	assign_cpu_ids(void);
+static void	cpustop_handler_post(u_int cpu);
 static void	set_interrupt_apic_ids(void);
 static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
@@ -1415,26 +1421,44 @@ ipi_nmi_handler()
 	cpustop_handler();
 	return (0);
 }
-    =20
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpustop_handler(void)
-{
-	u_int cpu;
=20
-	cpu =3D PCPU_GET(cpuid);
+#ifdef DEV_ISA
+static int nmi_kdb_lock;
+#endif
=20
-	savectx(&stoppcbs[cpu]);
+#ifdef DEV_ISA
+bool
+nmi_call_kdb_smp(u_int type, int code, struct trapframe *frame, bool do_pa=
nic)
+{
+	int cpu;
+	bool call_post, ret;
=20
-	/* Indicate that we are stopped */
-	CPU_SET_ATOMIC(cpu, &stopped_cpus);
+	call_post =3D false;
+	cpu =3D PCPU_GET(cpuid);
+	if (atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) {
+		ret =3D nmi_call_kdb(cpu, type, code, frame, do_panic);
+	} else {
+		ret =3D true;
+		savectx(&stoppcbs[cpu]);
+		while (!atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) {
+			if (CPU_ISSET(cpu, &ipi_nmi_pending)) {
+				CPU_CLR_ATOMIC(cpu, &ipi_nmi_pending);
+				call_post =3D true;
+			}
+			cpustop_handler_post(cpu);
+			cpu_spinwait();
+		}
+	}
+	atomic_store_rel_int(&nmi_kdb_lock, 0);
+	if (call_post)
+		cpustop_handler_post(cpu);
+	return (ret);
+}
+#endif
=20
-	/* Wait for restart */
-	while (!CPU_ISSET(cpu, &started_cpus))
-	    ia32_pause();
+static void
+cpustop_handler_post(u_int cpu)
+{
=20
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
 	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
@@ -1450,6 +1474,25 @@ cpustop_handler(void)
 }
=20
 /*
+ * Handle an IPI_STOP by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpustop_handler(void)
+{
+	u_int cpu;
+
+	cpu =3D PCPU_GET(cpuid);
+	savectx(&stoppcbs[cpu]);
+	/* Indicate that we are stopped */
+	CPU_SET_ATOMIC(cpu, &stopped_cpus);
+	/* Wait for restart */
+	while (!CPU_ISSET(cpu, &started_cpus))
+	    ia32_pause();
+	cpustop_handler_post(cpu);
+}
+
+/*
  * Handle an IPI_SUSPEND by saving our current context and spinning until =
we
  * are resumed.
  */
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index d9203bc..6fa576e 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -74,6 +74,7 @@ PMC_SOFT_DEFINE( , , page_fault, all);
 PMC_SOFT_DEFINE( , , page_fault, read);
 PMC_SOFT_DEFINE( , , page_fault, write);
 #endif
+#include <dev/pci/pcivar.h>
=20
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -158,6 +159,44 @@ SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG=
_RWTUN,
     &uprintf_signal, 0,
     "Print debugging information on trap signal to ctty");
=20
+#ifdef DEV_ISA
+bool
+nmi_call_kdb(u_int cpu, u_int type, int code, struct trapframe *frame,
+    bool do_panic)
+{
+
+	/* machine/parity/power fail/"kitchen sink" faults */
+	if (isa_nmi(code) =3D=3D 0) {
+#ifdef KDB
+		/*
+		 * NMI can be hooked up to a pushbutton for debugging.
+		 */
+		if (kdb_on_nmi) {
+			printf ("NMI/cpu%d ... going to debugger\n", cpu);
+			kdb_trap(type, 0, frame);
+			return (true);
+		}
+	} else
+#endif /* KDB */
+	if (do_panic)
+		panic("NMI indicates hardware failure");
+	return (false);
+}
+#endif
+
+static int
+handle_nmi_intr(u_int type, int code, struct trapframe *frame, bool panic)
+{
+
+#ifdef DEV_ISA
+#ifdef SMP
+	return (nmi_call_kdb_smp(type, code, frame, panic));
+#else
+	return (nmi_call_kdb(0, type, code, frame, panic));
+#endif
+#endif
+}
+
 /*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
@@ -357,25 +396,9 @@ trap(struct trapframe *frame)
 			i =3D SIGFPE;
 			break;
=20
-#ifdef DEV_ISA
 		case T_NMI:
-			/* machine/parity/power fail/"kitchen sink" faults */
-			if (isa_nmi(code) =3D=3D 0) {
-#ifdef KDB
-				/*
-				 * NMI can be hooked up to a pushbutton
-				 * for debugging.
-				 */
-				if (kdb_on_nmi) {
-					printf ("NMI ... going to debugger\n");
-					kdb_trap(type, 0, frame);
-				}
-#endif /* KDB */
-				goto userout;
-			} else if (panic_on_nmi)
-				panic("NMI indicates hardware failure");
+			handle_nmi_intr(type, code, frame, true);
 			break;
-#endif /* DEV_ISA */
=20
 		case T_OFLOW:		/* integer overflow fault */
 			ucode =3D FPE_INTOVF;
@@ -543,25 +566,11 @@ trap(struct trapframe *frame)
 #endif
 			break;
=20
-#ifdef DEV_ISA
 		case T_NMI:
-			/* machine/parity/power fail/"kitchen sink" faults */
-			if (isa_nmi(code) =3D=3D 0) {
-#ifdef KDB
-				/*
-				 * NMI can be hooked up to a pushbutton
-				 * for debugging.
-				 */
-				if (kdb_on_nmi) {
-					printf ("NMI ... going to debugger\n");
-					kdb_trap(type, 0, frame);
-				}
-#endif /* KDB */
-				goto out;
-			} else if (panic_on_nmi =3D=3D 0)
+			if (handle_nmi_intr(type, code, frame, false) ||
+			    !panic_on_nmi)
 				goto out;
 			/* FALLTHROUGH */
-#endif /* DEV_ISA */
 		}
=20
 		trap_fatal(frame, 0);
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 5ddfbbd..da170f2 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -120,5 +120,9 @@ struct savefpu *get_pcb_user_save_td(struct thread *td);
 struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
 struct pcb *get_pcb_td(struct thread *td);
 void	amd64_db_resume_dbreg(void);
+bool	nmi_call_kdb(u_int cpu, u_int type, int code, struct trapframe *frame,
+	    bool panic);
+bool	nmi_call_kdb_smp(u_int type, int code, struct trapframe *frame,
+	    bool panic);
=20
 #endif /* !_MACHINE_MD_VAR_H_ */

--IOwL3FhNvW0Xz3At
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBAgAGBQJTyUYsAAoJEJDCuSvBvK1Bl5YP/2IEO296ZUqjVYUqvcGoZm/x
Ov+1xzBFsWd+M81IHKHtyQ91B5dzS4vDnTBrlQR7/PjEKpMuuscLPkXDMJ2Q3KG7
3RC/DHZO2gtNKuppUW72Wtr5bZwRc7WChFE/q48/N0f3Pan8o25ZqHtdt+j3pMlM
xj3GjtH5xlnlOzHUgBUeGgZEnzA9bl1kvBYDz53j+JSqbjZCsoJpSfhq5zP0fuYM
O6L+gyQhqPN7NjkVcJaG4wfsA+spVfHt72+67HWj8AbXvj6NJsNifUCtyNlTnRwf
czp+j6m9NfZyC3aSgvnC6uI+txLm7ambdRuuxtzvLEsQb4f2Lst1HKPgq3yvexcI
yNAwmlkCFWl7GBpUFknk9I6+MTW+bgicrnU0F49JUj247V1jTpLImQPVORD7wWC3
vhcudmCDCiB4Qj/meRMfKjaIBtGcM6OYFvbkLUKse2zHwc8vl41B/tGdECwgt3Bd
ZWhj6jHm0ck6mKvNCxlqDrEFL4cXgeeaV823AEBWJi9M4zczLe+W4NIk1sihePtJ
OSHBUtsK3ExytjUoV1Q7cf45G0+gJuHZkrk7V53Kty1qAd1JrHi5dC57hDOwUqyE
EvXRUi4z9NYvrSxyyGOzbcNaaQMqwlTo39hsuTNUUGX5dmQjYIasby07M+9OE51b
oA1RaVbv/lGOcnqaCEY0
=CQqR
-----END PGP SIGNATURE-----

--IOwL3FhNvW0Xz3At--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140718160708.GO93733>