Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 16 Mar 2010 16:01:19 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r205214 - in head/sys: amd64/amd64 amd64/include i386/i386 i386/include
Message-ID:  <201003161601.o2GG1JmE028502@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Tue Mar 16 16:01:19 2010
New Revision: 205214
URL: http://svn.freebsd.org/changeset/base/205214

Log:
  - Extend the machine check record structure to include several fields useful
    for parsing model-specific and other fields in machine check events
    including the global machine check capabilities and status registers,
    CPU identification, and the FreeBSD CPU ID.
  - Report these added fields in the console log of a machine check so that
    a record structure can be reconstituted from the console messages.
  - Parse new architectural errors including memory controller errors.
  
  MFC after:	1 week

Modified:
  head/sys/amd64/amd64/mca.c
  head/sys/amd64/include/mca.h
  head/sys/amd64/include/specialreg.h
  head/sys/i386/i386/mca.c
  head/sys/i386/include/mca.h
  head/sys/i386/include/specialreg.h

Modified: head/sys/amd64/amd64/mca.c
==============================================================================
--- head/sys/amd64/amd64/mca.c	Tue Mar 16 15:55:41 2010	(r205213)
+++ head/sys/amd64/amd64/mca.c	Tue Mar 16 16:01:19 2010	(r205214)
@@ -186,19 +186,46 @@ mca_error_request(uint16_t mca_error)
 	return ("???");
 }
 
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+
+	switch ((mca_error & 0x70) >> 4) {
+	case 0x0:
+		return ("GEN");
+	case 0x1:
+		return ("RD");
+	case 0x2:
+		return ("WR");
+	case 0x3:
+		return ("AC");
+	case 0x4:
+		return ("MS");
+	}
+	return ("???");
+}
+
 /* Dump details about a single machine check. */
 static void __nonnull(1)
 mca_log(const struct mca_record *rec)
 {
 	uint16_t mca_error;
 
-	printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
 	    (long long)rec->mr_status);
-	printf("MCA: CPU %d ", rec->mr_apic_id);
+	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+	    rec->mr_cpu_id, rec->mr_apic_id);
+	printf("MCA: CPU %d ", rec->mr_cpu);
 	if (rec->mr_status & MC_STATUS_UC)
 		printf("UNCOR ");
-	else
+	else {
 		printf("COR ");
+		if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+			printf("(%lld) ", ((long long)rec->mr_status &
+			    MC_STATUS_COR_COUNT) >> 38);
+	}
 	if (rec->mr_status & MC_STATUS_PCC)
 		printf("PCC ");
 	if (rec->mr_status & MC_STATUS_OVER)
@@ -221,6 +248,9 @@ mca_log(const struct mca_record *rec)
 	case 0x0004:
 		printf("FRC error");
 		break;
+	case 0x0005:
+		printf("internal parity error");
+		break;
 	case 0x0400:
 		printf("internal timer error");
 		break;
@@ -245,6 +275,17 @@ mca_log(const struct mca_record *rec)
 			break;
 		}
 
+		/* Memory controller error. */
+		if ((mca_error & 0xef80) == 0x0080) {
+			printf("%s channel ", mca_error_mmtype(mca_error));
+			if ((mca_error & 0x000f) != 0x000f)
+				printf("%d", mca_error & 0x000f);
+			else
+				printf("??");
+			printf(" memory error");
+			break;
+		}
+		
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
 			printf("%sCACHE %s %s error",
@@ -322,6 +363,11 @@ mca_check_status(int bank, struct mca_re
 		rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
 	rec->mr_tsc = rdtsc();
 	rec->mr_apic_id = PCPU_GET(apic_id);
+	rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+	rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+	rec->mr_cpu_id = cpu_id;
+	rec->mr_cpu_vendor_id = cpu_vendor_id;
+	rec->mr_cpu = PCPU_GET(cpuid);
 
 	/*
 	 * Clear machine check.  Don't do this for uncorrectable

Modified: head/sys/amd64/include/mca.h
==============================================================================
--- head/sys/amd64/include/mca.h	Tue Mar 16 15:55:41 2010	(r205213)
+++ head/sys/amd64/include/mca.h	Tue Mar 16 16:01:19 2010	(r205214)
@@ -37,6 +37,11 @@ struct mca_record {
 	uint64_t	mr_tsc;
 	int		mr_apic_id;
 	int		mr_bank;
+	uint64_t	mr_mcg_cap;
+	uint64_t	mr_mcg_status;
+	int		mr_cpu_id;
+	int		mr_cpu_vendor_id;
+	int		mr_cpu;
 };
 
 #ifdef _KERNEL

Modified: head/sys/amd64/include/specialreg.h
==============================================================================
--- head/sys/amd64/include/specialreg.h	Tue Mar 16 15:55:41 2010	(r205213)
+++ head/sys/amd64/include/specialreg.h	Tue Mar 16 16:01:19 2010	(r205214)
@@ -267,6 +267,7 @@
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
+#define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
@@ -352,8 +353,10 @@
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
+#define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
+#define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
@@ -363,9 +366,14 @@
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
+#define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffffUL
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000UL
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000UL
+#define	MC_STATUS_COR_COUNT	0x001fffc000000000UL	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_TES_STATUS	0x0060000000000000UL	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_AR		0x0080000000000000UL	/* If MCG_CAP_CMCI_P */
+#define	MC_STATUS_S		0x0100000000000000UL	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_PCC		0x0200000000000000UL
 #define	MC_STATUS_ADDRV		0x0400000000000000UL
 #define	MC_STATUS_MISCV		0x0800000000000000UL
@@ -373,6 +381,10 @@
 #define	MC_STATUS_UC		0x2000000000000000UL
 #define	MC_STATUS_OVER		0x4000000000000000UL
 #define	MC_STATUS_VAL		0x8000000000000000UL
+#define	MC_MISC_RA_LSB		0x000000000000003fUL	/* If MCG_CAP_SER_P */
+#define	MC_MISC_ADDRESS_MODE	0x00000000000001c0UL	/* If MCG_CAP_SER_P */
+#define	MC_CTL2_THRESHOLD	0x0000000000003fffUL
+#define	MC_CTL2_CMCI_EN		0x0000000040000000UL
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.

Modified: head/sys/i386/i386/mca.c
==============================================================================
--- head/sys/i386/i386/mca.c	Tue Mar 16 15:55:41 2010	(r205213)
+++ head/sys/i386/i386/mca.c	Tue Mar 16 16:01:19 2010	(r205214)
@@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error)
 	return ("???");
 }
 
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+
+	switch ((mca_error & 0x70) >> 4) {
+	case 0x0:
+		return ("GEN");
+	case 0x1:
+		return ("RD");
+	case 0x2:
+		return ("WR");
+	case 0x3:
+		return ("AC");
+	case 0x4:
+		return ("MS");
+	}
+	return ("???");
+}
+
 /* Dump details about a single machine check. */
 static void __nonnull(1)
 mca_log(const struct mca_record *rec)
 {
 	uint16_t mca_error;
 
-	printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
 	    (long long)rec->mr_status);
-	printf("MCA: CPU %d ", rec->mr_apic_id);
+	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+	    rec->mr_cpu_id, rec->mr_apic_id);
+	printf("MCA: CPU %d ", rec->mr_cpu);
 	if (rec->mr_status & MC_STATUS_UC)
 		printf("UNCOR ");
-	else
+	else {
 		printf("COR ");
+		if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+			printf("(%lld) ", ((long long)rec->mr_status &
+			    MC_STATUS_COR_COUNT) >> 38);
+	}
 	if (rec->mr_status & MC_STATUS_PCC)
 		printf("PCC ");
 	if (rec->mr_status & MC_STATUS_OVER)
@@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec)
 	case 0x0004:
 		printf("FRC error");
 		break;
+	case 0x0005:
+		printf("internal parity error");
+		break;
 	case 0x0400:
 		printf("internal timer error");
 		break;
@@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec)
 			break;
 		}
 
+		/* Memory controller error. */
+		if ((mca_error & 0xef80) == 0x0080) {
+			printf("%s channel ", mca_error_mmtype(mca_error));
+			if ((mca_error & 0x000f) != 0x000f)
+				printf("%d", mca_error & 0x000f);
+			else
+				printf("??");
+			printf(" memory error");
+			break;
+		}
+		
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
 			printf("%sCACHE %s %s error",
@@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_re
 		rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
 	rec->mr_tsc = rdtsc();
 	rec->mr_apic_id = PCPU_GET(apic_id);
+	rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+	rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+	rec->mr_cpu_id = cpu_id;
+	rec->mr_cpu_vendor_id = cpu_vendor_id;
+	rec->mr_cpu = PCPU_GET(cpuid);
 
 	/*
 	 * Clear machine check.  Don't do this for uncorrectable

Modified: head/sys/i386/include/mca.h
==============================================================================
--- head/sys/i386/include/mca.h	Tue Mar 16 15:55:41 2010	(r205213)
+++ head/sys/i386/include/mca.h	Tue Mar 16 16:01:19 2010	(r205214)
@@ -37,6 +37,11 @@ struct mca_record {
 	uint64_t	mr_tsc;
 	int		mr_apic_id;
 	int		mr_bank;
+	uint64_t	mr_mcg_cap;
+	uint64_t	mr_mcg_status;
+	int		mr_cpu_id;
+	int		mr_cpu_vendor_id;
+	int		mr_cpu;
 };
 
 #ifdef _KERNEL

Modified: head/sys/i386/include/specialreg.h
==============================================================================
--- head/sys/i386/include/specialreg.h	Tue Mar 16 15:55:41 2010	(r205213)
+++ head/sys/i386/include/specialreg.h	Tue Mar 16 16:01:19 2010	(r205214)
@@ -273,6 +273,7 @@
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
+#define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
@@ -421,8 +422,10 @@
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
+#define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
+#define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
@@ -432,9 +435,14 @@
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
+#define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffffULL
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000ULL
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000ULL
+#define	MC_STATUS_COR_COUNT	0x001fffc000000000ULL	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_TES_STATUS	0x0060000000000000ULL	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_AR		0x0080000000000000ULL	/* If MCG_CAP_CMCI_P */
+#define	MC_STATUS_S		0x0100000000000000ULL	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_PCC		0x0200000000000000ULL
 #define	MC_STATUS_ADDRV		0x0400000000000000ULL
 #define	MC_STATUS_MISCV		0x0800000000000000ULL
@@ -442,6 +450,10 @@
 #define	MC_STATUS_UC		0x2000000000000000ULL
 #define	MC_STATUS_OVER		0x4000000000000000ULL
 #define	MC_STATUS_VAL		0x8000000000000000ULL
+#define	MC_MISC_RA_LSB		0x000000000000003fULL	/* If MCG_CAP_SER_P */
+#define	MC_MISC_ADDRESS_MODE	0x00000000000001c0ULL	/* If MCG_CAP_SER_P */
+#define	MC_CTL2_THRESHOLD	0x0000000000003fffULL
+#define	MC_CTL2_CMCI_EN		0x0000000040000000ULL
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201003161601.o2GG1JmE028502>