Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 18 Feb 2010 10:23:08 -0500
From:      John Baldwin <jhb@freebsd.org>
To:        freebsd-hardware@freebsd.org
Cc:        Charles Owens <cowens@greatbaysoftware.com>
Subject:   Re: mptutil(8) segfault on IBM xSeries 3550
Message-ID:  <201002181023.08131.jhb@freebsd.org>
In-Reply-To: <4B79C9CB.8060102@greatbaysoftware.com>
References:  <4B75AB2D.2090306@greatbaysoftware.com> <4B79C9CB.8060102@greatbaysoftware.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On Monday 15 February 2010 5:25:15 pm Charles Owens wrote:
> Charles Owens wrote:
> > Howdy,
> >
> > We're working with IBM hardware (xSeries 3550) that has an
> > mpt-based RAID controller... after initial success with testing the
> > mptutil utility, now operations other than "show adapter" and "show
> > volume" are resulting in segfaults.
> >
> > While it was working properly we created and removed volumes several
> > times, force-failed drives, and just generally put it through its
> > paces... and all seemed fine.  Then, after a reboot, it suddenly started
> > failing with segfault as described, and nothing we do has helped to get
> > it out of this state (including trying to use the LSI in-BIOS manager to
> > create/delete volumes -- which in and of itself works fine).
> >
> > We found recent thread
> > http://docs.freebsd.org/cgi/mid.cgi?4B56CD4C.80503 and hoped that it
> > might somehow relate... and even tried the patch that John Baldwin
> > posted, but to no avail.
> >
> > Has anyone seen this behavior and/or have a suggested fix or workaround?
> >
> >
> > Here's the output of "mptutil show adapter":
> >
> > mpt0 Adapter:
> >        Board Name: SR-BR10i
> >    Board Assembly: L3-25116-01H
> >         Chip Name: C1068E
> >     Chip Revision: UNUSED
> >       RAID Levels: RAID0, RAID1, RAID1E
> >     RAID0 Stripes: 64K
> >    RAID1E Stripes: 64K
> >  RAID0 Drives/Vol: 1-10
> >  RAID1 Drives/Vol: 2
> > RAID1E Drives/Vol: 3-10
> >
> >
> > This work is being done using FreeBSD 8.0-RELEASE-p2 + PAE.
> >   
> 
> 
> I should add that the RAID controller in question is the IBM
> ServeRAID-BR10i SAS/SATA Controller which is based on the LSI 1068E
> processor, as described here:
> http://www-01.ibm.com/common/ssi/rep_ca/4/872/ENUSAG09-0104/index.html

Try this updated patch.  It should fix the problem of 'mptutil show drives'
displaying all daX devices in the system rather than just the ones on the
mptX bus.  I had incorrectly assumed that multiple XPT match patterns are
combined with AND; they are actually combined with OR, so the old two-pattern
query (bus + peripheral) matched every "da" device.  This changes the code to
first do a lookup for the logical "path" (SCSI bus) of the mptX device and
then do a second lookup to fetch any daX devices on that path.  I tested it
on a machine with an mpt controller and a USB disk.  Unfortunately I wasn't
able to test any of the RAID stuff, just 'show drives'.  This mpt(4)
controller doesn't support RAID either, so I was also able to verify the fix
you had already tested for cleaning up 'show adapter' output in that case.

Index: mpt_cam.c
===================================================================
--- mpt_cam.c	(revision 204004)
+++ mpt_cam.c	(working copy)
@@ -56,15 +56,75 @@
 	return (xptfd);
 }
 
+/* Fetch the path id of bus 0 for the opened mpt controller. */
+static int
+fetch_path_id(path_id_t *path_id)
+{
+	struct bus_match_pattern *b;
+	union ccb ccb;
+	size_t bufsize;
+
+	if (xpt_open() < 0)
+		return (ENXIO);
+
+	/* First, find the path id of bus 0 for this mpt controller. */
+	bzero(&ccb, sizeof(ccb));
+
+	ccb.ccb_h.func_code = XPT_DEV_MATCH;
+
+	bufsize = sizeof(struct dev_match_result) * 1;
+	ccb.cdm.num_matches = 0;
+	ccb.cdm.match_buf_len = bufsize;
+	ccb.cdm.matches = calloc(1, bufsize);
+
+	bufsize = sizeof(struct dev_match_pattern) * 1;
+	ccb.cdm.num_patterns = 1;
+	ccb.cdm.pattern_buf_len = bufsize;
+	ccb.cdm.patterns = calloc(1, bufsize);
+
+	/* Match mptX bus 0. */
+	ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
+	b = &ccb.cdm.patterns[0].pattern.bus_pattern;
+	snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
+	b->unit_number = mpt_unit;
+	b->bus_id = 0;
+	b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
+
+	if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
+		free(ccb.cdm.matches);
+		free(ccb.cdm.patterns);
+		return (errno);
+	}
+	free(ccb.cdm.patterns);
+
+	if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) ||
+	    (ccb.cdm.status != CAM_DEV_MATCH_LAST)) {
+		warnx("fetch_path_id got CAM error %#x, CDM error %d\n",
+		    ccb.ccb_h.status, ccb.cdm.status);
+		free(ccb.cdm.matches);
+		return (EIO);
+	}
+
+	/* We should have exactly 1 match for the bus. */
+	if (ccb.cdm.num_matches != 1 ||
+	    ccb.cdm.matches[0].type != DEV_MATCH_BUS) {
+		free(ccb.cdm.matches);
+		return (ENOENT);
+	}
+	*path_id = ccb.cdm.matches[0].result.bus_result.path_id;
+	free(ccb.cdm.matches);
+	return (0);
+}
+
 int
 mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd)
 {
-	struct bus_match_pattern *b;
 	struct periph_match_pattern *p;
 	struct periph_match_result *r;
 	union ccb ccb;
+	path_id_t path_id;
 	size_t bufsize;
-	int i;
+	int error, i;
 
 	/* mpt(4) only handles devices on bus 0. */
 	if (VolumeBus != 0)
@@ -73,6 +133,11 @@
 	if (xpt_open() < 0)
 		return (ENXIO);
 
+	/* Find the path ID of bus 0. */
+	error = fetch_path_id(&path_id);
+	if (error)
+		return (error);
+
 	bzero(&ccb, sizeof(ccb));
 
 	ccb.ccb_h.func_code = XPT_DEV_MATCH;
@@ -85,25 +150,18 @@
 	ccb.cdm.match_buf_len = bufsize;
 	ccb.cdm.matches = calloc(1, bufsize);
 
-	bufsize = sizeof(struct dev_match_pattern) * 2;
-	ccb.cdm.num_patterns = 2;
+	bufsize = sizeof(struct dev_match_pattern) * 1;
+	ccb.cdm.num_patterns = 1;
 	ccb.cdm.pattern_buf_len = bufsize;
 	ccb.cdm.patterns = calloc(1, bufsize);
 
-	/* Match mptX bus 0. */
-	ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
-	b = &ccb.cdm.patterns[0].pattern.bus_pattern;
-	snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
-	b->unit_number = mpt_unit;
-	b->bus_id = 0;
-	b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
-
 	/* Look for a "da" device at the specified target and lun. */
-	ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH;
-	p = &ccb.cdm.patterns[1].pattern.periph_pattern;
+	ccb.cdm.patterns[0].type = DEV_MATCH_PERIPH;
+	p = &ccb.cdm.patterns[0].pattern.periph_pattern;
+	p->path_id = path_id;
 	snprintf(p->periph_name, sizeof(p->periph_name), "da");
 	p->target_id = VolumeID;
-	p->flags = PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET;
+	p->flags = PERIPH_MATCH_PATH | PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET;
 
 	if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
 		i = errno;
@@ -122,25 +180,22 @@
 	}
 
 	/*
-	 * We should have exactly 2 matches, 1 for the bus and 1 for
-	 * the peripheral.  However, if we only have 1 match and it is
-	 * for the bus, don't print an error message and return
-	 * ENOENT.
+	 * We should have exactly 1 match for the peripheral.
+	 * However, if we don't get a match, don't print an error
+	 * message and return ENOENT.
 	 */
-	if (ccb.cdm.num_matches == 1 &&
-	    ccb.cdm.matches[0].type == DEV_MATCH_BUS) {
+	if (ccb.cdm.num_matches == 0) {
 		free(ccb.cdm.matches);
 		return (ENOENT);
 	}
-	if (ccb.cdm.num_matches != 2) {
-		warnx("mpt_query_disk got %d matches, expected 2",
+	if (ccb.cdm.num_matches != 1) {
+		warnx("mpt_query_disk got %d matches, expected 1",
 		    ccb.cdm.num_matches);
 		free(ccb.cdm.matches);
 		return (EIO);
 	}
-	if (ccb.cdm.matches[0].type != DEV_MATCH_BUS ||
-	    ccb.cdm.matches[1].type != DEV_MATCH_PERIPH) {
-		warnx("mpt_query_disk got wrong CAM matches");
+	if (ccb.cdm.matches[0].type != DEV_MATCH_PERIPH) {
+		warnx("mpt_query_disk got wrong CAM match");
 		free(ccb.cdm.matches);
 		return (EIO);
 	}
@@ -336,47 +391,44 @@
 {
 	CONFIG_PAGE_IOC_2 *ioc2;
 	struct mpt_standalone_disk *disks;
-	struct bus_match_pattern *b;
 	struct periph_match_pattern *p;
 	struct periph_match_result *r;
 	struct cam_device *dev;
 	union ccb ccb;
+	path_id_t path_id;
 	size_t bufsize;
 	u_int i;
-	int count;
+	int count, error;
 
 	if (xpt_open() < 0)
 		return (ENXIO);
 
+	error = fetch_path_id(&path_id);
+	if (error)
+		return (error);
+
 	for (count = 100;; count+= 100) {
 		/* Try to fetch 'count' disks in one go. */
 		bzero(&ccb, sizeof(ccb));
 
 		ccb.ccb_h.func_code = XPT_DEV_MATCH;
 
-		bufsize = sizeof(struct dev_match_result) * (count + 2);
+		bufsize = sizeof(struct dev_match_result) * (count + 1);
 		ccb.cdm.num_matches = 0;
 		ccb.cdm.match_buf_len = bufsize;
 		ccb.cdm.matches = calloc(1, bufsize);
 
-		bufsize = sizeof(struct dev_match_pattern) * 2;
-		ccb.cdm.num_patterns = 2;
+		bufsize = sizeof(struct dev_match_pattern) * 1;
+		ccb.cdm.num_patterns = 1;
 		ccb.cdm.pattern_buf_len = bufsize;
 		ccb.cdm.patterns = calloc(1, bufsize);
 
-		/* Match mptX bus 0. */
-		ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
-		b = &ccb.cdm.patterns[0].pattern.bus_pattern;
-		snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
-		b->unit_number = mpt_unit;
-		b->bus_id = 0;
-		b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
-
 		/* Match any "da" peripherals. */
-		ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH;
-		p = &ccb.cdm.patterns[1].pattern.periph_pattern;
+		ccb.cdm.patterns[0].type = DEV_MATCH_PERIPH;
+		p = &ccb.cdm.patterns[0].pattern.periph_pattern;
+		p->path_id = path_id;
 		snprintf(p->periph_name, sizeof(p->periph_name), "da");
-		p->flags = PERIPH_MATCH_NAME;
+		p->flags = PERIPH_MATCH_PATH | PERIPH_MATCH_NAME;
 
 		if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
 			i = errno;
@@ -406,21 +458,16 @@
 		break;
 	}
 
-	/*
-	 * We should have N + 1 matches, 1 for the bus and 1 for each
-	 * "da" device.
-	 */
-	if (ccb.cdm.num_matches < 1) {
-		warnx("mpt_fetch_disks didn't get any matches");
+	/* Shortcut if we don't have any "da" devices. */
+	if (ccb.cdm.num_matches == 0) {
 		free(ccb.cdm.matches);
-		return (EIO);
+		*ndisks = 0;
+		*disksp = NULL;
+		return (0);
 	}
-	if (ccb.cdm.matches[0].type != DEV_MATCH_BUS) {
-		warnx("mpt_fetch_disks got wrong CAM matches");
-		free(ccb.cdm.matches);
-		return (EIO);
-	}
-	for (i = 1; i < ccb.cdm.num_matches; i++) {
+
+	/* We should have N matches, 1 for each "da" device. */
+	for (i = 0; i < ccb.cdm.num_matches; i++) {
 		if (ccb.cdm.matches[i].type != DEV_MATCH_PERIPH) {
 			warnx("mpt_fetch_disks got wrong CAM matches");
 			free(ccb.cdm.matches);
@@ -428,14 +475,6 @@
 		}
 	}
 
-	/* Shortcut if we don't have any "da" devices. */
-	if (ccb.cdm.num_matches == 1) {
-		free(ccb.cdm.matches);
-		*ndisks = 0;
-		*disksp = NULL;
-		return (0);
-	}
-
 	/*
 	 * Some of the "da" peripherals may be for RAID volumes, so
 	 * fetch the IOC 2 page (list of RAID volumes) so we can
@@ -444,7 +483,7 @@
 	ioc2 = mpt_read_ioc_page(fd, 2, NULL);
 	disks = calloc(ccb.cdm.num_matches, sizeof(*disks));
 	count = 0;
-	for (i = 1; i < ccb.cdm.num_matches; i++) {
+	for (i = 0; i < ccb.cdm.num_matches; i++) {
 		r = &ccb.cdm.matches[i].result.periph_result;
 		if (periph_is_volume(ioc2, r))
 			continue;
@@ -480,10 +519,9 @@
 int
 mpt_rescan_bus(int bus, int id)
 {
-	struct bus_match_pattern *b;
 	union ccb ccb;
 	path_id_t path_id;
-	size_t bufsize;
+	int error;
 
 	/* mpt(4) only handles devices on bus 0. */
 	if (bus != -1 && bus != 0)
@@ -492,54 +530,12 @@
 	if (xpt_open() < 0)
 		return (ENXIO);
 
-	/* First, find the path id of bus 0 for this mpt controller. */
-	bzero(&ccb, sizeof(ccb));
+	error = fetch_path_id(&path_id);
+	if (error)
+		return (error);
 
-	ccb.ccb_h.func_code = XPT_DEV_MATCH;
-
-	bufsize = sizeof(struct dev_match_result) * 1;
-	ccb.cdm.num_matches = 0;
-	ccb.cdm.match_buf_len = bufsize;
-	ccb.cdm.matches = calloc(1, bufsize);
-
-	bufsize = sizeof(struct dev_match_pattern) * 1;
-	ccb.cdm.num_patterns = 1;
-	ccb.cdm.pattern_buf_len = bufsize;
-	ccb.cdm.patterns = calloc(1, bufsize);
-
-	/* Match mptX bus 0. */
-	ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
-	b = &ccb.cdm.patterns[0].pattern.bus_pattern;
-	snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
-	b->unit_number = mpt_unit;
-	b->bus_id = 0;
-	b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
-
-	if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
-		free(ccb.cdm.matches);
-		free(ccb.cdm.patterns);
-		return (errno);
-	}
-	free(ccb.cdm.patterns);
-
-	if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) ||
-	    (ccb.cdm.status != CAM_DEV_MATCH_LAST)) {
-		warnx("mpt_rescan_bus got CAM error %#x, CDM error %d\n",
-		    ccb.ccb_h.status, ccb.cdm.status);
-		free(ccb.cdm.matches);
-		return (EIO);
-	}
-
-	/* We should have exactly 1 match for the bus. */
-	if (ccb.cdm.num_matches != 1 ||
-	    ccb.cdm.matches[0].type != DEV_MATCH_BUS) {
-		free(ccb.cdm.matches);
-		return (ENOENT);
-	}
-	path_id = ccb.cdm.matches[0].result.bus_result.path_id;
-	free(ccb.cdm.matches);
-
-	/* Now perform the actual rescan. */
+	/* Perform the actual rescan. */
+	bzero(&ccb, sizeof(ccb));
 	ccb.ccb_h.path_id = path_id;
 	if (id == -1) {
 		ccb.ccb_h.func_code = XPT_SCAN_BUS;
Index: mpt_show.c
===================================================================
--- mpt_show.c	(revision 204004)
+++ mpt_show.c	(working copy)
@@ -78,6 +78,7 @@
 	CONFIG_PAGE_MANUFACTURING_0 *man0;
 	CONFIG_PAGE_IOC_2 *ioc2;
 	CONFIG_PAGE_IOC_6 *ioc6;
+	U16 IOCStatus;
 	int fd, comma;
 
 	if (ac != 1) {
@@ -108,7 +109,7 @@
 
 	free(man0);
 
-	ioc2 = mpt_read_ioc_page(fd, 2, NULL);
+	ioc2 = mpt_read_ioc_page(fd, 2, &IOCStatus);
 	if (ioc2 != NULL) {
 		printf("      RAID Levels:");
 		comma = 0;
@@ -151,9 +152,11 @@
 			printf(" none");
 		printf("\n");
 		free(ioc2);
-	}
+	} else if ((IOCStatus & MPI_IOCSTATUS_MASK) !=
+	    MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
+		warnx("mpt_read_ioc_page(2): %s", mpt_ioc_status(IOCStatus));
 
-	ioc6 = mpt_read_ioc_page(fd, 6, NULL);
+	ioc6 = mpt_read_ioc_page(fd, 6, &IOCStatus);
 	if (ioc6 != NULL) {
 		display_stripe_map("    RAID0 Stripes",
 		    ioc6->SupportedStripeSizeMapIS);
@@ -172,7 +175,9 @@
 			printf("-%u", ioc6->MaxDrivesIME);
 		printf("\n");
 		free(ioc6);
-	}
+	} else if ((IOCStatus & MPI_IOCSTATUS_MASK) !=
+	    MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
+		warnx("mpt_read_ioc_page(6): %s", mpt_ioc_status(IOCStatus));
 
 	/* TODO: Add an ioctl to fetch IOC_FACTS and print firmware version. */
 
@@ -541,7 +546,8 @@
 	for (i = 0; i <= 0xff; i++) {
 		pinfo = mpt_pd_info(fd, i, &IOCStatus);
 		if (pinfo == NULL) {
-			if (IOCStatus != MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
+			if ((IOCStatus & MPI_IOCSTATUS_MASK) !=
+			    MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
 				warnx("mpt_pd_info(%d): %s", i,
 				    mpt_ioc_status(IOCStatus));
 			continue;

-- 
John Baldwin



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201002181023.08131.jhb>