From owner-svn-src-user@FreeBSD.ORG Fri Nov 18 01:04:25 2011 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id D121E106564A; Fri, 18 Nov 2011 01:04:25 +0000 (UTC) (envelope-from sbruno@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id C01D28FC14; Fri, 18 Nov 2011 01:04:25 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id pAI14PrO040376; Fri, 18 Nov 2011 01:04:25 GMT (envelope-from sbruno@svn.freebsd.org) Received: (from sbruno@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id pAI14P9H040373; Fri, 18 Nov 2011 01:04:25 GMT (envelope-from sbruno@svn.freebsd.org) Message-Id: <201111180104.pAI14P9H040373@svn.freebsd.org> From: Sean Bruno Date: Fri, 18 Nov 2011 01:04:25 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r227644 - user/sbruno/mfid X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 18 Nov 2011 01:04:25 -0000 Author: sbruno Date: Fri Nov 18 01:04:25 2011 New Revision: 227644 URL: http://svn.freebsd.org/changeset/base/227644 Log: Add the Yahoo! mfi(4) monitoring utility for review. Compiles against amd64 freebsd-current at this time. Installs a /usr/sbin/mfid and can be started/stopped via the included rc script. Obtained from: Yahoo! Inc. and jhb@ in a former life Added: user/sbruno/mfid/ user/sbruno/mfid/Makefile user/sbruno/mfid/mfid.c user/sbruno/mfid/mfid.rc (contents, props changed) Added: user/sbruno/mfid/Makefile ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/sbruno/mfid/Makefile Fri Nov 18 01:04:25 2011 (r227644) @@ -0,0 +1,9 @@ +PROG= mfid +BINDIR= /usr/sbin + +CFLAGS+= -g -Wall -Wunused + +NOMAN= +NO_MAN= + +.include Added: user/sbruno/mfid/mfid.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/sbruno/mfid/mfid.c Fri Nov 18 01:04:25 2011 (r227644) @@ -0,0 +1,1011 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MAX_UNIT 16 + +#define VOLUME_DEGRADED(state) \ + ((state) == MFI_LD_STATE_PARTIALLY_DEGRADED || \ + (state) == MFI_LD_STATE_DEGRADED) + +static char hostname[MAXHOSTNAMELEN]; +static char *mailto = "root@localhost"; +static int notifyminutes = 720; /* send mail every 12 hours by default */ +static int dostdout; + +/* Maximum target_id and device_id of volumes and drives, respectively. */ +#define MFI_MAX_LD_ID 256 +#define MFI_MAX_PD_ID 65536 + +struct mfi_physical_drive { + uint32_t generation; + uint16_t device_id; + enum mfi_pd_state state; + uint16_t encl_device_id; + uint8_t encl_index; + uint8_t slot_number; + uint8_t missing; +}; + +struct mfi_logical_drive { + enum mfi_ld_state state; + enum mfi_ld_state prev_state; + int sentcnt; + uint32_t generation; + uint8_t target_id; + int missing_drives; +}; + +struct mfi_controller { + int fd; + int unit; + uint32_t generation; + uint32_t config_size; + int config_valid; + int missing_drives; + int bad_drives; + int prev_bad_drives; + int sentcnt; + struct mfi_config_data *config; + struct mfi_logical_drive *ldrives[MFI_MAX_LD_ID]; + struct mfi_physical_drive *pdrives[MFI_MAX_PD_ID]; +}; + +static struct mfi_array *mfi_find_array(struct mfi_controller *c, + uint16_t array_ref); + +static struct mfi_controller controllers[MAX_UNIT]; +static int ncontrollers; + +static int +mfi_query_disk(struct mfi_controller *c, uint8_t target_id, + struct mfi_query_disk *info) +{ + + info->array_id = target_id; + if (ioctl(c->fd, MFIIO_QUERY_DISK, info) < 0) + return (-1); + if (!info->present) { + errno = ENXIO; + return (-1); + } + return (0); +} + +static const char * +mfi_volume_name(struct mfi_controller *c, uint8_t target_id) +{ + static struct mfi_query_disk info; + static char buf[4]; + + if (mfi_query_disk(c, target_id, &info) < 0) { + snprintf(buf, sizeof(buf), "%d", target_id); + return (buf); + } + return (info.devname); +} + +static int +mfi_dcmd_command(struct mfi_controller *c, uint32_t opcode, void *buf, + size_t bufsize, uint8_t *mbox, size_t mboxlen, uint8_t *statusp) +{ + struct mfi_ioc_passthru ioc; + struct mfi_dcmd_frame *dcmd; + int r; + + if ((mbox != NULL && (mboxlen == 0 || mboxlen > MFI_MBOX_SIZE)) || + (mbox == NULL && mboxlen != 0)) { + errno = EINVAL; + return (-1); + } + + bzero(&ioc, sizeof(ioc)); + dcmd = &ioc.ioc_frame; + if (mbox) + bcopy(mbox, dcmd->mbox, mboxlen); + dcmd->header.cmd = MFI_CMD_DCMD; + dcmd->header.timeout = 0; + dcmd->header.flags = 0; + dcmd->header.data_len = bufsize; + dcmd->opcode = opcode; + + ioc.buf = buf; + ioc.buf_size = bufsize; + r = ioctl(c->fd, MFIIO_PASSTHRU, &ioc); + if (r < 0) + return (r); + + if (statusp != NULL) + *statusp = dcmd->header.cmd_status; + else if (dcmd->header.cmd_status != MFI_STAT_OK) { + warnx("mfi%d: command %x returned error status %x", c->unit, + opcode, dcmd->header.cmd_status); + errno = EIO; + return (-1); + } + return (0); +} + +static void +mbox_store_device_id(uint8_t *mbox, uint16_t device_id) +{ + + mbox[0] = device_id & 0xff; + mbox[1] = device_id >> 8; +} + +static int +mfi_pd_get_list(struct mfi_controller *c, struct mfi_pd_list **listp, + uint8_t *statusp) +{ + struct mfi_pd_list *list; + uint32_t list_size; + + /* + * Keep fetching the list in a loop until we have a large enough + * buffer to hold the entire list. + */ + list = NULL; + list_size = 1024; +fetch: + list = reallocf(list, list_size); + if (list == NULL) + return (-1); + if (mfi_dcmd_command(c, MFI_DCMD_PD_GET_LIST, list, list_size, NULL, 0, + statusp) < 0) { + free(list); + return (-1); + } + + if (list->size > list_size) { + list_size = list->size; + goto fetch; + } + + *listp = list; + return (0); +} + +static int +mfi_pd_get_info(struct mfi_controller *c, uint16_t device_id, + struct mfi_pd_info *info, uint8_t *statusp) +{ + uint8_t mbox[2]; + + mbox_store_device_id(&mbox[0], device_id); + return (mfi_dcmd_command(c, MFI_DCMD_PD_GET_INFO, info, + sizeof(struct mfi_pd_info), mbox, 2, statusp)); +} + +int +mfi_drive_location(char *p, struct mfi_physical_drive *pd) +{ + + if (pd->encl_device_id == 0xffff) + return (sprintf(p, "slot %d", pd->slot_number)); + else if (pd->encl_device_id == pd->device_id) + return (sprintf(p, "enclosure %d", pd->encl_index)); + else + return (sprintf(p, "enclosure %d, slot %d", pd->encl_index, + pd->slot_number)); +} + +static void +mfi_scan_volume(struct mfi_controller *c, struct mfi_ld_config *ldc) +{ + struct mfi_logical_drive *ld; + struct mfi_array *ar; + uint8_t state; + int i, span; + + state = ldc->params.state; + + /* See if we have seen this drive before. */ + ld = c->ldrives[ldc->properties.ld.v.target_id]; + if (ld == NULL) { + ld = calloc(1, sizeof(struct mfi_logical_drive)); + ld->target_id = ldc->properties.ld.v.target_id; + c->ldrives[ld->target_id] = ld; + + ld->prev_state = state; + } else + ld->prev_state = ld->state; + + /* Update generation count and other state. */ + ld->generation = c->generation; + ld->state = state; + + /* + * Scan all the arrays this volume spans to see if this volume + * is missing any drives. + */ + ld->missing_drives = 0; + for (span = 0; span < ldc->params.span_depth; span++) { + ar = mfi_find_array(c, ldc->span[span].array_ref); + + /* Walk the array to find the backing drives. */ + for (i = 0; i < ar->num_drives; i++) + /* Missing drive. */ + if (ar->pd[i].ref.v.device_id == 0xffff) + ld->missing_drives++; + } +} + +static void +mfi_scan_volumes(struct mfi_controller *c) +{ + struct mfi_logical_drive *ld; + char *p; + int i; + + /* Find the first config. */ + p = (char *)c->config->array + + c->config->array_count * c->config->array_size; + + /* Scan all the volumes. */ + for (i = 0; i < c->config->log_drv_count; i++) { + mfi_scan_volume(c, (struct mfi_ld_config *)p); + p += c->config->log_drv_size; + } + + /* Throw away all the volumes that disappeared. */ + for (i = 0; i < MFI_MAX_LD_ID; i++) { + ld = c->ldrives[i]; + if (ld == NULL) + continue; + if (ld->generation != c->generation) { + c->ldrives[i] = NULL; + free(ld); + } + } +} + +static void +mfi_scan_drive(struct mfi_controller *c, uint16_t device_id, uint16_t state) +{ + struct mfi_physical_drive *pd; + struct mfi_pd_info info; + + /* See if we have seen this drive before. */ + pd = c->pdrives[device_id]; + if (pd == NULL) { + pd = calloc(1, sizeof(struct mfi_physical_drive)); + pd->device_id = device_id; + c->pdrives[device_id] = pd; + + if (mfi_pd_get_info(c, device_id, &info, NULL) < 0) + warn("mfi%d: Failed to get info for drive %u", c->unit, + device_id); + else { + pd->encl_device_id = info.encl_device_id; + pd->encl_index = info.encl_index; + pd->slot_number = info.slot_number; + } + } + + /* Update generation count and other state. */ + pd->generation = c->generation; + pd->state = state; + pd->missing = 0; +} + +static void +mfi_scan_drives(struct mfi_controller *c) +{ + struct mfi_physical_drive *pd; + struct mfi_pd_list *list; + struct mfi_pd_info info; + struct mfi_array *ar; + char *p; + int i, j, count; + + /* Find the first array. */ + p = (char *)c->config->array; + + /* Scan all the arrays. */ + c->missing_drives = 0; + for (i = 0; i < c->config->array_count; i++) { + ar = (struct mfi_array *)p; + + /* Scan each drive in the array. */ + for (j = 0; j < ar->num_drives; j++) { + /* Missing drive. */ + if (ar->pd[j].ref.v.device_id == 0xffff) { + c->missing_drives++; + continue; + } + mfi_scan_drive(c, ar->pd[j].ref.v.device_id, + ar->pd[j].fw_state); + } + p += c->config->array_size; + } + + /* Scan all of the physical drives to find bad drives. */ + c->prev_bad_drives = c->bad_drives; + c->bad_drives = 0; + if (mfi_pd_get_list(c, &list, NULL) < 0) + warn("mfi%d: Failed to get physical drive list", c->unit); + else { + for (i = 0; i < list->count; i++) { + if (list->addr[i].scsi_dev_type != 0) + continue; + + /* Skip drives we've already scanned above. */ + pd = c->pdrives[list->addr[i].device_id]; + if (pd != NULL && pd->generation == c->generation) + continue; + + if (mfi_pd_get_info(c, list->addr[i].device_id, &info, + NULL) < 0) { + warn("mfi%d: Failed to get info for drive %u", + c->unit, list->addr[i].device_id); + continue; + } + if (info.fw_state == MFI_PD_STATE_UNCONFIGURED_BAD) { + mfi_scan_drive(c, list->addr[i].device_id, + info.fw_state); + c->bad_drives++; + } + } + free(list); + } + + /* + * If we have any missing drives, check to see if all of the drives + * that disappeared are missing drives. + */ + if (c->missing_drives) { + count = 0; + for (i = 0; i < MFI_MAX_PD_ID; i++) { + pd = c->pdrives[i]; + if (pd == NULL) + continue; + if (pd->generation != c->generation) + count++; + } + + if (count <= c->missing_drives) { + /* + * Ok, it looks like all of the drives that + * disappeared are known to be missing. + */ + for (i = 0; i < MFI_MAX_PD_ID; i++) { + pd = c->pdrives[i]; + if (pd == NULL) + continue; + if (pd->generation != c->generation) { + pd->missing = 1; + pd->generation = c->generation; + } + } + } + } + + /* Throw away all the drives that disappeared. */ + for (i = 0; i < MFI_MAX_PD_ID; i++) { + pd = c->pdrives[i]; + if (pd == NULL) + continue; + if (pd->generation != c->generation) { + c->pdrives[i] = NULL; + free(pd); + } + } +} + +static void +mfi_scan_controller(struct mfi_controller *c) +{ + uint8_t status; + int count; + + c->config_valid = 0; + + /* Start off with just the header. */ + if (c->config == NULL) { + c->config_size = sizeof(struct mfi_config_data); + c->config = malloc(sizeof(struct mfi_config_data)); + }; + +fetch: + /* Try to fetch the RAID configuration for this controller. */ + for (count = 0; count < 5; count++) { + if (mfi_dcmd_command(c, MFI_DCMD_CFG_READ, c->config, + c->config_size, NULL, 0, &status) < 0) { + warn("mfi%d: Failed to get config", c->unit); + return; + } + if (status != MFI_STAT_MEMORY_NOT_AVAILABLE) + break; + sleep(5); + } + if (status != MFI_STAT_OK) { + warnx("mfi%d: Failed to get config with error status %x", + c->unit, status); + return; + } + + /* Is the size too small? */ + if (c->config_size < c->config->size) { + c->config_size = c->config->size; + c->config = realloc(c->config, c->config_size); + if (c->config == NULL) { + warn("mfi%d: Failed to grow config object", c->unit); + return; + } + goto fetch; + } + + /* + * Ok, now we have a config. The config contains 3 arrays for us to + * process. The first array contains MFI_ARRAY objects which define + * RAID arrays of physical drives. The second array contains + * MFI_LD_CONFIG objects which define logical drives, or volumes, + * that are created by taking spans from backing MFI_ARRAYs. Finally, + * the third array consists of MFI_SPARE objects describing spare + * disks. We ignore the spares. We care about the states of the + * volumes (degraded or not) and the state of the drives backing each + * of the volumes. The MFI_LD_CONFIG objects already contain the + * state of each volume, but we need to query each of the physical + * drives to determine their state. + */ + + /* Bump the overall generation count. */ + c->generation++; + c->config_valid = 1; + + mfi_scan_volumes(c); + mfi_scan_drives(c); +} + +static void +mfi_scan_all(void) +{ + int i; + + for (i = 0; i < ncontrollers; i++) + mfi_scan_controller(&controllers[i]); +} + +static int +mfi_open(void) +{ + char path[MAXPATHLEN]; + int fd, unit; + + ncontrollers = 0; + for (unit = 0; unit < MAX_UNIT; unit++) { + snprintf(path, sizeof(path), "/dev/mfi%d", unit); + fd = open(path, O_RDWR); + if (fd < 0) + continue; + controllers[ncontrollers].fd = fd; + controllers[ncontrollers].unit = unit; + ncontrollers++; + } + if (ncontrollers == 0) + return (ncontrollers); + + mfi_scan_all(); + + return (ncontrollers); +} + +static FILE * +mailer_open(void) +{ + FILE *fp; + + if (dostdout) + fp = stdout; + else + fp = popen("/usr/sbin/sendmail -t", "w"); + fprintf(fp, "To: %s\n", mailto); + return fp; +} + +static void +mailer_close(FILE *fp) +{ + + if (dostdout == 0) + pclose(fp); + else + fflush(fp); +} + +static void +mailer_write(FILE *fp, const char *fmt, ...) +{ + va_list ap; + char *mfmt, *pfmt = NULL; + + pfmt = mfmt = strdup(fmt); + + va_start (ap, fmt); + vfprintf (fp, fmt, ap); + va_end (ap); + + /* XXX: Hack for Subject: */ + if (strncmp(fmt, "Subject: ", 9) == 0) { + char *p; + pfmt += strlen("Subject: "); + if ((p = strchr(pfmt, '\n')) != NULL) + *p = '\0'; + } + + if (dostdout == 0) { + va_start (ap, fmt); + vsyslog(LOG_CRIT, pfmt, ap); + va_end (ap); + } + + if (mfmt) + free(mfmt); +} + +static struct mfi_ld_config * +mfi_find_ld_config(struct mfi_controller *c, uint8_t target_id) +{ + struct mfi_ld_config *ld; + char *p; + int i; + + p = (char *)&c->config[1] + + c->config->array_count * c->config->array_size; + for (i = 0; i < c->config->log_drv_count; i++) { + ld = (struct mfi_ld_config *)p; + if (ld->properties.ld.v.target_id == target_id) + return (ld); + p += c->config->log_drv_size; + } + return (NULL); +} + +static struct mfi_array * +mfi_find_array(struct mfi_controller *c, uint16_t array_ref) +{ + struct mfi_array *ar; + char *p; + int i; + + p = (char *)&c->config[1]; + for (i = 0; i < c->config->array_count; i++) { + ar = (struct mfi_array *)p; + if (ar->array_ref == array_ref) + return (ar); + p += c->config->array_size; + } + return (NULL); +} + +static int +mfi_in_state(uint16_t state, struct mfi_controller *c, + struct mfi_logical_drive *ld) +{ + struct mfi_physical_drive *pd; + struct mfi_ld_config *ldc; + struct mfi_array *ar; + int i, instate, span; + + instate = 0; + + /* Find the config for this volume. */ + ldc = mfi_find_ld_config(c, ld->target_id); + + /* Walk each span for this volume. */ + for (span = 0; span < ldc->params.span_depth; span++) { + ar = mfi_find_array(c, ldc->span[span].array_ref); + + /* Walk the array to find the backing drives. */ + for (i = 0; i < ar->num_drives; i++) { + pd = c->pdrives[ar->pd[i].ref.v.device_id]; + if (pd == NULL) + continue; + if (pd->state == state) { + instate++; + } + } + } + return (instate); +} + +char * +mfi_show_state(uint16_t state, struct mfi_controller *c, + struct mfi_logical_drive *ld) +{ + struct mfi_physical_drive *pd; + struct mfi_ld_config *ldc; + struct mfi_array *ar; + int i, comma = 0, instate, span; + char *str, *p; + + instate = mfi_in_state(state, c, ld); + if (instate == 0) + return (NULL); + + str = calloc(instate * 64, sizeof(char)); + if (str == NULL) + return (NULL); + + p = str; + *p++ = '('; + + /* Find the config for this volume. */ + ldc = mfi_find_ld_config(c, ld->target_id); + + /* Walk each span for this volume. */ + for (span = 0; span < ldc->params.span_depth; span++) { + ar = mfi_find_array(c, ldc->span[span].array_ref); + + /* Walk the array to find the backing drives. */ + for (i = 0; i < ar->num_drives; i++) { + pd = c->pdrives[ar->pd[i].ref.v.device_id]; + if (pd == NULL) + continue; + if (pd->state == state) { + if (comma++) + *p++ = ','; + p += sprintf(p, "drive %u in ", pd->device_id); + p += mfi_drive_location(p, pd); + } + } + } + if ((p - str) == 1) { + int n = sprintf(p, "none"); + p += n; + } + *p = ')'; + + return (str); +} + +char * +mfi_show_missing(struct mfi_controller *c, struct mfi_logical_drive *ld) +{ + struct mfi_physical_drive *pd; + char *str, *p; + int i, comma = 0, instate; + + if (c->missing_drives == 0 || ld->missing_drives == 0) + return (NULL); + + instate = 0; + for (i = 0; i < MFI_MAX_PD_ID; i++) { + pd = c->pdrives[i]; + if (pd == NULL) + continue; + if (pd->missing) + instate++; + } + if (instate == 0) + return (NULL); + + str = calloc(instate * 64, sizeof(char)); + if (str == NULL) + return (NULL); + + p = str; + *p++ = '('; + for (i = 0; i < MFI_MAX_PD_ID; i++) { + pd = c->pdrives[i]; + if (pd == NULL) + continue; + if (pd->missing) { + if (comma++) + *p++ = ','; + p += sprintf(p, "drive %u in ", pd->device_id); + p += mfi_drive_location(p, pd); + } + } + if ((p - str) == 1) { + int n = sprintf(p, "none"); + p += n; + } + *p = ')'; + + return (str); +} + +char * +mfi_show_bad(struct mfi_controller *c) +{ + struct mfi_physical_drive *pd; + char *str, *p; + int i, comma = 0, instate; + + if (c->bad_drives == 0) + return (NULL); + + instate = 0; + for (i = 0; i < MFI_MAX_PD_ID; i++) { + pd = c->pdrives[i]; + if (pd == NULL) + continue; + if (pd->state == MFI_PD_STATE_UNCONFIGURED_BAD) + instate++; + } + if (instate == 0) + return (NULL); + + str = calloc(instate * 64, sizeof(char)); + if (str == NULL) + return (NULL); + + p = str; + *p++ = '('; + for (i = 0; i < MFI_MAX_PD_ID; i++) { + pd = c->pdrives[i]; + if (pd == NULL) + continue; + if (pd->state == MFI_PD_STATE_UNCONFIGURED_BAD) { + if (comma++) + *p++ = ','; + p += sprintf(p, "drive %u in ", pd->device_id); + p += mfi_drive_location(p, pd); + } + } + if ((p - str) == 1) { + int n = sprintf(p, "none"); + p += n; + } + *p = ')'; + + return (str); +} + +static void +mfi_notify_failure(struct mfi_controller *c, struct mfi_logical_drive *ld) +{ + FILE *fp; + int *sentcnt; + char *bad, *failed, *missing, *rebuild; + + sentcnt = &ld->sentcnt; + if (ld->state == ld->prev_state && + c->bad_drives == c->prev_bad_drives && + ((*sentcnt)++ % notifyminutes) != 0) + return; + *sentcnt = 1; + c->sentcnt = 1; + + bad = mfi_show_bad(c); + rebuild = mfi_show_state(MFI_PD_STATE_REBUILD, c, ld); + failed = mfi_show_state(MFI_PD_STATE_FAILED, c, ld); + missing = mfi_show_missing(c, ld); + + fp = mailer_open(); + mailer_write(fp, "Subject: [MFI ALERT] controller %d vol %s on %s\n\n", + c->unit, mfi_volume_name(c, ld->target_id), hostname); + if (!VOLUME_DEGRADED(ld->state)) { + mailer_write(fp, + "%s: controller %d volume %s is rebuilt and no longer has errors\n", + hostname, c->unit, mfi_volume_name(c, ld->target_id)); + } else { + if (rebuild) + mailer_write(fp, + "%s: recovering to %s on controller %d volume %s\n", + hostname, rebuild, c->unit, + mfi_volume_name(c, ld->target_id)); + if (failed) + mailer_write(fp, + "%s: disk(s) on controller %d volume %s needs to be replaced: %s\n", + hostname, c->unit, + mfi_volume_name(c, ld->target_id), failed); + if (missing) + mailer_write(fp, + "%s: disk(s) on controller %d volume %s are missing: %s\n", + hostname, c->unit, + mfi_volume_name(c, ld->target_id), missing); + else if (ld->missing_drives) + mailer_write(fp, + "%s: %d disk(s) on controller %d volume %s are missing\n", + hostname, ld->missing_drives, c->unit, + mfi_volume_name(c, ld->target_id)); + } + if (bad) + mailer_write(fp, "%s: disk(s) on controller %d are bad: %s\n", + hostname, c->unit, bad); + else if (c->bad_drives) + mailer_write(fp, "%s: %d disk(s) on controller %d are bad\n", + hostname, c->bad_drives, c->unit); + else if (c->prev_bad_drives) + mailer_write(fp, + "%s: controller %d no longer has any bad disks\n", + hostname, c->unit); + + if (bad) + free(bad); + if (failed) + free(failed); + if (rebuild) + free(rebuild); + if (missing) + free(missing); + + mailer_close(fp); +} + +static void +mfi_notify_bad(struct mfi_controller *c) +{ + FILE *fp; + int *sentcnt; + char *bad; + + sentcnt = &c->sentcnt; + if (c->bad_drives == c->prev_bad_drives && + ((*sentcnt)++ % notifyminutes) != 0) + return; + *sentcnt = 1; + + bad = mfi_show_bad(c); + + fp = mailer_open(); + mailer_write(fp, "Subject: [MFI ALERT] controller %d on %s\n\n", + c->unit, hostname); + if (bad) + mailer_write(fp, "%s: disk(s) on controller %d are bad: %s\n", + hostname, c->unit, bad); + else if (c->bad_drives) + mailer_write(fp, "%s: %d disk(s) on controller %d are bad\n", + hostname, c->bad_drives, c->unit); + else + mailer_write(fp, + "%s: controller %d no longer has any bad disks\n", + hostname, c->unit); + + if (bad) + free(bad); + + mailer_close(fp); +} + +static void +mfi_check_volumes(void) +{ + struct mfi_logical_drive *ld; + struct mfi_controller *c; + int i, j, notified; + + for (i = 0; i < ncontrollers; i++) { + c = &controllers[i]; + if (!c->config_valid) + continue; + notified = 0; + for (j = 0; j < MFI_MAX_LD_ID; j++) { + ld = c->ldrives[j]; + if (ld == NULL) + continue; + + if (VOLUME_DEGRADED(ld->state) || + VOLUME_DEGRADED(ld->prev_state)) { + mfi_notify_failure(c, ld); + notified = 1; + } + } + if (!notified && + (c->bad_drives != 0 || c->prev_bad_drives != 0)) + mfi_notify_bad(c); + } +} + +static void +mfi_stop_patrol(void) +{ + struct mfi_controller *c; + struct mfi_pr_status status; + int i; + + for (i = 0; i < ncontrollers; i++) { + c = &controllers[i]; + if (c->config_valid == 0) + continue; + if (mfi_dcmd_command(c, MFI_DCMD_PR_GET_STATUS, &status, + sizeof(status), NULL, 0, NULL) < 0) + continue; + if (status.state == MFI_PR_STATE_STOPPED) + continue; + if (mfi_dcmd_command(c, MFI_DCMD_PR_STOP, NULL, 0, + NULL, 0, NULL) < 0) + warn("Failed to stop patrol reads"); + } +} + +static void +mfi_disable_patrol(void) +{ + struct mfi_controller *c; + struct mfi_pr_properties prop; + int i; + + for (i = 0; i < ncontrollers; i++) { + c = &controllers[i]; + if (c->config_valid == 0) + continue; + if (mfi_dcmd_command(c, MFI_DCMD_PR_GET_PROPERTIES, &prop, + sizeof(prop), NULL, 0, NULL) < 0) + continue; + if (prop.op_mode == MFI_PR_OPMODE_DISABLED) + continue; + prop.op_mode = MFI_PR_OPMODE_DISABLED; + if (mfi_dcmd_command(c, MFI_DCMD_PR_SET_PROPERTIES, &prop, + sizeof(prop), NULL, 0, NULL) < 0) + warn("Failed to disable patrol reads"); + } +} + +static void +usage(void) +{ + fprintf(stderr, "usage: mfid [-ds] [-t minutes] [mailto]\n"); + exit(1); +} + +int +main(int ac, char *av[]) +{ + int ch, daemonize = 1; + + while ((ch = getopt(ac, av, "dst:")) != -1) { + switch (ch) { + case 'd': + daemonize = 0; + break; + + case 't': + notifyminutes = atoi(optarg); + break; + + case 's': + dostdout = 1; + break; + case '?': + usage(); + } + } + + av += optind; + ac -= optind; + *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***