From owner-svn-src-all@FreeBSD.ORG Wed Nov 20 11:08:08 2013 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 24F0D98; Wed, 20 Nov 2013 11:08:08 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 04ED62198; Wed, 20 Nov 2013 11:08:08 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id rAKB87EI060753; Wed, 20 Nov 2013 11:08:07 GMT (envelope-from avg@svn.freebsd.org) Received: (from avg@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id rAKB87gf060750; Wed, 20 Nov 2013 11:08:07 GMT (envelope-from avg@svn.freebsd.org) Message-Id: <201311201108.rAKB87gf060750@svn.freebsd.org> From: Andriy Gapon Date: Wed, 20 Nov 2013 11:08:07 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r258385 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys X-SVN-Group: vendor-sys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.16 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 20 Nov 2013 11:08:08 -0000 Author: avg Date: Wed Nov 20 11:08:07 2013 New Revision: 258385 URL: http://svnweb.freebsd.org/changeset/base/258385 Log: 4128 disks in zpools never go away when pulled illumos/illumos-gate@39cddb10a31c1c2e66aed69e6871d09caa4c8147 Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_disk.h vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_disk.c Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_disk.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_disk.h Wed Nov 20 11:06:57 2013 (r258384) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_disk.h Wed Nov 20 11:08:07 2013 (r258385) @@ -22,6 +22,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright (c) 2013 Joyent, Inc. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_VDEV_DISK_H @@ -44,6 +45,8 @@ typedef struct vdev_disk { ddi_devid_t vd_devid; char *vd_minor; ldi_handle_t vd_lh; + list_t vd_ldi_cbs; + boolean_t vd_ldi_offline; } vdev_disk_t; #endif Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_disk.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_disk.c Wed Nov 20 11:06:57 2013 (r258384) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_disk.c Wed Nov 20 11:08:07 2013 (r258385) @@ -22,7 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. */ #include @@ -42,6 +42,146 @@ extern ldi_ident_t zfs_li; +static void vdev_disk_close(vdev_t *); + +typedef struct vdev_disk_ldi_cb { + list_node_t lcb_next; + ldi_callback_id_t lcb_id; +} vdev_disk_ldi_cb_t; + +static void +vdev_disk_alloc(vdev_t *vd) +{ + vdev_disk_t *dvd; + + dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); + /* + * Create the LDI event callback list. + */ + list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t), + offsetof(vdev_disk_ldi_cb_t, lcb_next)); +} + +static void +vdev_disk_free(vdev_t *vd) +{ + vdev_disk_t *dvd = vd->vdev_tsd; + vdev_disk_ldi_cb_t *lcb; + + if (dvd == NULL) + return; + + /* + * We have already closed the LDI handle. Clean up the LDI event + * callbacks and free vd->vdev_tsd. + */ + while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) { + list_remove(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_remove_callbacks(lcb->lcb_id); + kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t)); + } + list_destroy(&dvd->vd_ldi_cbs); + kmem_free(dvd, sizeof (vdev_disk_t)); + vd->vdev_tsd = NULL; +} + +/* ARGSUSED */ +static int +vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg, + void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + vdev_disk_t *dvd = vd->vdev_tsd; + + /* + * Ignore events other than offline. + */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) + return (LDI_EV_SUCCESS); + + /* + * All LDI handles must be closed for the state change to succeed, so + * call on vdev_disk_close() to do this. + * + * We inform vdev_disk_close that it is being called from offline + * notify context so it will defer cleanup of LDI event callbacks and + * freeing of vd->vdev_tsd to the offline finalize or a reopen. + */ + dvd->vd_ldi_offline = B_TRUE; + vdev_disk_close(vd); + + /* + * Now that the device is closed, request that the spa_async_thread + * mark the device as REMOVED and notify FMA of the removal. + */ + zfs_post_remove(vd->vdev_spa, vd); + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); + + return (LDI_EV_SUCCESS); +} + +/* ARGSUSED */ +static void +vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie, + int ldi_result, void *arg, void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + + /* + * Ignore events other than offline. + */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) + return; + + /* + * We have already closed the LDI handle in notify. + * Clean up the LDI event callbacks and free vd->vdev_tsd. + */ + vdev_disk_free(vd); + + /* + * Request that the vdev be reopened if the offline state change was + * unsuccessful. + */ + if (ldi_result != LDI_EV_SUCCESS) { + vd->vdev_probe_wanted = B_TRUE; + spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE); + } +} + +static ldi_ev_callback_t vdev_disk_off_callb = { + .cb_vers = LDI_EV_CB_VERS, + .cb_notify = vdev_disk_off_notify, + .cb_finalize = vdev_disk_off_finalize +}; + +/* ARGSUSED */ +static void +vdev_disk_dgrd_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie, + int ldi_result, void *arg, void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + + /* + * Ignore events other than degrade. + */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0) + return; + + /* + * Degrade events always succeed. Mark the vdev as degraded. + * This status is purely informative for the user. + */ + (void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0); +} + +static ldi_ev_callback_t vdev_disk_dgrd_callb = { + .cb_vers = LDI_EV_CB_VERS, + .cb_notify = NULL, + .cb_finalize = vdev_disk_dgrd_finalize +}; + static void vdev_disk_hold(vdev_t *vd) { @@ -146,7 +286,9 @@ vdev_disk_open(vdev_t *vd, uint64_t *psi uint64_t *ashift) { spa_t *spa = vd->vdev_spa; - vdev_disk_t *dvd; + vdev_disk_t *dvd = vd->vdev_tsd; + ldi_ev_cookie_t ecookie; + vdev_disk_ldi_cb_t *lcb; union { struct dk_minfo_ext ude; struct dk_minfo ud; @@ -172,13 +314,25 @@ vdev_disk_open(vdev_t *vd, uint64_t *psi * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ - if (vd->vdev_tsd != NULL) { - ASSERT(vd->vdev_reopening); - dvd = vd->vdev_tsd; - goto skip_open; + if (dvd != NULL) { + if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) { + /* + * If we are opening a device in its offline notify + * context, the LDI handle was just closed. Clean + * up the LDI event callbacks and free vd->vdev_tsd. + */ + vdev_disk_free(vd); + } else { + ASSERT(vd->vdev_reopening); + goto skip_open; + } } - dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); + /* + * Create vd->vdev_tsd. + */ + vdev_disk_alloc(vd); + dvd = vd->vdev_tsd; /* * When opening a disk device, we want to preserve the user's original @@ -211,23 +365,28 @@ vdev_disk_open(vdev_t *vd, uint64_t *psi if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; char *buf = kmem_alloc(len, KM_SLEEP); - ldi_handle_t lh; (void) snprintf(buf, len, "%ss0", vd->vdev_path); - if (ldi_open_by_name(buf, spa_mode(spa), kcred, - &lh, zfs_li) == 0) { + error = ldi_open_by_name(buf, spa_mode(spa), kcred, + &dvd->vd_lh, zfs_li); + if (error == 0) { spa_strfree(vd->vdev_path); vd->vdev_path = buf; vd->vdev_wholedisk = 1ULL; - (void) ldi_close(lh, spa_mode(spa), kcred); } else { kmem_free(buf, len); } } - error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, - &dvd->vd_lh, zfs_li); + /* + * If we have not yet opened the device, try to open it by the + * specified path. + */ + if (error != 0) { + error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), + kcred, &dvd->vd_lh, zfs_li); + } /* * Compare the devid to the stored value. @@ -334,6 +493,27 @@ vdev_disk_open(vdev_t *vd, uint64_t *psi kmem_free(physpath, MAXPATHLEN); } + /* + * Register callbacks for the LDI offline event. + */ + if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) == + LDI_EV_SUCCESS) { + lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); + list_insert_tail(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, + &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id); + } + + /* + * Register callbacks for the LDI degrade event. + */ + if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) == + LDI_EV_SUCCESS) { + lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); + list_insert_tail(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, + &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id); + } skip_open: /* * Determine the actual size of the device. @@ -412,18 +592,31 @@ vdev_disk_close(vdev_t *vd) if (vd->vdev_reopening || dvd == NULL) return; - if (dvd->vd_minor != NULL) + if (dvd->vd_minor != NULL) { ddi_devid_str_free(dvd->vd_minor); + dvd->vd_minor = NULL; + } - if (dvd->vd_devid != NULL) + if (dvd->vd_devid != NULL) { ddi_devid_free(dvd->vd_devid); + dvd->vd_devid = NULL; + } - if (dvd->vd_lh != NULL) + if (dvd->vd_lh != NULL) { (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred); + dvd->vd_lh = NULL; + } vd->vdev_delayed_close = B_FALSE; - kmem_free(dvd, sizeof (vdev_disk_t)); - vd->vdev_tsd = NULL; + /* + * If we closed the LDI handle due to an offline notify from LDI, + * don't free vd->vdev_tsd or unregister the callbacks here; + * the offline finalize callback or a reopen will take care of it. + */ + if (dvd->vd_ldi_offline) + return; + + vdev_disk_free(vd); } int @@ -432,6 +625,13 @@ vdev_disk_physio(vdev_t *vd, caddr_t dat { vdev_disk_t *dvd = vd->vdev_tsd; + /* + * If the vdev is closed, it's likely in the REMOVED or FAULTED state. + * Nothing to be done here but return failure. + */ + if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) + return (EIO); + ASSERT(vd->vdev_ops == &vdev_disk_ops); /* @@ -527,6 +727,15 @@ vdev_disk_io_start(zio_t *zio) buf_t *bp; int error; + /* + * If the vdev is closed, it's likely in the REMOVED or FAULTED state. + * Nothing to be done here but return failure. + */ + if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) { + zio->io_error = ENXIO; + return (ZIO_PIPELINE_CONTINUE); + } + if (zio->io_type == ZIO_TYPE_IOCTL) { /* XXPOLICY */ if (!vdev_readable(vd)) {