From owner-svn-src-all@FreeBSD.ORG Thu Jan 16 15:10:32 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 926309DD; Thu, 16 Jan 2014 15:10:32 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 7AE6E15D6; Thu, 16 Jan 2014 15:10:32 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id s0GFAWE7050555; Thu, 16 Jan 2014 15:10:32 GMT (envelope-from avg@svn.freebsd.org) Received: (from avg@localhost) by svn.freebsd.org (8.14.7/8.14.7/Submit) id s0GFAU5L050535; Thu, 16 Jan 2014 15:10:30 GMT (envelope-from avg@svn.freebsd.org) Message-Id: <201401161510.s0GFAU5L050535@svn.freebsd.org> From: Andriy Gapon Date: Thu, 16 Jan 2014 15:10:30 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r260754 - in stable/9: cddl/contrib/opensolaris/lib/libzpool/common cddl/contrib/opensolaris/lib/libzpool/common/sys sys/cddl/compat/opensolaris/sys sys/cddl/contrib/opensolaris/uts/com... X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.17 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 16 Jan 2014 15:10:32 -0000 Author: avg Date: Thu Jan 16 15:10:29 2014 New Revision: 260754 URL: http://svnweb.freebsd.org/changeset/base/260754 Log: MFC r255437: MFV r247844 (illumos-gate 13975:ef6409bc370f) Note that a different kind of cv_timedwait_hires shim is provided in this branch because cv_timedwait_sbt is not available for better emulation. Sponsored by: HybridCluster [merge] Modified: stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h stable/9/sys/cddl/compat/opensolaris/sys/kcondvar.h stable/9/sys/cddl/compat/opensolaris/sys/time.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c Directory Properties: stable/9/cddl/contrib/opensolaris/ (props changed) stable/9/sys/ (props changed) stable/9/sys/cddl/contrib/opensolaris/ (props changed) Modified: stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c ============================================================================== --- stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c Thu Jan 16 15:10:29 2014 (r260754) @@ -349,6 +349,41 @@ top: return (1); } +/*ARGSUSED*/ +clock_t +cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, + int flag) +{ + int error; + timestruc_t ts; + hrtime_t delta; + + ASSERT(flag == 0); + +top: + delta = tim - gethrtime(); + if (delta <= 0) + return (-1); + + ts.tv_sec = delta / NANOSEC; + ts.tv_nsec = delta % NANOSEC; + + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); + mp->m_owner = curthread; + + if (error == ETIMEDOUT) + return (-1); + + if (error == EINTR) + goto top; + + ASSERT(error == 0); + + return (1); +} + void cv_signal(kcondvar_t *cv) { Modified: stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h ============================================================================== --- stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Thu Jan 16 15:10:29 2014 (r260754) @@ -316,6 +316,8 @@ extern void cv_init(kcondvar_t *cv, char extern void cv_destroy(kcondvar_t *cv); extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); +extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, + hrtime_t res, int flag); extern void cv_signal(kcondvar_t *cv); extern void cv_broadcast(kcondvar_t *cv); Modified: stable/9/sys/cddl/compat/opensolaris/sys/kcondvar.h ============================================================================== --- stable/9/sys/cddl/compat/opensolaris/sys/kcondvar.h Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/compat/opensolaris/sys/kcondvar.h Thu Jan 16 15:10:29 2014 (r260754) @@ -1,5 +1,6 @@ /*- * Copyright (c) 2007 Pawel Jakub Dawidek + * Copyright (c) 2013 iXsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +37,8 @@ #include #include +#include +#include typedef struct cv kcondvar_t; @@ -57,6 +60,18 @@ typedef enum { } while (0) #define cv_init(cv, name, type, arg) zfs_cv_init((cv), (name), (type), (arg)) +static clock_t +cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, + int flag) +{ + /* XXX real hires is not available. */ + + /* We do not attempt to support any flags yet. */ + ASSERT(flag == 0); + + return (cv_timedwait(cvp, mp, NSEC_TO_TICK(tim))); +} + #endif /* _KERNEL */ #endif /* _OPENSOLARIS_SYS_CONDVAR_H_ */ Modified: stable/9/sys/cddl/compat/opensolaris/sys/time.h ============================================================================== --- stable/9/sys/cddl/compat/opensolaris/sys/time.h Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/compat/opensolaris/sys/time.h Thu Jan 16 15:10:29 2014 (r260754) @@ -37,6 +37,9 @@ #define NANOSEC 1000000000 #define TIME_MAX LLONG_MAX +#define MSEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MILLISEC)) +#define NSEC2MSEC(n) ((n) / (NANOSEC / MILLISEC)) + typedef longlong_t hrtime_t; #if defined(__i386__) || defined(__powerpc__) Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c Thu Jan 16 15:10:29 2014 (r260754) @@ -744,7 +744,8 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx); } else { if (err == EAGAIN) { - txg_delay(dd->dd_pool, tx->tx_txg, 1); + txg_delay(dd->dd_pool, tx->tx_txg, + MSEC2NSEC(10), MSEC2NSEC(10)); err = SET_ERROR(ERESTART); } dsl_pool_memory_pressure(dd->dd_pool); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c Thu Jan 16 15:10:29 2014 (r260754) @@ -85,6 +85,9 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, write_l &zfs_write_limit_override, 0, "Force a txg if dirty buffers exceed this value (bytes)"); +hrtime_t zfs_throttle_delay = MSEC2NSEC(10); +hrtime_t zfs_throttle_resolution = MSEC2NSEC(10); + int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { @@ -538,12 +541,13 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t * Weight the throughput calculation towards the current value: * thru = 3/4 old_thru + 1/4 new_thru * - * Note: write_time is in nanosecs, so write_time/MICROSEC - * yields millisecs + * Note: write_time is in nanosecs while dp_throughput is expressed in + * bytes per millisecond. */ ASSERT(zfs_write_limit_min > 0); - if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) { - uint64_t throughput = data_written / (write_time / MICROSEC); + if (data_written > zfs_write_limit_min / 8 && + write_time > MSEC2NSEC(1)) { + uint64_t throughput = data_written / NSEC2MSEC(write_time); if (dp->dp_throughput) dp->dp_throughput = throughput / 4 + @@ -641,8 +645,10 @@ dsl_pool_tempreserve_space(dsl_pool_t *d * the caller 1 clock tick. This will slow down the "fill" * rate until the sync process can catch up with us. */ - if (reserved && reserved > (write_limit - (write_limit >> 3))) - txg_delay(dp, tx->tx_txg, 1); + if (reserved && reserved > (write_limit - (write_limit >> 3))) { + txg_delay(dp, tx->tx_txg, zfs_throttle_delay, + zfs_throttle_resolution); + } return (0); } Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c Thu Jan 16 15:10:29 2014 (r260754) @@ -443,7 +443,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, co zfs_resilver_min_time_ms : zfs_scan_min_time_ms; elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || - (elapsed_nanosecs / MICROSEC > mintime && + (NSEC2MSEC(elapsed_nanosecs) > mintime && txg_sync_waiting(scn->scn_dp)) || spa_shutting_down(scn->scn_dp->dp_spa)) { if (zb) { @@ -1348,7 +1348,7 @@ dsl_scan_free_should_pause(dsl_scan_t *s elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || - (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms && + (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms && txg_sync_waiting(scn->scn_dp)) || spa_shutting_down(scn->scn_dp->dp_spa)); } @@ -1472,7 +1472,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t * "free_bpobj/bptree txg %llu", (longlong_t)scn->scn_visited_this_txg, (longlong_t) - (gethrtime() - scn->scn_sync_start_time) / MICROSEC, + NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)tx->tx_txg); scn->scn_visited_this_txg = 0; /* @@ -1520,7 +1520,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t * zfs_dbgmsg("visited %llu blocks in %llums", (longlong_t)scn->scn_visited_this_txg, - (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC); + (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time)); if (!scn->scn_pausing) { /* finished with scan. */ Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c Thu Jan 16 15:10:29 2014 (r260754) @@ -538,8 +538,8 @@ spa_add(const char *name, nvlist_t *conf hdlr.cyh_level = CY_LOW_LEVEL; #endif - spa->spa_deadman_synctime = zfs_deadman_synctime * - zfs_txg_synctime_ms * MICROSEC; + spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime * + zfs_txg_synctime_ms); #ifdef illumos /* @@ -548,7 +548,7 @@ spa_add(const char *name, nvlist_t *conf * an expensive operation we don't want to check too frequently. * Instead wait for 5 synctimes before checking again. */ - when.cyt_interval = 5ULL * zfs_txg_synctime_ms * MICROSEC; + when.cyt_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms); when.cyt_when = CY_INFINITY; mutex_enter(&cpu_lock); spa->spa_deadman_cycid = cyclic_add(&hdlr, &when); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h Thu Jan 16 15:10:29 2014 (r260754) @@ -74,13 +74,8 @@ extern void txg_rele_to_quiesce(txg_hand extern void txg_rele_to_sync(txg_handle_t *txghp); extern void txg_register_callbacks(txg_handle_t *txghp, list_t *tx_callbacks); -/* - * Delay the caller by the specified number of ticks or until - * the txg closes (whichever comes first). This is intended - * to be used to throttle writers when the system nears its - * capacity. - */ -extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks); +extern void txg_delay(struct dsl_pool *dp, uint64_t txg, hrtime_t delta, + hrtime_t resolution); /* * Wait until the given transaction group has finished syncing. Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h Thu Jan 16 15:10:29 2014 (r260754) @@ -70,7 +70,7 @@ struct tx_cpu { kmutex_t tc_open_lock; /* protects tx_open_txg */ kmutex_t tc_lock; /* protects the rest of this struct */ kcondvar_t tc_cv[TXG_SIZE]; - uint64_t tc_count[TXG_SIZE]; + uint64_t tc_count[TXG_SIZE]; /* tx hold count on each txg */ list_t tc_callbacks[TXG_SIZE]; /* commit cb list */ char tc_pad[8]; /* pad to fill 3 cache lines */ }; @@ -87,8 +87,8 @@ struct tx_cpu { * every cpu (see txg_quiesce()). */ typedef struct tx_state { - tx_cpu_t *tx_cpu; /* protects right to enter txg */ - kmutex_t tx_sync_lock; /* protects tx_state_t */ + tx_cpu_t *tx_cpu; /* protects access to tx_open_txg */ + kmutex_t tx_sync_lock; /* protects the rest of this struct */ uint64_t tx_open_txg; /* currently open txg id */ uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */ uint64_t tx_syncing_txg; /* currently syncing txg id */ Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c Thu Jan 16 14:48:26 2014 (r260753) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c Thu Jan 16 15:10:29 2014 (r260754) @@ -241,7 +241,7 @@ txg_thread_exit(tx_state_t *tx, callb_cp } static void -txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time) +txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, clock_t time) { CALLB_CPR_SAFE_BEGIN(cpr); @@ -370,6 +370,9 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg ASSERT(txg == tx->tx_open_txg); tx->tx_open_txg++; + DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg); + DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg); + /* * Now that we've incremented tx_open_txg, we can let threads * enter the next transaction group. @@ -501,6 +504,7 @@ txg_sync_thread(void *arg) txg = tx->tx_quiesced_txg; tx->tx_quiesced_txg = 0; tx->tx_syncing_txg = txg; + DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_quiesce_more_cv); dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", @@ -514,6 +518,7 @@ txg_sync_thread(void *arg) mutex_enter(&tx->tx_sync_lock); tx->tx_synced_txg = txg; tx->tx_syncing_txg = 0; + DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_sync_done_cv); /* @@ -563,21 +568,22 @@ txg_quiesce_thread(void *arg) */ dprintf("quiesce done, handing off txg %llu\n", txg); tx->tx_quiesced_txg = txg; + DTRACE_PROBE2(txg__quiesced, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_sync_more_cv); cv_broadcast(&tx->tx_quiesce_done_cv); } } /* - * Delay this thread by 'ticks' if we are still in the open transaction - * group and there is already a waiting txg quiescing or quiesced. - * Abort the delay if this txg stalls or enters the quiescing state. + * Delay this thread by delay nanoseconds if we are still in the open + * transaction group and there is already a waiting txg quiesing or quiesced. + * Abort the delay if this txg stalls or enters the quiesing state. */ void -txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) +txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) { tx_state_t *tx = &dp->dp_tx; - clock_t timeout = ddi_get_lbolt() + ticks; + hrtime_t start = gethrtime(); /* don't delay if this txg could transition to quiescing immediately */ if (tx->tx_open_txg > txg || @@ -590,10 +596,11 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, return; } - while (ddi_get_lbolt() < timeout && - tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) - (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock, - timeout - ddi_get_lbolt()); + while (gethrtime() - start < delay && + tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) { + (void) cv_timedwait_hires(&tx->tx_quiesce_more_cv, + &tx->tx_sync_lock, delay, resolution, 0); + } mutex_exit(&tx->tx_sync_lock); }