Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 06 Nov 2014 13:03:58 +0000
From:      Steven Hartland <steven@multiplay.co.uk>
To:        Andriy Gapon <avg@FreeBSD.org>, src-committers@freebsd.org,  svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r274172 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <545B71BE.8000605@freebsd.org>
In-Reply-To: <201411061108.sA6B821h087308@svn.freebsd.org>
References:  <201411061108.sA6B821h087308@svn.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
Nice work :)

On 06/11/2014 11:08, Andriy Gapon wrote:
> Author: avg
> Date: Thu Nov  6 11:08:02 2014
> New Revision: 274172
> URL: https://svnweb.freebsd.org/changeset/base/274172
>
> Log:
>    fix l2arc compression buffers leak
>    
>    We have observed that arc_release() can be called concurrently with a
>    l2arc in-flight write.
>    Also, we have observed that arc_hdr_destroy() can be called from
>    arc_write_done() for a zio with ZIO_FLAG_IO_REWRITE flag in similar
>    circumstances.
>    
>    Previously the l2arc headers would be freed while leaking their
>    associated compression buffers.  Now the buffers are placed on
>    l2arc_free_on_write list for delayed freeing.  This is similar to what
>    was already done to arc buffers that were supposed to be freed
>    concurrently with in-flight writes of those buffers.
>    
>    In addition to fixing the discovered leaks this change also adds some
>    protective code to assert that a compression buffer associated with a
>    l2arc header is never leaked.
>    
>    A new kstat l2_cdata_free_on_write is added.  It keeps a count of
>    delayed compression buffer frees which previously would have been leaks.
>    
>    Tested by:	Vitalij Satanivskij <satan@ukr.net> et al
>    Requested by:	many
>    MFC after:	2 weeks
>    Sponsored by:	HybridCluster / ClusterHQ
>
> Modified:
>    head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
>
> Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
> ==============================================================================
> --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Thu Nov  6 10:30:10 2014	(r274171)
> +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Thu Nov  6 11:08:02 2014	(r274172)
> @@ -387,6 +387,7 @@ typedef struct arc_stats {
>   	kstat_named_t arcstat_l2_evict_lock_retry;
>   	kstat_named_t arcstat_l2_evict_reading;
>   	kstat_named_t arcstat_l2_free_on_write;
> +	kstat_named_t arcstat_l2_cdata_free_on_write;
>   	kstat_named_t arcstat_l2_abort_lowmem;
>   	kstat_named_t arcstat_l2_cksum_bad;
>   	kstat_named_t arcstat_l2_io_error;
> @@ -464,6 +465,7 @@ static arc_stats_t arc_stats = {
>   	{ "l2_evict_lock_retry",	KSTAT_DATA_UINT64 },
>   	{ "l2_evict_reading",		KSTAT_DATA_UINT64 },
>   	{ "l2_free_on_write",		KSTAT_DATA_UINT64 },
> +	{ "l2_cdata_free_on_write",	KSTAT_DATA_UINT64 },
>   	{ "l2_abort_lowmem",		KSTAT_DATA_UINT64 },
>   	{ "l2_cksum_bad",		KSTAT_DATA_UINT64 },
>   	{ "l2_io_error",		KSTAT_DATA_UINT64 },
> @@ -1655,6 +1657,21 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
>   	    data, metadata, hits);
>   }
>   
> +static void
> +arc_buf_free_on_write(void *data, size_t size,
> +    void (*free_func)(void *, size_t))
> +{
> +	l2arc_data_free_t *df;
> +
> +	df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
> +	df->l2df_data = data;
> +	df->l2df_size = size;
> +	df->l2df_func = free_func;
> +	mutex_enter(&l2arc_free_on_write_mtx);
> +	list_insert_head(l2arc_free_on_write, df);
> +	mutex_exit(&l2arc_free_on_write_mtx);
> +}
> +
>   /*
>    * Free the arc data buffer.  If it is an l2arc write in progress,
>    * the buffer is placed on l2arc_free_on_write to be freed later.
> @@ -1665,14 +1682,7 @@ arc_buf_data_free(arc_buf_t *buf, void (
>   	arc_buf_hdr_t *hdr = buf->b_hdr;
>   
>   	if (HDR_L2_WRITING(hdr)) {
> -		l2arc_data_free_t *df;
> -		df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
> -		df->l2df_data = buf->b_data;
> -		df->l2df_size = hdr->b_size;
> -		df->l2df_func = free_func;
> -		mutex_enter(&l2arc_free_on_write_mtx);
> -		list_insert_head(l2arc_free_on_write, df);
> -		mutex_exit(&l2arc_free_on_write_mtx);
> +		arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
>   		ARCSTAT_BUMP(arcstat_l2_free_on_write);
>   	} else {
>   		free_func(buf->b_data, hdr->b_size);
> @@ -1684,6 +1694,23 @@ arc_buf_data_free(arc_buf_t *buf, void (
>    * arc_buf_t off of the the arc_buf_hdr_t's list and free it.
>    */
>   static void
> +arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
> +{
> +	l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
> +
> +	ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
> +
> +	if (l2hdr->b_tmp_cdata == NULL)
> +		return;
> +
> +	ASSERT(HDR_L2_WRITING(hdr));
> +	arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
> +	    zio_data_buf_free);
> +	ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
> +	l2hdr->b_tmp_cdata = NULL;
> +}
> +
> +static void
>   arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
>   {
>   	arc_buf_t **bufp;
> @@ -1782,6 +1809,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
>   			trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
>   			    hdr->b_size, 0);
>   			list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
> +			arc_buf_l2_cdata_free(hdr);
>   			ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
>   			ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
>   			vdev_space_update(l2hdr->b_dev->l2ad_vdev,
> @@ -3675,6 +3703,7 @@ arc_release(arc_buf_t *buf, void *tag)
>   	l2hdr = hdr->b_l2hdr;
>   	if (l2hdr) {
>   		mutex_enter(&l2arc_buflist_mtx);
> +		arc_buf_l2_cdata_free(hdr);
>   		hdr->b_l2hdr = NULL;
>   		list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
>   	}
> @@ -4964,6 +4993,11 @@ top:
>   				ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
>   				bytes_evicted += abl2->b_asize;
>   				ab->b_l2hdr = NULL;
> +				/*
> +				 * We are destroying l2hdr, so ensure that
> +				 * its compressed buffer, if any, is not leaked.
> +				 */
> +				ASSERT(abl2->b_tmp_cdata == NULL);
>   				kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
>   				ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
>   			}
> @@ -5202,6 +5236,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
>   		buf_data = l2hdr->b_tmp_cdata;
>   		buf_sz = l2hdr->b_asize;
>   
> +		/*
> +		 * If the data has not been compressed, then clear b_tmp_cdata
> +		 * to make sure that it points only to a temporary compression
> +		 * buffer.
> +		 */
> +		if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
> +			l2hdr->b_tmp_cdata = NULL;
> +
>   		/* Compression may have squashed the buffer to zero length. */
>   		if (buf_sz != 0) {
>   			uint64_t buf_p_sz;
> @@ -5392,15 +5434,18 @@ l2arc_release_cdata_buf(arc_buf_hdr_t *a
>   {
>   	l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
>   
> -	if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {
> +	ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
> +	if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
>   		/*
>   		 * If the data was compressed, then we've allocated a
>   		 * temporary buffer for it, so now we need to release it.
>   		 */
>   		ASSERT(l2hdr->b_tmp_cdata != NULL);
>   		zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
> +		l2hdr->b_tmp_cdata = NULL;
> +	} else {
> +		ASSERT(l2hdr->b_tmp_cdata == NULL);
>   	}
> -	l2hdr->b_tmp_cdata = NULL;
>   }
>   
>   /*
>




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?545B71BE.8000605>