Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 21 Mar 2021 01:25:36 GMT
From:      Martin Matuska <mm@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: f9693bef8dc8 - main - zfs: merge OpenZFS master-891568c99
Message-ID:  <202103210125.12L1PaZZ046850@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by mm:

URL: https://cgit.FreeBSD.org/src/commit/?id=f9693bef8dc83284e7ac905adc346f7d866b5245

commit f9693bef8dc83284e7ac905adc346f7d866b5245
Merge: 815209920f1d 48a1c304e82e
Author:     Martin Matuska <mm@FreeBSD.org>
AuthorDate: 2021-03-21 00:46:08 +0000
Commit:     Martin Matuska <mm@FreeBSD.org>
CommitDate: 2021-03-21 01:17:59 +0000

    zfs: merge OpenZFS master-891568c99
    
    Notable upstream pull request merges:
      #11652 Split dmu_zfetch() speculation and execution parts
      #11682 Fix zfs_get_data access to files with wrong generation
      #11735 Clean up RAIDZ/DRAID ereport code
      #11737 Initialize metaslab range trees in metaslab_init
      #11739 FreeBSD: make seqc asserts conditional on replay
      #11763 Allow setting bootfs property on pools with indirect vdevs
      #11767 FreeBSD: Fix memory leaks in kstats
    
    Obtained from:  OpenZFS
    MFC after:      2 weeks

 sys/contrib/openzfs/README.md                      |   2 +-
 sys/contrib/openzfs/cmd/raidz_test/raidz_test.c    |   2 -
 sys/contrib/openzfs/cmd/ztest/ztest.c              |   4 +-
 sys/contrib/openzfs/config/kernel-bio_max_segs.m4  |  23 ++
 .../openzfs/config/kernel-generic_fillattr.m4      |  28 +++
 sys/contrib/openzfs/config/kernel-inode-create.m4  |  43 +++-
 sys/contrib/openzfs/config/kernel-inode-getattr.m4 |  63 ++++-
 .../openzfs/config/kernel-is_owner_or_cap.m4       |  23 +-
 sys/contrib/openzfs/config/kernel-mkdir-umode-t.m4 |  32 ---
 sys/contrib/openzfs/config/kernel-mkdir.m4         |  65 +++++
 sys/contrib/openzfs/config/kernel-mknod.m4         |  30 +++
 sys/contrib/openzfs/config/kernel-rename.m4        |  50 +++-
 .../openzfs/config/kernel-setattr-prepare.m4       |  45 +++-
 sys/contrib/openzfs/config/kernel-symlink.m4       |  30 +++
 sys/contrib/openzfs/config/kernel-xattr-handler.m4 |  78 ++++--
 sys/contrib/openzfs/config/kernel.m4               |  20 +-
 sys/contrib/openzfs/configure.ac                   |   1 +
 .../include/os/linux/kernel/linux/kmap_compat.h    |   4 +-
 .../include/os/linux/kernel/linux/vfs_compat.h     |  24 +-
 .../include/os/linux/kernel/linux/xattr_compat.h   |  17 +-
 .../include/os/linux/zfs/sys/zfs_vnops_os.h        |   3 +-
 .../include/os/linux/zfs/sys/zfs_znode_impl.h      |   8 +-
 sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h |  18 ++
 sys/contrib/openzfs/include/sys/dmu_zfetch.h       |  23 +-
 sys/contrib/openzfs/include/sys/vdev_raidz.h       |   2 +
 sys/contrib/openzfs/include/sys/vdev_raidz_impl.h  |   7 +-
 sys/contrib/openzfs/include/sys/zil.h              |   3 +-
 sys/contrib/openzfs/include/sys/zio.h              |  10 +-
 sys/contrib/openzfs/include/sys/zvol_impl.h        |   4 +-
 .../openzfs/man/man5/zfs-module-parameters.5       |  25 +-
 sys/contrib/openzfs/man/man8/zfs-allow.8           |   3 +
 sys/contrib/openzfs/man/man8/zgenhostid.8          |   4 +-
 sys/contrib/openzfs/man/man8/zpoolconcepts.8       |  17 ++
 .../openzfs/module/os/freebsd/spl/spl_kstat.c      |  11 +-
 .../openzfs/module/os/freebsd/zfs/sysctl_os.c      |   6 -
 sys/contrib/openzfs/module/os/linux/zfs/abd_os.c   |  10 +-
 sys/contrib/openzfs/module/os/linux/zfs/policy.c   |   2 +-
 .../openzfs/module/os/linux/zfs/vdev_disk.c        |   5 +
 .../openzfs/module/os/linux/zfs/zfs_ctldir.c       |   3 +-
 sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c  |   4 +-
 .../openzfs/module/os/linux/zfs/zfs_vfsops.c       |   6 +-
 .../openzfs/module/os/linux/zfs/zfs_vnops_os.c     |   5 +-
 .../openzfs/module/os/linux/zfs/zpl_ctldir.c       |  51 +++-
 sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c |   2 +-
 .../openzfs/module/os/linux/zfs/zpl_inode.c        |  52 +++-
 .../openzfs/module/os/linux/zfs/zpl_xattr.c        |   4 +-
 sys/contrib/openzfs/module/zfs/dbuf.c              |   5 +-
 sys/contrib/openzfs/module/zfs/dmu.c               |  35 ++-
 sys/contrib/openzfs/module/zfs/dmu_zfetch.c        | 250 +++++++++++--------
 sys/contrib/openzfs/module/zfs/metaslab.c          | 149 +++++-------
 sys/contrib/openzfs/module/zfs/refcount.c          |  10 +-
 sys/contrib/openzfs/module/zfs/vdev.c              |   4 +-
 sys/contrib/openzfs/module/zfs/vdev_draid.c        | 240 +------------------
 sys/contrib/openzfs/module/zfs/vdev_indirect.c     |   1 -
 sys/contrib/openzfs/module/zfs/vdev_mirror.c       |   5 +-
 sys/contrib/openzfs/module/zfs/vdev_raidz.c        | 266 +++------------------
 sys/contrib/openzfs/module/zfs/zfs_fm.c            |   8 +-
 sys/contrib/openzfs/module/zfs/zfs_fuid.c          |   4 -
 sys/contrib/openzfs/module/zfs/zfs_log.c           |   5 +
 sys/contrib/openzfs/module/zfs/zfs_vnops.c         |  14 +-
 sys/contrib/openzfs/module/zfs/zil.c               |   3 +-
 sys/contrib/openzfs/module/zfs/zio.c               |   4 +-
 sys/contrib/openzfs/module/zfs/zvol.c              |   3 +-
 sys/contrib/openzfs/tests/runfiles/common.run      |   8 +-
 sys/contrib/openzfs/tests/runfiles/freebsd.run     |   4 +
 sys/contrib/openzfs/tests/runfiles/sanity.run      |   4 +
 .../zfs-tests/tests/functional/acl/Makefile.am     |   2 +-
 .../zfs-tests/tests/functional/acl/off/.gitignore  |   1 +
 .../zfs-tests/tests/functional/acl/off/Makefile.am |  16 ++
 .../zfs-tests/tests/functional/acl/off/cleanup.ksh |  33 +++
 .../zfs-tests/tests/functional/acl/off/dosmode.ksh | 199 +++++++++++++++
 .../functional/acl/off/dosmode_readonly_write.c    |  61 +++++
 .../tests/functional/acl/off/posixmode.ksh         | 145 +++++++++++
 .../zfs-tests/tests/functional/acl/off/setup.ksh   |  44 ++++
 .../tests/functional/redacted_send/Makefile.am     |   1 +
 .../functional/redacted_send/redacted_panic.ksh    |  44 ++++
 sys/modules/zfs/zfs_config.h                       |   4 +-
 77 files changed, 1561 insertions(+), 883 deletions(-)

diff --cc sys/contrib/openzfs/README.md
index 31d99386e90e,000000000000..d666df7af309
mode 100644,000000..100644
--- a/sys/contrib/openzfs/README.md
+++ b/sys/contrib/openzfs/README.md
@@@ -1,35 -1,0 +1,35 @@@
 +![img](https://openzfs.github.io/openzfs-docs/_static/img/logo/480px-Open-ZFS-Secondary-Logo-Colour-halfsize.png)
 +
 +OpenZFS is an advanced file system and volume manager which was originally
 +developed for Solaris and is now maintained by the OpenZFS community.
 +This repository contains the code for running OpenZFS on Linux and FreeBSD.
 +
 +[![codecov](https://codecov.io/gh/openzfs/zfs/branch/master/graph/badge.svg)](https://codecov.io/gh/openzfs/zfs)
 +[![coverity](https://scan.coverity.com/projects/1973/badge.svg)](https://scan.coverity.com/projects/openzfs-zfs)
 +
 +# Official Resources
 +
 +  * [Documentation](https://openzfs.github.io/openzfs-docs/) - for using and developing this repo
 +  * [ZoL Site](https://zfsonlinux.org) - Linux release info & links
 +  * [Mailing lists](https://openzfs.github.io/openzfs-docs/Project%20and%20Community/Mailing%20Lists.html)
 +  * [OpenZFS site](http://open-zfs.org/) - for conference videos and info on other platforms (illumos, OSX, Windows, etc)
 +
 +# Installation
 +
 +Full documentation for installing OpenZFS on your favorite operating system can
 +be found at the [Getting Started Page](https://openzfs.github.io/openzfs-docs/Getting%20Started/index.html).
 +
 +# Contribute & Develop
 +
 +We have a separate document with [contribution guidelines](./.github/CONTRIBUTING.md).
 +
 +We have a [Code of Conduct](./CODE_OF_CONDUCT.md).
 +
 +# Release
 +
 +OpenZFS is released under a CDDL license.
 +For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
 +
 +# Supported Kernels
 +  * The `META` file contains the officially recognized supported Linux kernel versions.
-   * Supported FreeBSD versions are 12-STABLE and 13-CURRENT.
++  * Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.
diff --cc sys/contrib/openzfs/config/kernel-bio_max_segs.m4
index 000000000000,a90d75455c13..a90d75455c13
mode 000000,100644..100644
--- a/sys/contrib/openzfs/config/kernel-bio_max_segs.m4
+++ b/sys/contrib/openzfs/config/kernel-bio_max_segs.m4
diff --cc sys/contrib/openzfs/config/kernel-generic_fillattr.m4
index 000000000000,50c8031305b3..50c8031305b3
mode 000000,100644..100644
--- a/sys/contrib/openzfs/config/kernel-generic_fillattr.m4
+++ b/sys/contrib/openzfs/config/kernel-generic_fillattr.m4
diff --cc sys/contrib/openzfs/config/kernel-mkdir.m4
index 000000000000,a162bcd880ff..a162bcd880ff
mode 000000,100644..100644
--- a/sys/contrib/openzfs/config/kernel-mkdir.m4
+++ b/sys/contrib/openzfs/config/kernel-mkdir.m4
diff --cc sys/contrib/openzfs/config/kernel-mknod.m4
index 000000000000,ffe45106003a..ffe45106003a
mode 000000,100644..100644
--- a/sys/contrib/openzfs/config/kernel-mknod.m4
+++ b/sys/contrib/openzfs/config/kernel-mknod.m4
diff --cc sys/contrib/openzfs/config/kernel-symlink.m4
index 000000000000,d90366d04b72..d90366d04b72
mode 000000,100644..100644
--- a/sys/contrib/openzfs/config/kernel-symlink.m4
+++ b/sys/contrib/openzfs/config/kernel-symlink.m4
diff --cc sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
index 3b0f824115f8,000000000000..3e3fda20c72c
mode 100644,000000..100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
@@@ -1,333 -1,0 +1,333 @@@
 +/*
 + * CDDL HEADER START
 + *
 + * The contents of this file are subject to the terms of the
 + * Common Development and Distribution License (the "License").
 + * You may not use this file except in compliance with the License.
 + *
 + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 + * or http://www.opensolaris.org/os/licensing.
 + * See the License for the specific language governing permissions
 + * and limitations under the License.
 + *
 + * When distributing Covered Code, include this CDDL HEADER in each
 + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 + * If applicable, add the following below this CDDL HEADER, with the
 + * fields enclosed by brackets "[]" replaced with your own identifying
 + * information: Portions Copyright [yyyy] [name of copyright owner]
 + *
 + * CDDL HEADER END
 + */
 +/*
 + * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 + * Use is subject to license terms.
 + */
 +
 +/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 +/*	  All Rights Reserved	*/
 +
 +/*
 + * University Copyright- Copyright (c) 1982, 1986, 1988
 + * The Regents of the University of California
 + * All Rights Reserved
 + *
 + * University Acknowledgment- Portions of this document are derived from
 + * software developed by the University of California, Berkeley, and its
 + * contributors.
 + */
 +/*
 + * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 + */
 +
 +#ifdef _KERNEL
 +
 +#include <sys/types.h>
 +#include <sys/uio_impl.h>
 +#include <sys/sysmacros.h>
 +#include <sys/strings.h>
 +#include <linux/kmap_compat.h>
 +#include <linux/uaccess.h>
 +
 +/*
 + * Move "n" bytes at byte address "p"; "rw" indicates the direction
 + * of the move, and the I/O parameters are provided in "uio", which is
 + * updated to reflect the data which was moved.  Returns 0 on success or
 + * a non-zero errno on failure.
 + *
 + * The copy proceeds one iovec segment at a time, starting "uio_skip"
 + * bytes into the current iovec, and stops once "n" bytes have been moved
 + * or "uio_resid" reaches zero.  UIO_READ copies from "p" into the iovec;
 + * any other "rw" value copies from the iovec into "p".
 + *
 + * If a copy from user space comes up short, the bytes that were copied
 + * are still accounted for in uio_skip, uio_resid and uio_loffset before
 + * EFAULT is returned.  A failed copy to user space returns EFAULT
 + * without updating the uio for that segment.
 + */
 +static int
 +zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 +{
 +	const struct iovec *iov = uio->uio_iov;
 +	size_t skip = uio->uio_skip;
 +	ulong_t cnt;
 +
 +	while (n && uio->uio_resid) {
 +		cnt = MIN(iov->iov_len - skip, n);
 +		switch (uio->uio_segflg) {
 +		case UIO_USERSPACE:
 +			/*
 +			 * p = kernel data pointer
 +			 * iov->iov_base = user data pointer
 +			 *
 +			 * When uio_fault_disable is set, the source range is
 +			 * checked with zfs_access_ok() and then copied with
 +			 * page faults disabled; otherwise copy_from_user()
 +			 * is used.  Either way b_left is the number of bytes
 +			 * that were not copied.
 +			 */
 +			if (rw == UIO_READ) {
 +				if (copy_to_user(iov->iov_base+skip, p, cnt))
 +					return (EFAULT);
 +			} else {
 +				unsigned long b_left = 0;
 +				if (uio->uio_fault_disable) {
 +					if (!zfs_access_ok(VERIFY_READ,
 +					    (iov->iov_base + skip), cnt)) {
 +						return (EFAULT);
 +					}
 +					pagefault_disable();
 +					b_left =
 +					    __copy_from_user_inatomic(p,
 +					    (iov->iov_base + skip), cnt);
 +					pagefault_enable();
 +				} else {
 +					b_left =
 +					    copy_from_user(p,
 +					    (iov->iov_base + skip), cnt);
 +				}
 +				if (b_left > 0) {
 +					unsigned long c_bytes =
 +					    cnt - b_left;
 +					uio->uio_skip += c_bytes;
 +					ASSERT3U(uio->uio_skip, <,
 +					    iov->iov_len);
 +					uio->uio_resid -= c_bytes;
 +					uio->uio_loffset += c_bytes;
 +					return (EFAULT);
 +				}
 +			}
 +			break;
 +		case UIO_SYSSPACE:
 +			if (rw == UIO_READ)
 +				bcopy(p, iov->iov_base + skip, cnt);
 +			else
 +				bcopy(iov->iov_base + skip, p, cnt);
 +			break;
 +		default:
 +			ASSERT(0);
 +		}
 +		skip += cnt;
 +		if (skip == iov->iov_len) {	/* segment fully consumed */
 +			skip = 0;
 +			uio->uio_iov = (++iov);
 +			uio->uio_iovcnt--;
 +		}
 +		uio->uio_skip = skip;
 +		uio->uio_resid -= cnt;
 +		uio->uio_loffset += cnt;
 +		p = (caddr_t)p + cnt;
 +		n -= cnt;
 +	}
 +	return (0);
 +}
 +/*
 + * Same contract as zfs_uiomove_iov(), but for a uio whose segments are
 + * bio_vec entries (uio->uio_bvec).  Each segment's page is mapped with
 + * zfs_kmap_atomic() only for the duration of the copy.  UIO_READ copies
 + * from "p" into the mapped page; any other "rw" value copies from the
 + * page into "p".  Always returns 0.
 + */
 +static int
 +zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 +{
 +	const struct bio_vec *bv = uio->uio_bvec;
 +	size_t skip = uio->uio_skip;
 +	ulong_t cnt;
 +
 +	while (n && uio->uio_resid) {
 +		void *paddr;
 +		cnt = MIN(bv->bv_len - skip, n);
 +
- 		paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
++		paddr = zfs_kmap_atomic(bv->bv_page);
 +		if (rw == UIO_READ)
 +			bcopy(p, paddr + bv->bv_offset + skip, cnt);
 +		else
 +			bcopy(paddr + bv->bv_offset + skip, p, cnt);
- 		zfs_kunmap_atomic(paddr, KM_USER1);
++		zfs_kunmap_atomic(paddr);
 +
 +		skip += cnt;
 +		if (skip == bv->bv_len) {	/* segment fully consumed */
 +			skip = 0;
 +			uio->uio_bvec = (++bv);
 +			uio->uio_iovcnt--;
 +		}
 +		uio->uio_skip = skip;
 +		uio->uio_resid -= cnt;
 +		uio->uio_loffset += cnt;
 +		p = (caddr_t)p + cnt;
 +		n -= cnt;
 +	}
 +	return (0);
 +}
 +
 +#if defined(HAVE_VFS_IOV_ITER)
 +/*
 + * iov_iter flavour of the uiomove helpers: move up to MIN(n, uio_resid)
 + * bytes between "p" and uio->uio_iter in the direction given by "rw".
 + * Returns EFAULT when no bytes were moved and 0 otherwise.  When "revert"
 + * is set the iterator is rewound by the number of bytes moved, while
 + * uio_resid and uio_loffset are still updated.
 + */
 +static int
 +zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
 +    boolean_t revert)
 +{
 +	size_t moved = MIN(n, uio->uio_resid);
 +
 +	/* Apply any pending uio_skip offset to the iterator first. */
 +	if (uio->uio_skip != 0)
 +		iov_iter_advance(uio->uio_iter, uio->uio_skip);
 +
 +	moved = (rw == UIO_READ) ?
 +	    copy_to_iter(p, moved, uio->uio_iter) :
 +	    copy_from_iter(p, moved, uio->uio_iter);
 +
 +	/*
 +	 * Nothing is processed when the target is a full pipe.  Report
 +	 * EFAULT in that case; the kernel's generic_file_splice_read()
 +	 * turns it into EAGAIN.
 +	 */
 +	if (moved == 0)
 +		return (EFAULT);
 +
 +	/*
 +	 * zfs_uiocopy() asks for the iterator to be rewound so that
 +	 * neither the uio nor its iov_iter ends up consumed.
 +	 */
 +	if (revert)
 +		iov_iter_revert(uio->uio_iter, moved);
 +
 +	uio->uio_loffset += moved;
 +	uio->uio_resid -= moved;
 +
 +	return (0);
 +}
 +#endif
 +
 +/*
 + * Public entry point: dispatch to the helper that matches the uio's
 + * segment type and return its result.
 + */
 +int
 +zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 +{
 +	switch (uio->uio_segflg) {
 +	case UIO_BVEC:
 +		return (zfs_uiomove_bvec(p, n, rw, uio));
 +#if defined(HAVE_VFS_IOV_ITER)
 +	case UIO_ITER:
 +		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
 +#endif
 +	default:
 +		/* Every other segment type is iovec based. */
 +		return (zfs_uiomove_iov(p, n, rw, uio));
 +	}
 +}
 +EXPORT_SYMBOL(zfs_uiomove);
 +
 +/*
 + * Fault in the pages of the first n bytes specified by the uio structure.
 + * 1 byte in each page is touched and the uio struct is unmodified.  This
 + * is only a best-effort attempt to get the pages resident: the first
 + * failure stops the walk and EFAULT is returned; otherwise 0 is returned.
 + *
 + * UIO_SYSSPACE and UIO_BVEC uios are never prefaulted, UIO_ITER uios
 + * (when HAVE_VFS_IOV_ITER is defined) are handed to
 + * iov_iter_fault_in_readable(), and UIO_USERSPACE uios are walked one
 + * iovec at a time.
 + */
 +int
 +zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
 +{
 +	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
 +		/* There's never a need to fault in kernel pages */
 +		return (0);
 +#if defined(HAVE_VFS_IOV_ITER)
 +	} else if (uio->uio_segflg == UIO_ITER) {
 +		/*
 +		 * As of at least the Linux 4.9 kernel,
 +		 * iov_iter_fault_in_readable() can be relied on to fault
 +		 * in user pages when referenced.
 +		 */
 +		if (iov_iter_fault_in_readable(uio->uio_iter, n))
 +			return (EFAULT);
 +#endif
 +	} else {
 +		/* Fault in all user pages */
 +		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
 +		const struct iovec *iov = uio->uio_iov;
 +		int iovcnt = uio->uio_iovcnt;
 +		size_t skip = uio->uio_skip;
 +		uint8_t tmp;
 +		caddr_t p;
 +
 +		/* Only the first iovec is entered part-way (at uio_skip). */
 +		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
 +			ulong_t cnt = MIN(iov->iov_len - skip, n);
 +			/* empty iov */
 +			if (cnt == 0)
 +				continue;
 +			n -= cnt;
 +			/* touch each page in this segment. */
 +			p = iov->iov_base + skip;
 +			while (cnt) {
 +				if (get_user(tmp, (uint8_t *)p))
 +					return (EFAULT);
 +				ulong_t incr = MIN(cnt, PAGESIZE);
 +				p += incr;
 +				cnt -= incr;
 +			}
 +			/* touch the last byte in case it straddles a page. */
 +			p--;
 +			if (get_user(tmp, (uint8_t *)p))
 +				return (EFAULT);
 +		}
 +	}
 +
 +	/*
 +	 * A stale "iterp" prefault call and an unmatched #endif used to
 +	 * follow here.  "iterp" is not declared in this function, and the
 +	 * extra #endif closed the file's #ifdef _KERNEL early; the
 +	 * UIO_ITER branch above already covers the iov_iter case.
 +	 */
 +	return (0);
 +}
 +EXPORT_SYMBOL(zfs_uio_prefaultpages);
 +
 +/*
 + * Like zfs_uiomove(), except that the caller's uio is left unmodified:
 + * the move runs against a private copy.  The number of bytes copied is
 + * stored in *cbytes and the helper's return value is passed through.
 + */
 +int
 +zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
 +{
 +	zfs_uio_t scratch;
 +	int error;
 +
 +	bcopy(uio, &scratch, sizeof (zfs_uio_t));
 +
 +	switch (uio->uio_segflg) {
 +	case UIO_BVEC:
 +		error = zfs_uiomove_bvec(p, n, rw, &scratch);
 +		break;
 +#if defined(HAVE_VFS_IOV_ITER)
 +	case UIO_ITER:
 +		/* B_TRUE: rewind the shared iov_iter after the copy. */
 +		error = zfs_uiomove_iter(p, n, rw, &scratch, B_TRUE);
 +		break;
 +#endif
 +	default:
 +		error = zfs_uiomove_iov(p, n, rw, &scratch);
 +		break;
 +	}
 +
 +	/* Bytes copied == how far the scratch copy's residual dropped. */
 +	*cbytes = uio->uio_resid - scratch.uio_resid;
 +
 +	return (error);
 +}
 +EXPORT_SYMBOL(zfs_uiocopy);
 +
 +/*
 + * Discard the next n bytes of *uio.  A request larger than the remaining
 + * uio_resid is ignored entirely.
 + */
 +void
 +zfs_uioskip(zfs_uio_t *uio, size_t n)
 +{
 +	if (n > uio->uio_resid)
 +		return;
 +
 +	switch (uio->uio_segflg) {
 +	case UIO_BVEC:
 +		/* Step over every bio_vec the new offset runs past. */
 +		uio->uio_skip += n;
 +		while (uio->uio_iovcnt != 0 &&
 +		    uio->uio_skip >= uio->uio_bvec->bv_len) {
 +			uio->uio_skip -= uio->uio_bvec->bv_len;
 +			uio->uio_bvec++;
 +			uio->uio_iovcnt--;
 +		}
 +		break;
 +#if defined(HAVE_VFS_IOV_ITER)
 +	case UIO_ITER:
 +		iov_iter_advance(uio->uio_iter, n);
 +		break;
 +#endif
 +	default:
 +		/* Step over every iovec the new offset runs past. */
 +		uio->uio_skip += n;
 +		while (uio->uio_iovcnt != 0 &&
 +		    uio->uio_skip >= uio->uio_iov->iov_len) {
 +			uio->uio_skip -= uio->uio_iov->iov_len;
 +			uio->uio_iov++;
 +			uio->uio_iovcnt--;
 +		}
 +		break;
 +	}
 +
 +	uio->uio_resid -= n;
 +	uio->uio_loffset += n;
 +}
 +EXPORT_SYMBOL(zfs_uioskip);
 +
 +#endif /* _KERNEL */
diff --cc sys/contrib/openzfs/module/zfs/zio.c
index 262ca24b1443,000000000000..a7820e75670b
mode 100644,000000..100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@@ -1,5036 -1,0 +1,5036 @@@
 +/*
 + * CDDL HEADER START
 + *
 + * The contents of this file are subject to the terms of the
 + * Common Development and Distribution License (the "License").
 + * You may not use this file except in compliance with the License.
 + *
 + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 + * or http://www.opensolaris.org/os/licensing.
 + * See the License for the specific language governing permissions
 + * and limitations under the License.
 + *
 + * When distributing Covered Code, include this CDDL HEADER in each
 + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 + * If applicable, add the following below this CDDL HEADER, with the
 + * fields enclosed by brackets "[]" replaced with your own identifying
 + * information: Portions Copyright [yyyy] [name of copyright owner]
 + *
 + * CDDL HEADER END
 + */
 +/*
 + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 + * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
 + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
 + * Copyright (c) 2017, Intel Corporation.
 + * Copyright (c) 2019, Klara Inc.
 + * Copyright (c) 2019, Allan Jude
 + */
 +
 +#include <sys/sysmacros.h>
 +#include <sys/zfs_context.h>
 +#include <sys/fm/fs/zfs.h>
 +#include <sys/spa.h>
 +#include <sys/txg.h>
 +#include <sys/spa_impl.h>
 +#include <sys/vdev_impl.h>
 +#include <sys/vdev_trim.h>
 +#include <sys/zio_impl.h>
 +#include <sys/zio_compress.h>
 +#include <sys/zio_checksum.h>
 +#include <sys/dmu_objset.h>
 +#include <sys/arc.h>
 +#include <sys/ddt.h>
 +#include <sys/blkptr.h>
 +#include <sys/zfeature.h>
 +#include <sys/dsl_scan.h>
 +#include <sys/metaslab_impl.h>
 +#include <sys/time.h>
 +#include <sys/trace_zfs.h>
 +#include <sys/abd.h>
 +#include <sys/dsl_crypt.h>
 +#include <cityhash.h>
 +
 +/*
 + * ==========================================================================
 + * I/O type descriptions
 + * ==========================================================================
 + */
 +const char *zio_type_name[ZIO_TYPES] = {
 +	/*
 +	 * Note: Linux kernel thread name length is limited
 +	 * so these names will differ from upstream open zfs.
 +	 */
 +	"z_null", "z_rd", "z_wr", "z_fr", "z_cl", "z_ioctl", "z_trim"
 +};
 +
 +int zio_dva_throttle_enabled = B_TRUE;
 +int zio_deadman_log_all = B_FALSE;
 +
 +/*
 + * ==========================================================================
 + * I/O kmem caches
 + * ==========================================================================
 + */
 +kmem_cache_t *zio_cache;
 +kmem_cache_t *zio_link_cache;
 +kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 +kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 +#if defined(ZFS_DEBUG) && !defined(_KERNEL)
 +uint64_t zio_buf_cache_allocs[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 +uint64_t zio_buf_cache_frees[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 +#endif
 +
 +/* Mark IOs as "slow" if they take longer than 30 seconds */
 +int zio_slow_io_ms = (30 * MILLISEC);
 +/* BP_SPANB: 2^(level * (indblkshift - SPA_BLKPTRSHIFT)) as a uint64_t. */
 +#define	BP_SPANB(indblkshift, level) \
 +	(((uint64_t)1) << ((level) * ((indblkshift) - SPA_BLKPTRSHIFT)))
 +#define	COMPARE_META_LEVEL	0x80000000ul
 +/*
 + * The following actions directly affect the spa's sync-to-convergence logic.
 + * The values below define the sync pass when we start performing the action.
 + * Care should be taken when changing these values as they directly impact
 + * spa_sync() performance. Tuning these values may introduce subtle performance
 + * pathologies and should only be done in the context of performance analysis.
 + * These tunables will eventually be removed and replaced with #defines once
 + * enough analysis has been done to determine optimal values.
 + *
 + * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that
 + * regular blocks are not deferred.
 + *
 + * Starting in sync pass 8 (zfs_sync_pass_dont_compress), we disable
 + * compression (including of metadata).  In practice, we don't have this
 + * many sync passes, so this has no effect.
 + *
 + * The original intent was that disabling compression would help the sync
 + * passes to converge. However, in practice disabling compression increases
 + * the average number of sync passes, because when we turn compression off, a
 + * lot of block's size will change and thus we have to re-allocate (not
 + * overwrite) them. It also increases the number of 128KB allocations (e.g.
 + * for indirect blocks and spacemaps) because these will not be compressed.
 + * The 128K allocations are especially detrimental to performance on highly
 + * fragmented systems, which may have very few free segments of this size,
 + * and may need to load new metaslabs to satisfy 128K allocations.
 + */
 +int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */
 +int zfs_sync_pass_dont_compress = 8; /* don't compress starting in this pass */
 +int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */
 +
 +/*
 + * An allocating zio is one that either currently has the DVA allocate
 + * stage set or will have it later in its lifetime.
 + */
 +#define	IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE)
 +
 +/*
 + * Enable smaller cores by excluding metadata
 + * allocations as well.
 + */
 +int zio_exclude_metadata = 0;
 +int zio_requeue_io_start_cut_in_line = 1;
 +
 +#ifdef ZFS_DEBUG
 +int zio_buf_debug_limit = 16384;
 +#else
 +int zio_buf_debug_limit = 0;
 +#endif
 +/* Prototypes for file-local (static) helpers. */
 +static inline void __zio_execute(zio_t *zio);
 +
 +static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t);
 +/*
 + * Create the zio and zio_link kmem caches and the per-size zio_buf /
 + * zio_data_buf caches, let slots that did not get a cache of their own
 + * share the next larger one, then call zio_inject_init() and lz4_init().
 + */
 +void
 +zio_init(void)
 +{
 +	size_t c;
 +
 +	zio_cache = kmem_cache_create("zio_cache",
 +	    sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 +	zio_link_cache = kmem_cache_create("zio_link_cache",
 +	    sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 +
 +	/*
 +	 * For small buffers, we want a cache for each multiple of
 +	 * SPA_MINBLOCKSIZE.  For larger buffers, we want a cache
 +	 * for each quarter-power of 2.
 +	 */
 +	for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
 +		size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
 +		size_t p2 = size;
 +		size_t align = 0;
 +		size_t data_cflags, cflags;
 +
 +		data_cflags = KMC_NODEBUG;
 +		cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
 +		    KMC_NODEBUG : 0;
 +
 +#if defined(_ILP32) && defined(_KERNEL)
 +		/*
 +		 * Cache size limited to 1M on 32-bit platforms until ARC
 +		 * buffers no longer require virtual address space.
 +		 */
 +		if (size > zfs_max_recordsize)
 +			break;
 +#endif
 +		/* Clear low set bits until p2 is a power of two. */
 +		while (!ISP2(p2))
 +			p2 &= p2 - 1;
 +
 +#ifndef _KERNEL
 +		/*
 +		 * If we are using watchpoints, put each buffer on its own page,
 +		 * to eliminate the performance overhead of trapping to the
 +		 * kernel when modifying a non-watched buffer that shares the
 +		 * page with a watched buffer.
 +		 */
 +		if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
 +			continue;
 +		/*
 +		 * Here's the problem - on 4K native devices in userland on
 +		 * Linux using O_DIRECT, buffers must be 4K aligned or I/O
 +		 * will fail with EINVAL, causing zdb (and others) to coredump.
 +		 * Since userland probably doesn't need optimized buffer caches,
 +		 * we just force 4K alignment on everything.
 +		 */
 +		align = 8 * SPA_MINBLOCKSIZE;
 +#else
 +		if (size < PAGESIZE) {
 +			align = SPA_MINBLOCKSIZE;
 +		} else if (IS_P2ALIGNED(size, p2 >> 2)) {
 +			align = PAGESIZE;
 +		}
 +#endif
 +
 +		/* align == 0 means no cache is created for this size. */
 +		if (align != 0) {
 +			char name[36];
 +			if (cflags == data_cflags) {
 +				/*
 +				 * Resulting kmem caches would be identical.
 +				 * Save memory by creating only one.
 +				 */
 +				(void) snprintf(name, sizeof (name),
 +				    "zio_buf_comb_%lu", (ulong_t)size);
 +				zio_buf_cache[c] = kmem_cache_create(name,
 +				    size, align, NULL, NULL, NULL, NULL, NULL,
 +				    cflags);
 +				zio_data_buf_cache[c] = zio_buf_cache[c];
 +				continue;
 +			}
 +			(void) snprintf(name, sizeof (name), "zio_buf_%lu",
 +			    (ulong_t)size);
 +			zio_buf_cache[c] = kmem_cache_create(name, size,
 +			    align, NULL, NULL, NULL, NULL, NULL, cflags);
 +
 +			(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
 +			    (ulong_t)size);
 +			zio_data_buf_cache[c] = kmem_cache_create(name, size,
 +			    align, NULL, NULL, NULL, NULL, NULL, data_cflags);
 +		}
 +	}
 +	/*
 +	 * Walk back down from the index where the loop above stopped and
 +	 * point every slot that has no cache of its own at the cache of
 +	 * the slot above it.
 +	 */
 +	while (--c != 0) {
 +		ASSERT(zio_buf_cache[c] != NULL);
 +		if (zio_buf_cache[c - 1] == NULL)
 +			zio_buf_cache[c - 1] = zio_buf_cache[c];
 +
 +		ASSERT(zio_data_buf_cache[c] != NULL);
 +		if (zio_data_buf_cache[c - 1] == NULL)
 +			zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
 +	}
 +
 +	zio_inject_init();
 +
 +	lz4_init();
 +}
 +
 +/*
 + * Tear down what zio_init() created: destroy each distinct buffer cache
 + * exactly once, then the zio_link and zio caches, and finally call
 + * zio_inject_fini() and lz4_fini().
 + */
 +void
 +zio_fini(void)
 +{
 +	const size_t n = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT;
 +	kmem_cache_t *cache;
 +	size_t i, j;
 +
 +#if defined(ZFS_DEBUG) && !defined(_KERNEL)
 +	/* Report every size class whose alloc and free counts disagree. */
 +	for (i = 0; i < n; i++) {
 +		if (zio_buf_cache_allocs[i] == zio_buf_cache_frees[i])
 +			continue;
 +		(void) printf("zio_fini: [%d] %llu != %llu\n",
 +		    (int)((i + 1) << SPA_MINBLOCKSHIFT),
 +		    (long long unsigned)zio_buf_cache_allocs[i],
 +		    (long long unsigned)zio_buf_cache_frees[i]);
 +	}
 +#endif
 +
 +	/*
 +	 * One kmem cache may be referenced from several slots of both
 +	 * zio_buf_cache and zio_data_buf_cache.  A simple (if wasteful)
 +	 * scan clears every reference before the cache is destroyed.
 +	 */
 +	for (i = 0; i < n; i++) {
 +		cache = zio_buf_cache[i];
 +		if (cache != NULL) {
 +			for (j = i; j < n; j++) {
 +				if (zio_buf_cache[j] == cache)
 +					zio_buf_cache[j] = NULL;
 +				if (zio_data_buf_cache[j] == cache)
 +					zio_data_buf_cache[j] = NULL;
 +			}
 +			kmem_cache_destroy(cache);
 +		}
 +	}
 +
 +	/* Whatever is left belongs to zio_data_buf_cache alone. */
 +	for (i = 0; i < n; i++) {
 +		cache = zio_data_buf_cache[i];
 +		if (cache != NULL) {
 +			for (j = i; j < n; j++) {
 +				if (zio_data_buf_cache[j] == cache)
 +					zio_data_buf_cache[j] = NULL;
 +			}
 +			kmem_cache_destroy(cache);
 +		}
 +	}
 +
 +	/* Both tables must be completely empty by now. */
 +	for (i = 0; i < n; i++) {
 +		if (zio_buf_cache[i] != NULL)
 +			panic("zio_fini: zio_buf_cache[%d] != NULL", (int)i);
 +		if (zio_data_buf_cache[i] != NULL)
 +			panic("zio_fini: zio_data_buf_cache[%d] != NULL",
 +			    (int)i);
 +	}
 +
 +	kmem_cache_destroy(zio_link_cache);
 +	kmem_cache_destroy(zio_cache);
 +
 +	zio_inject_fini();
 +
 +	lz4_fini();
 +}
 +
 +/*
 + * ==========================================================================
 + * Allocate and free I/O buffers
 + * ==========================================================================
 + */
 +
 +/*
 + * Allocate a ZFS metadata buffer from the zio_buf cache serving "size".
 + * Buffers from this allocator show up in a crashdump if the kernel
 + * panics, so use it judiciously: inspecting ZFS metadata is useful, but
 + * excess or transient data should not be kept in-core during a
 + * crashdump when that can be avoided.
 + */
 +void *
 +zio_buf_alloc(size_t size)
 +{
 +	const size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 +	void *buf;
 +
 +	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 +#if defined(ZFS_DEBUG) && !defined(_KERNEL)
 +	atomic_add_64(&zio_buf_cache_allocs[c], 1);
 +#endif
 +
 +	buf = kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE);
 +	return (buf);
 +}
 +
 +/*
 + * Allocate a data buffer from the zio_data_buf cache serving "size".
 + * These buffers do not appear in a crashdump if the kernel panics; the
 + * allocator exists to limit how much ZFS data ends up in a kernel
 + * crashdump (and so how much kernel heap is dumped to disk on panic).
 + */
 +void *
 +zio_data_buf_alloc(size_t size)
 +{
 +	const size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 +	void *buf;
 +
 +	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 +
 +	buf = kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE);
 +	return (buf);
 +}
 +
 +/*
 + * Return "buf" to the zio_buf cache selected by "size", using the same
 + * size-to-slot mapping as zio_buf_alloc().
 + */
 +void
 +zio_buf_free(void *buf, size_t size)
 +{
 +	const size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 +	kmem_cache_t *cache;
 +
 +	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 +#if defined(ZFS_DEBUG) && !defined(_KERNEL)
 +	atomic_add_64(&zio_buf_cache_frees[c], 1);
 +#endif
 +
 +	cache = zio_buf_cache[c];
 +	kmem_cache_free(cache, buf);
 +}
 +
 +/*
 + * Return "buf" to the zio_data_buf cache selected by "size", using the
 + * same size-to-slot mapping as zio_data_buf_alloc().
 + */
 +void
 +zio_data_buf_free(void *buf, size_t size)
 +{
 +	const size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 +	kmem_cache_t *cache;
 +
 +	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 +
 +	cache = zio_data_buf_cache[c];
 +	kmem_cache_free(cache, buf);
 +}
 +
 +/*
 + * Free callback with a (pointer, size) signature: release "abd" through
 + * abd_free().  "size" is not used.
 + */
 +static void
 +zio_abd_free(void *abd, size_t size)
 +{
 +	abd_t *victim = (abd_t *)abd;
 +
 +	abd_free(victim);
 +}
 +
 +/*
 + * ==========================================================================
 + * Push and pop I/O transform buffers
 + * ==========================================================================
 + */
 +/*
 + * Push a transform record onto zio->io_transform_stack that remembers
 + * the zio's current abd and size together with "bufsize" and the
 + * "transform" callback, then switch the zio over to "data" / "size".
 + */
 +void
 +zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize,
 +    zio_transform_func_t *transform)
 +{
 +	zio_transform_t *zt;
 +
 +	zt = kmem_alloc(sizeof (*zt), KM_SLEEP);
 +
 +	/* Callback and buffer bookkeeping for the pop side. */
 +	zt->zt_transform = transform;
 +	zt->zt_bufsize = bufsize;
 +
 +	/* Remember what the zio pointed at before this transform. */
 +	zt->zt_orig_size = zio->io_size;
 +	zt->zt_orig_abd = zio->io_abd;
 +
 +	/* Link the record in at the top of the stack. */
 +	zt->zt_next = zio->io_transform_stack;
 +	zio->io_transform_stack = zt;
 +
 +	zio->io_size = size;
 +	zio->io_abd = data;
 +}
 +
 +/*
 + * Unwind zio->io_transform_stack from the top.  For each record: run its
 + * transform callback (if any) against the saved abd and size, free the
 + * zio's current abd when the record has a non-zero zt_bufsize, restore
 + * the saved abd and size, and free the record.
 + */
 +void
 +zio_pop_transforms(zio_t *zio)
 +{
 +	zio_transform_t *zt;
 +
 +	for (zt = zio->io_transform_stack; zt != NULL;
 +	    zt = zio->io_transform_stack) {
 +		if (zt->zt_transform != NULL) {
 +			zt->zt_transform(zio,
 +			    zt->zt_orig_abd, zt->zt_orig_size);
 +		}
 +
 +		if (zt->zt_bufsize != 0)
 +			abd_free(zio->io_abd);
 +
 +		zio->io_size = zt->zt_orig_size;
 +		zio->io_abd = zt->zt_orig_abd;
 +		zio->io_transform_stack = zt->zt_next;
 +
 +		kmem_free(zt, sizeof (*zt));
 +	}
 +}
 +
 +/*
 + * ==========================================================================
 + * I/O transform callbacks for subblocks, decompression, and decryption
 + * ==========================================================================
 + */
 +/*
 + * Transform callback for subblock I/O: for reads, copy the first "size"
 + * bytes of the zio's current abd into "data".  Other zio types are left
 + * alone.
 + */
 +static void
 +zio_subblock(zio_t *zio, abd_t *data, uint64_t size)
 +{
 +	ASSERT(zio->io_size > size);
 +
 +	if (zio->io_type != ZIO_TYPE_READ)
 +		return;
 +
 +	abd_copy(data, zio->io_abd, size);
 +}
 +
 +/*
 + * Transform callback for decompression: when the zio carries no error,
 + * decompress its current abd into "data" ("size" bytes).  A failure,
 + * whether real or injected, is recorded in zio->io_error as EIO.
 + */
 +static void
 +zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
 +{
 +	void *buf;
 +	int error;
 +
 +	/* Nothing to do for a zio that has already failed. */
 +	if (zio->io_error != 0)
 +		return;
 +
 +	buf = abd_borrow_buf(data, size);
 +	error = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
 +	    zio->io_abd, buf, zio->io_size, size,
 +	    &zio->io_prop.zp_complevel);
 +	abd_return_buf_copy(data, buf, size);
 +
 +	if (zio_injection_enabled && error == 0)
 +		error = zio_handle_fault_injection(zio, EINVAL);
 +
 +	if (error != 0)
 +		zio->io_error = SET_ERROR(EIO);
 +}
 +
 +static void
 +zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
 +{
 +	int ret;
 +	void *tmp;
 +	blkptr_t *bp = zio->io_bp;
 +	spa_t *spa = zio->io_spa;
 +	uint64_t dsobj = zio->io_bookmark.zb_objset;
 +	uint64_t lsize = BP_GET_LSIZE(bp);
 +	dmu_object_type_t ot = BP_GET_TYPE(bp);
 +	uint8_t salt[ZIO_DATA_SALT_LEN];
 +	uint8_t iv[ZIO_DATA_IV_LEN];
 +	uint8_t mac[ZIO_DATA_MAC_LEN];
 +	boolean_t no_crypt = B_FALSE;
 +
*** 5391 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202103210125.12L1PaZZ046850>