From owner-svn-src-stable@freebsd.org Sat Oct 3 07:33:29 2015 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 0A591A0EA21; Sat, 3 Oct 2015 07:33:29 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id EC1741E77; Sat, 3 Oct 2015 07:33:28 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.70]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id t937XS7D068049; Sat, 3 Oct 2015 07:33:28 GMT (envelope-from mav@FreeBSD.org) Received: (from mav@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id t937XSUb068044; Sat, 3 Oct 2015 07:33:28 GMT (envelope-from mav@FreeBSD.org) Message-Id: <201510030733.t937XSUb068044@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mav set sender to mav@FreeBSD.org using -f From: Alexander Motin Date: Sat, 3 Oct 2015 07:33:28 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r288553 - in stable/10: cddl/contrib/opensolaris/cmd/zstreamdump cddl/contrib/opensolaris/lib/libzfs/common sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/u... X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 03 Oct 2015 07:33:29 -0000 Author: mav Date: Sat Oct 3 07:33:27 2015 New Revision: 288553 URL: https://svnweb.freebsd.org/changeset/base/288553 Log: MFC r286587: 5746 more checksumming in zfs send Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Bayard Bell Approved by: Albert Lee Author: Matthew Ahrens illumos/illumos-gate@98110f08fa182032082d98be2ddb9391fcd62bf1 Modified: stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h Directory Properties: stable/10/ (props changed) Modified: stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Sat Oct 3 07:32:34 2015 (r288552) +++ stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Sat Oct 3 07:33:27 2015 (r288553) @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ #include @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -83,7 +84,6 @@ safe_malloc(size_t size) * * Read while computing incremental checksum */ - static size_t ssread(void *buf, size_t len, zio_cksum_t *cksum) { @@ -92,7 +92,7 @@ ssread(void *buf, size_t len, zio_cksum_ if ((outlen = fread(buf, len, 1, send_stream)) == 0) return (0); - if (do_cksum && cksum) { + if (do_cksum) { if (do_byteswap) fletcher_4_incremental_byteswap(buf, len, cksum); else @@ -102,6 +102,34 @@ ssread(void *buf, size_t len, zio_cksum_ return (outlen); } +static size_t +read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) +{ + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + zio_cksum_t saved_cksum = *cksum; + r = ssread(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) && + !ZIO_CHECKSUM_EQUAL(saved_cksum, + drr->drr_u.drr_checksum.drr_checksum)) { + fprintf(stderr, "invalid checksum\n"); + (void) printf("Incorrect checksum in record header.\n"); + (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n", + saved_cksum.zc_word[0], + saved_cksum.zc_word[1], + saved_cksum.zc_word[2], + saved_cksum.zc_word[3]); + exit(1); + } + return (sizeof (*drr)); +} + /* * Print part of a block in ASCII characters */ @@ -183,8 +211,10 @@ main(int argc, char *argv[]) struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; + struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; char c; boolean_t verbose = B_FALSE; + boolean_t very_verbose = B_FALSE; boolean_t first = B_TRUE; /* * dump flag controls whether the contents of any modified data blocks @@ -202,11 +232,14 @@ main(int argc, char *argv[]) do_cksum = B_FALSE; break; case 'v': + if (verbose) + very_verbose = B_TRUE; verbose = B_TRUE; break; case 'd': dump = B_TRUE; verbose = B_TRUE; + very_verbose = B_TRUE; break; case ':': (void) fprintf(stderr, @@ -230,7 +263,7 @@ main(int argc, char *argv[]) send_stream = stdin; pcksum = zc; - while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) { + while (read_hdr(drr, &zc)) { /* * If this is the first DMU record being processed, check for @@ -432,7 +465,7 @@ main(int argc, char *argv[]) if (verbose) { (void) printf("WRITE object = %llu type = %u " "checksum type = %u\n" - "offset = %llu length = %llu " + " offset = %llu length = %llu " "props = %llx\n", (u_longlong_t)drrw->drr_object, drrw->drr_type, @@ -476,9 +509,9 @@ main(int argc, char *argv[]) if (verbose) { (void) printf("WRITE_BYREF object = %llu " "checksum type = %u props = %llx\n" - "offset = %llu length = %llu\n" + " offset = %llu length = %llu\n" "toguid = %llx refguid = %llx\n" - "refobject = %llu refoffset = %llu\n", + " refobject = %llu refoffset = %llu\n", (u_longlong_t)drrwbr->drr_object, drrwbr->drr_checksumtype, (u_longlong_t)drrwbr->drr_key.ddk_prop, @@ -538,7 +571,7 @@ main(int argc, char *argv[]) if (verbose) { (void) printf("WRITE_EMBEDDED object = %llu " "offset = %llu length = %llu\n" - "toguid = %llx comp = %u etype = %u " + " toguid = %llx comp = %u etype = %u " "lsize = %u psize = %u\n", (u_longlong_t)drrwe->drr_object, (u_longlong_t)drrwe->drr_offset, @@ -553,6 +586,13 @@ main(int argc, char *argv[]) P2ROUNDUP(drrwe->drr_psize, 8), &zc); break; } + if (drr->drr_type != DRR_BEGIN && very_verbose) { + (void) printf(" checksum = %llx/%llx/%llx/%llx\n", + (longlong_t)drrc->drr_checksum.zc_word[0], + (longlong_t)drrc->drr_checksum.zc_word[1], + (longlong_t)drrc->drr_checksum.zc_word[2], + (longlong_t)drrc->drr_checksum.zc_word[3]); + } pcksum = zc; } free(buf); Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Sat Oct 3 07:32:34 2015 (r288552) +++ stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Sat Oct 3 07:33:27 2015 (r288553) @@ -188,10 +188,28 @@ ddt_update(libzfs_handle_t *hdl, dedup_t } static int -cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd) +dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, + zio_cksum_t *zc, int outfd) { - fletcher_4_incremental_native(buf, len, zc); - return (write(outfd, buf, len)); + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + fletcher_4_incremental_native(drr, + offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); + if (drr->drr_type != DRR_BEGIN) { + ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. + drr_checksum.drr_checksum)); + drr->drr_u.drr_checksum.drr_checksum = *zc; + } + fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), zc); + if (write(outfd, drr, sizeof (*drr)) == -1) + return (errno); + if (payload_len != 0) { + fletcher_4_incremental_native(payload, payload_len, zc); + if (write(outfd, payload, payload_len) == -1) + return (errno); + } + return (0); } /* @@ -218,26 +236,18 @@ cksummer(void *arg) char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; - struct drr_begin *drrb = &thedrr.drr_u.drr_begin; - struct drr_end *drre = &thedrr.drr_u.drr_end; - struct drr_object *drro = &thedrr.drr_u.drr_object; - struct drr_write *drrw = &thedrr.drr_u.drr_write; - struct drr_spill *drrs = &thedrr.drr_u.drr_spill; - struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; FILE *ofp; int outfd; - dmu_replay_record_t wbr_drr = {0}; - struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; dedup_table_t ddt; zio_cksum_t stream_cksum; uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); uint64_t numbuckets; ddt.max_ddt_size = - MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100, - SMALLEST_POSSIBLE_MAX_DDT_MB<<20); + MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100, + SMALLEST_POSSIBLE_MAX_DDT_MB << 20); - numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t)); + numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t)); /* * numbuckets must be a power of 2. Increase number to @@ -253,32 +263,29 @@ cksummer(void *arg) ddt.numhashbits = high_order_bit(numbuckets) - 1; ddt.ddt_full = B_FALSE; - /* Initialize the write-by-reference block. */ - wbr_drr.drr_type = DRR_WRITE_BYREF; - wbr_drr.drr_payloadlen = 0; - outfd = dda->outputfd; ofp = fdopen(dda->inputfd, "r"); - while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) { + while (ssread(drr, sizeof (*drr), ofp) != 0) { switch (drr->drr_type) { case DRR_BEGIN: { - int fflags; + struct drr_begin *drrb = &drr->drr_u.drr_begin; + int fflags; + int sz = 0; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + /* set the DEDUP feature flag for this stream */ fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); fflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { - int sz = drr->drr_payloadlen; + sz = drr->drr_payloadlen; if (sz > SPA_MAXBLOCKSIZE) { buf = zfs_realloc(dda->dedup_hdl, buf, @@ -287,64 +294,60 @@ cksummer(void *arg) (void) ssread(buf, sz, ofp); if (ferror(stdin)) perror("fread"); - if (cksum_and_write(buf, sz, &stream_cksum, - outfd) == -1) - goto out; } + if (dump_record(drr, buf, sz, &stream_cksum, + outfd) != 0) + goto out; break; } case DRR_END: { + struct drr_end *drre = &drr->drr_u.drr_end; /* use the recalculated checksum */ - ZIO_SET_CHECKSUM(&drre->drr_checksum, - stream_cksum.zc_word[0], stream_cksum.zc_word[1], - stream_cksum.zc_word[2], stream_cksum.zc_word[3]); - if ((write(outfd, drr, - sizeof (dmu_replay_record_t))) == -1) + drre->drr_checksum = stream_cksum; + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) goto out; break; } case DRR_OBJECT: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; + struct drr_object *drro = &drr->drr_u.drr_object; if (drro->drr_bonuslen > 0) { (void) ssread(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), ofp); - if (cksum_and_write(buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), - &stream_cksum, outfd) == -1) - goto out; } + if (dump_record(drr, buf, + P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + &stream_cksum, outfd) != 0) + goto out; break; } case DRR_SPILL: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; + struct drr_spill *drrs = &drr->drr_u.drr_spill; (void) ssread(buf, drrs->drr_length, ofp); - if (cksum_and_write(buf, drrs->drr_length, - &stream_cksum, outfd) == -1) + if (dump_record(drr, buf, drrs->drr_length, + &stream_cksum, outfd) != 0) goto out; break; } case DRR_FREEOBJECTS: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) goto out; break; } case DRR_WRITE: { + struct drr_write *drrw = &drr->drr_u.drr_write; dataref_t dataref; (void) ssread(buf, drrw->drr_length, ofp); @@ -382,7 +385,13 @@ cksummer(void *arg) if (ddt_update(dda->dedup_hdl, &ddt, &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, &dataref)) { + dmu_replay_record_t wbr_drr = {0}; + struct drr_write_byref *wbr_drrr = + &wbr_drr.drr_u.drr_write_byref; + /* block already present in stream */ + wbr_drr.drr_type = DRR_WRITE_BYREF; + wbr_drrr->drr_object = drrw->drr_object; wbr_drrr->drr_offset = drrw->drr_offset; wbr_drrr->drr_length = drrw->drr_length; @@ -402,19 +411,13 @@ cksummer(void *arg) wbr_drrr->drr_key.ddk_prop = drrw->drr_key.ddk_prop; - if (cksum_and_write(&wbr_drr, - sizeof (dmu_replay_record_t), &stream_cksum, - outfd) == -1) + if (dump_record(&wbr_drr, NULL, 0, + &stream_cksum, outfd) != 0) goto out; } else { /* block not previously seen */ - if (cksum_and_write(drr, - sizeof (dmu_replay_record_t), &stream_cksum, - outfd) == -1) - goto out; - if (cksum_and_write(buf, - drrw->drr_length, - &stream_cksum, outfd) == -1) + if (dump_record(drr, buf, drrw->drr_length, + &stream_cksum, outfd) != 0) goto out; } break; @@ -422,28 +425,27 @@ cksummer(void *arg) case DRR_WRITE_EMBEDDED: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; + struct drr_write_embedded *drrwe = + &drr->drr_u.drr_write_embedded; (void) ssread(buf, P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp); - if (cksum_and_write(buf, + if (dump_record(drr, buf, P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), - &stream_cksum, outfd) == -1) + &stream_cksum, outfd) != 0) goto out; break; } case DRR_FREE: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) goto out; break; } default: - (void) printf("INVALID record type 0x%x\n", + (void) fprintf(stderr, "INVALID record type 0x%x\n", drr->drr_type); /* should never happen, so assert */ assert(B_FALSE); @@ -1470,18 +1472,11 @@ zfs_send(zfs_handle_t *zhp, const char * sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", zhp->zfs_name, tosnap); drr.drr_payloadlen = buflen; - err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); - /* write header nvlist */ - if (err != -1 && packbuf != NULL) { - err = cksum_and_write(packbuf, buflen, &zc, - outfd); - } + err = dump_record(&drr, packbuf, buflen, &zc, outfd); free(packbuf); - if (err == -1) { - err = errno; + if (err != 0) goto stderr_out; - } /* write end record */ bzero(&drr, sizeof (drr)); @@ -1714,6 +1709,8 @@ recv_read(libzfs_handle_t *hdl, int fd, int rv; int len = ilen; + assert(ilen <= SPA_MAXBLOCKSIZE); + do { rv = read(fd, cp, len); cp += rv; Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c ============================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Sat Oct 3 07:32:34 2015 (r288552) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Sat Oct 3 07:33:27 2015 (r288553) @@ -73,7 +73,6 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf struct iovec aiov; ASSERT0(len % 8); - fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); aiov.iov_base = buf; aiov.iov_len = len; auio.uio_iov = &aiov; @@ -99,6 +98,38 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf return (dsp->dsa_err); } +/* + * For all record types except BEGIN, fill in the checksum (overlaid in + * drr_u.drr_checksum.drr_checksum). The checksum verifies everything + * up to the start of the checksum itself. + */ +static int +dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) +{ + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + fletcher_4_incremental_native(dsp->dsa_drr, + offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + &dsp->dsa_zc); + if (dsp->dsa_drr->drr_type != DRR_BEGIN) { + ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u. + drr_checksum.drr_checksum)); + dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc; + } + fletcher_4_incremental_native(&dsp->dsa_drr-> + drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), &dsp->dsa_zc); + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) + return (SET_ERROR(EINTR)); + if (payload_len != 0) { + fletcher_4_incremental_native(payload, payload_len, + &dsp->dsa_zc); + if (dump_bytes(dsp, payload, payload_len) != 0) + return (SET_ERROR(EINTR)); + } + return (0); +} + static int dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) @@ -143,8 +174,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o */ if (dsp->dsa_pending_op != PENDING_NONE && dsp->dsa_pending_op != PENDING_FREE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -167,8 +197,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o return (0); } else { /* not a continuation. Push out pending record */ - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -181,8 +210,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o drrf->drr_length = length; drrf->drr_toguid = dsp->dsa_toguid; if (length == -1ULL) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); } else { dsp->dsa_pending_op = PENDING_FREE; @@ -214,12 +242,11 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec * of different types. */ if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } - /* write a DATA record */ + /* write a WRITE record */ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); dsp->dsa_drr->drr_type = DRR_WRITE; drrw->drr_object = object; @@ -245,9 +272,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec drrw->drr_key.ddk_cksum = bp->blk_cksum; } - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (SET_ERROR(EINTR)); - if (dump_bytes(dsp, data, blksz) != 0) + if (dump_record(dsp, data, blksz) != 0) return (SET_ERROR(EINTR)); return (0); } @@ -261,8 +286,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, &(dsp->dsa_drr->drr_u.drr_write_embedded); if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (EINTR); dsp->dsa_pending_op = PENDING_NONE; } @@ -282,9 +306,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, decode_embedded_bp_compressed(bp, buf); - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); - if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) + if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) return (EINTR); return (0); } @@ -295,8 +317,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -308,9 +329,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t drrs->drr_length = blksz; drrs->drr_toguid = dsp->dsa_toguid; - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) - return (SET_ERROR(EINTR)); - if (dump_bytes(dsp, data, blksz)) + if (dump_record(dsp, data, blksz) != 0) return (SET_ERROR(EINTR)); return (0); } @@ -333,8 +352,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin */ if (dsp->dsa_pending_op != PENDING_NONE && dsp->dsa_pending_op != PENDING_FREEOBJECTS) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -348,8 +366,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin return (0); } else { /* can't be aggregated. Push out pending record */ - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -376,8 +393,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t return (dump_freeobjects(dsp, object, 1)); if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -398,11 +414,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE) drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE; - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (SET_ERROR(EINTR)); - - if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) + if (dump_record(dsp, DN_BONUS(dnp), + P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) { return (SET_ERROR(EINTR)); + } /* Free anything past the end of the file. */ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * @@ -651,7 +666,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsp->dsa_os = os; dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid; - ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); dsp->dsa_pending_op = PENDING_NONE; dsp->dsa_incremental = (fromzb != NULL); dsp->dsa_featureflags = featureflags; @@ -663,7 +677,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_long_hold(ds, FTAG); dsl_pool_rele(dp, tag); - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + if (dump_record(dsp, NULL, 0) != 0) { err = dsp->dsa_err; goto out; } @@ -672,7 +686,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, backup_cb, dsp); if (dsp->dsa_pending_op != PENDING_NONE) - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) err = SET_ERROR(EINTR); if (err != 0) { @@ -686,7 +700,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + if (dump_record(dsp, NULL, 0) != 0) { err = dsp->dsa_err; goto out; } @@ -1251,14 +1265,20 @@ dmu_recv_begin(char *tofs, char *tosnap, } struct restorearg { + objset_t *os; int err; boolean_t byteswap; kthread_t *td; struct file *fp; - char *buf; uint64_t voff; int bufsize; /* amount of memory allocated for buf */ + + dmu_replay_record_t *drr; + dmu_replay_record_t *next_drr; + char *buf; zio_cksum_t cksum; + zio_cksum_t prev_cksum; + avl_tree_t *guid_to_ds_map; }; @@ -1323,14 +1343,11 @@ restore_bytes(struct restorearg *ra, voi return (error); } -static void * -restore_read(struct restorearg *ra, int len, char *buf) +static int +restore_read(struct restorearg *ra, int len, void *buf) { int done = 0; - if (buf == NULL) - buf = ra->buf; - /* some things will require 8-byte alignment, so everything must */ ASSERT0(len % 8); ASSERT3U(len, <=, ra->bufsize); @@ -1346,24 +1363,21 @@ restore_read(struct restorearg *ra, int ra->voff += len - done - resid; done = len - resid; if (ra->err != 0) - return (NULL); + return (ra->err); } ASSERT3U(done, ==, len); - if (ra->byteswap) - fletcher_4_incremental_byteswap(buf, len, &ra->cksum); - else - fletcher_4_incremental_native(buf, len, &ra->cksum); - return (buf); + return (0); } static void -backup_byteswap(dmu_replay_record_t *drr) +byteswap_record(dmu_replay_record_t *drr) { #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) drr->drr_type = BSWAP_32(drr->drr_type); drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); + switch (drr->drr_type) { case DRR_BEGIN: DO64(drr_begin.drr_magic); @@ -1393,10 +1407,7 @@ backup_byteswap(dmu_replay_record_t *drr DO64(drr_write.drr_offset); DO64(drr_write.drr_length); DO64(drr_write.drr_toguid); - DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); - DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); - DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); - DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum); DO64(drr_write.drr_key.ddk_prop); break; case DRR_WRITE_BYREF: @@ -1407,10 +1418,8 @@ backup_byteswap(dmu_replay_record_t *drr DO64(drr_write_byref.drr_refguid); DO64(drr_write_byref.drr_refobject); DO64(drr_write_byref.drr_refoffset); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref. + drr_key.ddk_cksum); DO64(drr_write_byref.drr_key.ddk_prop); break; case DRR_WRITE_EMBEDDED: @@ -1433,13 +1442,15 @@ backup_byteswap(dmu_replay_record_t *drr DO64(drr_spill.drr_toguid); break; case DRR_END: - DO64(drr_end.drr_checksum.zc_word[0]); - DO64(drr_end.drr_checksum.zc_word[1]); - DO64(drr_end.drr_checksum.zc_word[2]); - DO64(drr_end.drr_checksum.zc_word[3]); DO64(drr_end.drr_toguid); + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum); break; } + + if (drr->drr_type != DRR_BEGIN) { + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum); + } + #undef DO64 #undef DO32 } @@ -1456,11 +1467,10 @@ deduce_nblkptr(dmu_object_type_t bonus_t } static int -restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) +restore_object(struct restorearg *ra, struct drr_object *drro, void *data) { dmu_object_info_t doi; dmu_tx_t *tx; - void *data = NULL; uint64_t object; int err; @@ -1471,23 +1481,17 @@ restore_object(struct restorearg *ra, ob drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || drro->drr_blksz < SPA_MINBLOCKSIZE || - drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) || + drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(ra->os)) || drro->drr_bonuslen > DN_MAX_BONUSLEN) { return (SET_ERROR(EINVAL)); } - err = dmu_object_info(os, drro->drr_object, &doi); + err = dmu_object_info(ra->os, drro->drr_object, &doi); if (err != 0 && err != ENOENT) return (SET_ERROR(EINVAL)); object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; - if (drro->drr_bonuslen) { - data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); - if (ra->err != 0) - return (ra->err); - } - /* * If we are losing blkptrs or changing the block size this must * be a new file instance. We must clear out the previous file @@ -1501,14 +1505,14 @@ restore_object(struct restorearg *ra, ob if (drro->drr_blksz != doi.doi_data_block_size || nblkptr < doi.doi_nblkptr) { - err = dmu_free_long_range(os, drro->drr_object, + err = dmu_free_long_range(ra->os, drro->drr_object, 0, DMU_OBJECT_END); if (err != 0) return (SET_ERROR(EINVAL)); } } - tx = dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_bonus(tx, object); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { @@ -1518,7 +1522,7 @@ restore_object(struct restorearg *ra, ob if (object == DMU_NEW_OBJECT) { /* currently free, want to be allocated */ - err = dmu_object_claim(os, drro->drr_object, + err = dmu_object_claim(ra->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, tx); } else if (drro->drr_type != doi.doi_type || @@ -1526,7 +1530,7 @@ restore_object(struct restorearg *ra, ob drro->drr_bonustype != doi.doi_bonus_type || drro->drr_bonuslen != doi.doi_bonus_size) { /* currently allocated, but with different properties */ - err = dmu_object_reclaim(os, drro->drr_object, + err = dmu_object_reclaim(ra->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, tx); } @@ -1535,14 +1539,15 @@ restore_object(struct restorearg *ra, ob return (SET_ERROR(EINVAL)); } - dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, - tx); - dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); + dmu_object_set_checksum(ra->os, drro->drr_object, + drro->drr_checksumtype, tx); + dmu_object_set_compress(ra->os, drro->drr_object, + drro->drr_compress, tx); if (data != NULL) { dmu_buf_t *db; - VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); + VERIFY0(dmu_bonus_hold(ra->os, drro->drr_object, FTAG, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); @@ -1561,7 +1566,7 @@ restore_object(struct restorearg *ra, ob /* ARGSUSED */ static int -restore_freeobjects(struct restorearg *ra, objset_t *os, +restore_freeobjects(struct restorearg *ra, struct drr_freeobjects *drrfo) { uint64_t obj; @@ -1571,13 +1576,13 @@ restore_freeobjects(struct restorearg *r for (obj = drrfo->drr_firstobj; obj < drrfo->drr_firstobj + drrfo->drr_numobjs; - (void) dmu_object_next(os, &obj, FALSE, 0)) { + (void) dmu_object_next(ra->os, &obj, FALSE, 0)) { int err; - if (dmu_object_info(os, obj, NULL) != 0) + if (dmu_object_info(ra->os, obj, NULL) != 0) continue; - err = dmu_free_long_object(os, obj); + err = dmu_free_long_object(ra->os, obj); if (err != 0) return (err); } @@ -1585,49 +1590,37 @@ restore_freeobjects(struct restorearg *r } static int -restore_write(struct restorearg *ra, objset_t *os, - struct drr_write *drrw) +restore_write(struct restorearg *ra, struct drr_write *drrw, arc_buf_t *abuf) { dmu_tx_t *tx; - void *data; int err; if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || !DMU_OT_IS_VALID(drrw->drr_type)) return (SET_ERROR(EINVAL)); - if (dmu_object_info(os, drrw->drr_object, NULL) != 0) + if (dmu_object_info(ra->os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); - dmu_buf_t *bonus; - if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0) - return (SET_ERROR(EINVAL)); - - arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length); - - data = restore_read(ra, drrw->drr_length, abuf->b_data); - if (data == NULL) { - dmu_return_arcbuf(abuf); - dmu_buf_rele(bonus, FTAG); - return (ra->err); - } - - tx = dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { - dmu_return_arcbuf(abuf); - dmu_buf_rele(bonus, FTAG); dmu_tx_abort(tx); return (err); } if (ra->byteswap) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drrw->drr_type); - dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); + dmu_ot_byteswap[byteswap].ob_func(abuf->b_data, + drrw->drr_length); } + + dmu_buf_t *bonus; + if (dmu_bonus_hold(ra->os, drrw->drr_object, FTAG, &bonus) != 0) + return (SET_ERROR(EINVAL)); dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); dmu_tx_commit(tx); dmu_buf_rele(bonus, FTAG); @@ -1642,8 +1635,7 @@ restore_write(struct restorearg *ra, obj * data from the stream to fulfill this write. */ static int -restore_write_byref(struct restorearg *ra, objset_t *os, - struct drr_write_byref *drrwbr) +restore_write_byref(struct restorearg *ra, struct drr_write_byref *drrwbr) { dmu_tx_t *tx; int err; @@ -1669,7 +1661,7 @@ restore_write_byref(struct restorearg *r if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) return (SET_ERROR(EINVAL)); } else { - ref_os = os; + ref_os = ra->os; } err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, @@ -1677,7 +1669,7 @@ restore_write_byref(struct restorearg *r if (err != 0) return (err); - tx = dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_write(tx, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length); @@ -1686,7 +1678,7 @@ restore_write_byref(struct restorearg *r dmu_tx_abort(tx); return (err); } - dmu_write(os, drrwbr->drr_object, + dmu_write(ra->os, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); dmu_buf_rele(dbp, FTAG); dmu_tx_commit(tx); @@ -1694,12 +1686,11 @@ restore_write_byref(struct restorearg *r } *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***