Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 10 Aug 2015 19:32:31 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r286586 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/zstreamdump vendor/illumos/dist/lib/libzfs/common vendor/illumo...
Message-ID:  <201508101932.t7AJWV9a098548@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Mon Aug 10 19:32:30 2015
New Revision: 286586
URL: https://svnweb.freebsd.org/changeset/base/286586

Log:
  5746 more checksumming in zfs send
  
  Reviewed by: Christopher Siden <christopher.siden@delphix.com>
  Reviewed by: George Wilson <george.wilson@delphix.com>
  Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
  Approved by: Albert Lee <trisk@omniti.com>
  Author: Matthew Ahrens <mahrens@delphix.com>
  
  illumos/illumos-gate@98110f08fa182032082d98be2ddb9391fcd62bf1

Modified:
  vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c
  vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c
  vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c

Changes in other areas also in this revision:
Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_ioctl.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zio_checksum.h

Modified: vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c
==============================================================================
--- vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c	Mon Aug 10 18:27:54 2015	(r286585)
+++ vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c	Mon Aug 10 19:32:30 2015	(r286586)
@@ -25,7 +25,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  */
 
 #include <ctype.h>
@@ -34,6 +34,7 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <stddef.h>
 
 #include <sys/dmu.h>
 #include <sys/zfs_ioctl.h>
@@ -83,7 +84,6 @@ safe_malloc(size_t size)
  *
  * Read while computing incremental checksum
  */
-
 static size_t
 ssread(void *buf, size_t len, zio_cksum_t *cksum)
 {
@@ -92,7 +92,7 @@ ssread(void *buf, size_t len, zio_cksum_
 	if ((outlen = fread(buf, len, 1, send_stream)) == 0)
 		return (0);
 
-	if (do_cksum && cksum) {
+	if (do_cksum) {
 		if (do_byteswap)
 			fletcher_4_incremental_byteswap(buf, len, cksum);
 		else
@@ -102,6 +102,34 @@ ssread(void *buf, size_t len, zio_cksum_
 	return (outlen);
 }
 
+static size_t
+read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
+{
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
+	if (r == 0)
+		return (0);
+	zio_cksum_t saved_cksum = *cksum;
+	r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), cksum);
+	if (r == 0)
+		return (0);
+	if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
+	    !ZIO_CHECKSUM_EQUAL(saved_cksum,
+	    drr->drr_u.drr_checksum.drr_checksum)) {
+		fprintf(stderr, "invalid checksum\n");
+		(void) printf("Incorrect checksum in record header.\n");
+		(void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
+		    saved_cksum.zc_word[0],
+		    saved_cksum.zc_word[1],
+		    saved_cksum.zc_word[2],
+		    saved_cksum.zc_word[3]);
+		exit(1);
+	}
+	return (sizeof (*drr));
+}
+
 /*
  * Print part of a block in ASCII characters
  */
@@ -183,8 +211,10 @@ main(int argc, char *argv[])
 	struct drr_free *drrf = &thedrr.drr_u.drr_free;
 	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
 	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+	struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
 	char c;
 	boolean_t verbose = B_FALSE;
+	boolean_t very_verbose = B_FALSE;
 	boolean_t first = B_TRUE;
 	/*
 	 * dump flag controls whether the contents of any modified data blocks
@@ -202,11 +232,14 @@ main(int argc, char *argv[])
 			do_cksum = B_FALSE;
 			break;
 		case 'v':
+			if (verbose)
+				very_verbose = B_TRUE;
 			verbose = B_TRUE;
 			break;
 		case 'd':
 			dump = B_TRUE;
 			verbose = B_TRUE;
+			very_verbose = B_TRUE;
 			break;
 		case ':':
 			(void) fprintf(stderr,
@@ -230,7 +263,7 @@ main(int argc, char *argv[])
 
 	send_stream = stdin;
 	pcksum = zc;
-	while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+	while (read_hdr(drr, &zc)) {
 
 		/*
 		 * If this is the first DMU record being processed, check for
@@ -432,7 +465,7 @@ main(int argc, char *argv[])
 			if (verbose) {
 				(void) printf("WRITE object = %llu type = %u "
 				    "checksum type = %u\n"
-				    "offset = %llu length = %llu "
+				    "    offset = %llu length = %llu "
 				    "props = %llx\n",
 				    (u_longlong_t)drrw->drr_object,
 				    drrw->drr_type,
@@ -476,9 +509,9 @@ main(int argc, char *argv[])
 			if (verbose) {
 				(void) printf("WRITE_BYREF object = %llu "
 				    "checksum type = %u props = %llx\n"
-				    "offset = %llu length = %llu\n"
+				    "    offset = %llu length = %llu\n"
 				    "toguid = %llx refguid = %llx\n"
-				    "refobject = %llu refoffset = %llu\n",
+				    "    refobject = %llu refoffset = %llu\n",
 				    (u_longlong_t)drrwbr->drr_object,
 				    drrwbr->drr_checksumtype,
 				    (u_longlong_t)drrwbr->drr_key.ddk_prop,
@@ -538,7 +571,7 @@ main(int argc, char *argv[])
 			if (verbose) {
 				(void) printf("WRITE_EMBEDDED object = %llu "
 				    "offset = %llu length = %llu\n"
-				    "toguid = %llx comp = %u etype = %u "
+				    "    toguid = %llx comp = %u etype = %u "
 				    "lsize = %u psize = %u\n",
 				    (u_longlong_t)drrwe->drr_object,
 				    (u_longlong_t)drrwe->drr_offset,
@@ -553,6 +586,13 @@ main(int argc, char *argv[])
 			    P2ROUNDUP(drrwe->drr_psize, 8), &zc);
 			break;
 		}
+		if (drr->drr_type != DRR_BEGIN && very_verbose) {
+			(void) printf("    checksum = %llx/%llx/%llx/%llx\n",
+			    (longlong_t)drrc->drr_checksum.zc_word[0],
+			    (longlong_t)drrc->drr_checksum.zc_word[1],
+			    (longlong_t)drrc->drr_checksum.zc_word[2],
+			    (longlong_t)drrc->drr_checksum.zc_word[3]);
+		}
 		pcksum = zc;
 	}
 	free(buf);

Modified: vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c
==============================================================================
--- vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c	Mon Aug 10 18:27:54 2015	(r286585)
+++ vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c	Mon Aug 10 19:32:30 2015	(r286586)
@@ -179,10 +179,28 @@ ddt_update(libzfs_handle_t *hdl, dedup_t
 }
 
 static int
-cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+    zio_cksum_t *zc, int outfd)
 {
-	fletcher_4_incremental_native(buf, len, zc);
-	return (write(outfd, buf, len));
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	fletcher_4_incremental_native(drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+	if (drr->drr_type != DRR_BEGIN) {
+		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+		    drr_checksum.drr_checksum));
+		drr->drr_u.drr_checksum.drr_checksum = *zc;
+	}
+	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), zc);
+	if (write(outfd, drr, sizeof (*drr)) == -1)
+		return (errno);
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len, zc);
+		if (write(outfd, payload, payload_len) == -1)
+			return (errno);
+	}
+	return (0);
 }
 
 /*
@@ -209,26 +227,18 @@ cksummer(void *arg)
 	char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
 	dmu_replay_record_t thedrr;
 	dmu_replay_record_t *drr = &thedrr;
-	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
-	struct drr_end *drre = &thedrr.drr_u.drr_end;
-	struct drr_object *drro = &thedrr.drr_u.drr_object;
-	struct drr_write *drrw = &thedrr.drr_u.drr_write;
-	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
-	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
 	FILE *ofp;
 	int outfd;
-	dmu_replay_record_t wbr_drr = {0};
-	struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
 	dedup_table_t ddt;
 	zio_cksum_t stream_cksum;
 	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
 	uint64_t numbuckets;
 
 	ddt.max_ddt_size =
-	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
-	    SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
+	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
+	    SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
 
-	numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
+	numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
 
 	/*
 	 * numbuckets must be a power of 2.  Increase number to
@@ -244,32 +254,29 @@ cksummer(void *arg)
 	ddt.numhashbits = high_order_bit(numbuckets) - 1;
 	ddt.ddt_full = B_FALSE;
 
-	/* Initialize the write-by-reference block. */
-	wbr_drr.drr_type = DRR_WRITE_BYREF;
-	wbr_drr.drr_payloadlen = 0;
-
 	outfd = dda->outputfd;
 	ofp = fdopen(dda->inputfd, "r");
-	while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
+	while (ssread(drr, sizeof (*drr), ofp) != 0) {
 
 		switch (drr->drr_type) {
 		case DRR_BEGIN:
 		{
-			int	fflags;
+			struct drr_begin *drrb = &drr->drr_u.drr_begin;
+			int fflags;
+			int sz = 0;
 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
 
+			ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+
 			/* set the DEDUP feature flag for this stream */
 			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
 			    DMU_BACKUP_FEATURE_DEDUPPROPS);
 			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
 
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
 			if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 			    DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
-				int sz = drr->drr_payloadlen;
+				sz = drr->drr_payloadlen;
 
 				if (sz > SPA_MAXBLOCKSIZE) {
 					buf = zfs_realloc(dda->dedup_hdl, buf,
@@ -278,64 +285,60 @@ cksummer(void *arg)
 				(void) ssread(buf, sz, ofp);
 				if (ferror(stdin))
 					perror("fread");
-				if (cksum_and_write(buf, sz, &stream_cksum,
-				    outfd) == -1)
-					goto out;
 			}
+			if (dump_record(drr, buf, sz, &stream_cksum,
+			    outfd) != 0)
+				goto out;
 			break;
 		}
 
 		case DRR_END:
 		{
+			struct drr_end *drre = &drr->drr_u.drr_end;
 			/* use the recalculated checksum */
-			ZIO_SET_CHECKSUM(&drre->drr_checksum,
-			    stream_cksum.zc_word[0], stream_cksum.zc_word[1],
-			    stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
-			if ((write(outfd, drr,
-			    sizeof (dmu_replay_record_t))) == -1)
+			drre->drr_checksum = stream_cksum;
+			if (dump_record(drr, NULL, 0, &stream_cksum,
+			    outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_OBJECT:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
+			struct drr_object *drro = &drr->drr_u.drr_object;
 			if (drro->drr_bonuslen > 0) {
 				(void) ssread(buf,
 				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 				    ofp);
-				if (cksum_and_write(buf,
-				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
-				    &stream_cksum, outfd) == -1)
-					goto out;
 			}
+			if (dump_record(drr, buf,
+			    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+			    &stream_cksum, outfd) != 0)
+				goto out;
 			break;
 		}
 
 		case DRR_SPILL:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
+			struct drr_spill *drrs = &drr->drr_u.drr_spill;
 			(void) ssread(buf, drrs->drr_length, ofp);
-			if (cksum_and_write(buf, drrs->drr_length,
-			    &stream_cksum, outfd) == -1)
+			if (dump_record(drr, buf, drrs->drr_length,
+			    &stream_cksum, outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_FREEOBJECTS:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
+			if (dump_record(drr, NULL, 0, &stream_cksum,
+			    outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_WRITE:
 		{
+			struct drr_write *drrw = &drr->drr_u.drr_write;
 			dataref_t	dataref;
 
 			(void) ssread(buf, drrw->drr_length, ofp);
@@ -373,7 +376,13 @@ cksummer(void *arg)
 			if (ddt_update(dda->dedup_hdl, &ddt,
 			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
 			    &dataref)) {
+				dmu_replay_record_t wbr_drr = {0};
+				struct drr_write_byref *wbr_drrr =
+				    &wbr_drr.drr_u.drr_write_byref;
+
 				/* block already present in stream */
+				wbr_drr.drr_type = DRR_WRITE_BYREF;
+
 				wbr_drrr->drr_object = drrw->drr_object;
 				wbr_drrr->drr_offset = drrw->drr_offset;
 				wbr_drrr->drr_length = drrw->drr_length;
@@ -393,19 +402,13 @@ cksummer(void *arg)
 				wbr_drrr->drr_key.ddk_prop =
 				    drrw->drr_key.ddk_prop;
 
-				if (cksum_and_write(&wbr_drr,
-				    sizeof (dmu_replay_record_t), &stream_cksum,
-				    outfd) == -1)
+				if (dump_record(&wbr_drr, NULL, 0,
+				    &stream_cksum, outfd) != 0)
 					goto out;
 			} else {
 				/* block not previously seen */
-				if (cksum_and_write(drr,
-				    sizeof (dmu_replay_record_t), &stream_cksum,
-				    outfd) == -1)
-					goto out;
-				if (cksum_and_write(buf,
-				    drrw->drr_length,
-				    &stream_cksum, outfd) == -1)
+				if (dump_record(drr, buf, drrw->drr_length,
+				    &stream_cksum, outfd) != 0)
 					goto out;
 			}
 			break;
@@ -413,28 +416,27 @@ cksummer(void *arg)
 
 		case DRR_WRITE_EMBEDDED:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
+			struct drr_write_embedded *drrwe =
+			    &drr->drr_u.drr_write_embedded;
 			(void) ssread(buf,
 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
-			if (cksum_and_write(buf,
+			if (dump_record(drr, buf,
 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
-			    &stream_cksum, outfd) == -1)
+			    &stream_cksum, outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_FREE:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
+			if (dump_record(drr, NULL, 0, &stream_cksum,
+			    outfd) != 0)
 				goto out;
 			break;
 		}
 
 		default:
-			(void) printf("INVALID record type 0x%x\n",
+			(void) fprintf(stderr, "INVALID record type 0x%x\n",
 			    drr->drr_type);
 			/* should never happen, so assert */
 			assert(B_FALSE);
@@ -1460,18 +1462,11 @@ zfs_send(zfs_handle_t *zhp, const char *
 			    sizeof (drr.drr_u.drr_begin.drr_toname),
 			    "%s@%s", zhp->zfs_name, tosnap);
 			drr.drr_payloadlen = buflen;
-			err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
 
-			/* write header nvlist */
-			if (err != -1 && packbuf != NULL) {
-				err = cksum_and_write(packbuf, buflen, &zc,
-				    outfd);
-			}
+			err = dump_record(&drr, packbuf, buflen, &zc, outfd);
 			free(packbuf);
-			if (err == -1) {
-				err = errno;
+			if (err != 0)
 				goto stderr_out;
-			}
 
 			/* write end record */
 			bzero(&drr, sizeof (drr));
@@ -1702,6 +1697,8 @@ recv_read(libzfs_handle_t *hdl, int fd, 
 	int rv;
 	int len = ilen;
 
+	assert(ilen <= SPA_MAXBLOCKSIZE);
+
 	do {
 		rv = read(fd, cp, len);
 		cp += rv;

Modified: vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c
==============================================================================
--- vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c	Mon Aug 10 18:27:54 2015	(r286585)
+++ vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c	Mon Aug 10 19:32:30 2015	(r286586)
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
@@ -485,18 +485,30 @@ lzc_send(const char *snapname, const cha
 }
 
 /*
- * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
+ * "from" can be NULL, a snapshot, or a bookmark.
+ *
+ * If from is NULL, a full (non-incremental) stream will be estimated.  This
+ * is calculated very efficiently.
+ *
+ * If from is a snapshot, lzc_send_space uses the deadlists attached to
+ * each snapshot to efficiently estimate the stream size.
+ *
+ * If from is a bookmark, the indirect blocks in the destination snapshot
+ * are traversed, looking for blocks with a birth time since the creation TXG of
+ * the snapshot this bookmark was created from.  This will result in
+ * significantly more I/O and be less efficient than a send space estimation on
+ * an equivalent snapshot.
  */
 int
-lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
+lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
 {
 	nvlist_t *args;
 	nvlist_t *result;
 	int err;
 
 	args = fnvlist_alloc();
-	if (fromsnap != NULL)
-		fnvlist_add_string(args, "fromsnap", fromsnap);
+	if (from != NULL)
+		fnvlist_add_string(args, "from", from);
 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
 	nvlist_free(args);
 	if (err == 0)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201508101932.t7AJWV9a098548>