From owner-freebsd-current@FreeBSD.ORG Thu Aug 26 17:34:54 2004 Return-Path: Delivered-To: freebsd-current@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id A69B016A4CE; Thu, 26 Aug 2004 17:34:54 +0000 (GMT) Received: from dan.emsphone.com (dan.emsphone.com [199.67.51.101]) by mx1.FreeBSD.org (Postfix) with ESMTP id 5748043D54; Thu, 26 Aug 2004 17:34:54 +0000 (GMT) (envelope-from dan@dan.emsphone.com) Received: (from dan@localhost) by dan.emsphone.com (8.12.11/8.12.11) id i7QHYrIP035193; Thu, 26 Aug 2004 12:34:53 -0500 (CDT) (envelope-from dan) Date: Thu, 26 Aug 2004 12:34:53 -0500 From: Dan Nelson To: Daniel Eriksson Message-ID: <20040826173453.GF91848@dan.emsphone.com> References: Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="YiEDa0DAkWCtVeE4" Content-Disposition: inline In-Reply-To: X-OS: FreeBSD 5.3-BETA1 X-message-flag: Outlook Error User-Agent: Mutt/1.5.6i cc: freebsd-current@freebsd.org cc: 'Robert Watson' Subject: Re: PLEASE TEST: IPI deadlock avoidance patch X-BeenThere: freebsd-current@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: Discussions about the use of FreeBSD-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 26 Aug 2004 17:34:54 -0000 --YiEDa0DAkWCtVeE4 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In the last episode (Aug 26), Daniel Eriksson said: > Robert Watson wrote: > > Could you be more specific about "one way" or "another"? > > Not at this point. The machine is on the other side of town and needs > to be online for the next couple of days. > > The corrupted files are large (10-400MB) binary files, and the > content of the files makes it hard to pinpoint exactly what is > corrupted. > > I will see what I can do about providing more information, but I > cannot make any promises right now. I used the following two programs to help diagnose some file corruption problems in Linux's XFS and NFS code. Genoffsets creates a 1.5gb file called "offsets" with the 4-byte integers 0, 4, 8, 16, etc. in network-byte order (so each number "n" starts at file offset "n"). You than copy that file (or a part of it) to a remote system and run cmpoffsets on it to verify that the file copied ok. Cmpoffsets just reads the file and checks that the contents are correct. For runs of data that are incorrect, it prints the start and end offsets (and whether they're on power-of-2 boundaries). If the corrupted data is zeros, it prints that. Otherwise it assumes the data was copied from another location and prints the start and end offsets of the copied data (and whether they're on power-of-2 boundaries). Run the copy and verify in a loop that exits if cmpoffsets ever returns a nonzero result. -- Dan Nelson dnelson@allantgroup.com --YiEDa0DAkWCtVeE4 Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="genoffsets.c" #if 0 /* Magic self-compiling C source code. Run "sh cmpoffsets.c" -dnelson set -ex gcc -g -Wall -O2 $0 -o genoffsets exit 0 */ #endif #include #include #include #include int main(int argc, char **argv) { unsigned int i; FILE *out; out = fopen("offsets", "wb"); for (i = 0; i < 1536 * 1024 * 1024; i += 4) { unsigned int o; o = htonl(i); fwrite(&o, sizeof(int), 1, out); } fclose(out); return 0; } --YiEDa0DAkWCtVeE4 Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="cmpoffsets.c" #if 0 /* Magic self-compiling C source code. Run "sh cmpoffsets.c" -dnelson set -ex gcc -g -Wall -O2 $0 -o cmpoffsets exit 0 */ #endif #include #include #include #include unsigned int boundarycheck(int number); int main(int argc, char **argv) { unsigned int i; FILE *in; int tosmall = 0; int firstbad = -1, lastbad = -1; int firstbadoff = 0, lastbadoff = 0; int failed = 0; if (argc != 2) { printf("Usage: cmpoffsets file.copy\n"); exit(1); } in = fopen(argv[1], "rb"); if (!in) { perror("cannot open file"); exit(1); } setvbuf(in, malloc(65536), _IOFBF, 65536); for (i = 0; i < 1536 * 1024 * 1024 && !tosmall; i += 4) { unsigned int o; if (fread(&o, 4, 1, in) != 1) { o = htonl(i); tosmall = 1; } o = ntohl(o); if (o != i) { failed = 1; if (lastbad != i - 4) { firstbad = i; firstbadoff = o; } lastbad = i; lastbadoff = o; } else { if (lastbad == i - 4) { if (firstbad == lastbad) { /* range of one */ printf("%u (4) (copied data from %u)\n", firstbad, firstbadoff); } else if (firstbadoff == 0 && lastbadoff == 0) { /* run of nulls */ printf("%u-%u (%u) are zero\n", firstbad, lastbad + 3, lastbad + 4 - firstbad); } else { /* range > 1 */ printf("%u-%u (%u) (copied data from %u-%u)\n", firstbad, lastbad + 3, lastbad + 4 - firstbad, firstbadoff, lastbadoff + 3); if (boundarycheck(firstbad) >= 1024) printf(" starts at a %u-byte block\n", boundarycheck(firstbad)); if (boundarycheck(lastbad + 4) >= 1024) printf(" ends at a %u-byte block\n", boundarycheck(lastbadoff + 4)); if (boundarycheck(firstbadoff) >= 1024) printf(" copied data starts at a %u-byte block\n", boundarycheck(firstbadoff)); if (boundarycheck(lastbadoff + 4) >= 1024) printf(" copied data ends at a %u-byte block\n", boundarycheck(lastbadoff + 4)); } firstbad = lastbad = -1; } } } if (tosmall) { printf("file is truncated\n"); failed = 1; } return failed; } /* if this number is on a block boundary less than 1mb, find it */ unsigned int boundarycheck(int number) { long long block = 1; while (block < number && block < 1048576) { block *= 2; if (number / block * block != number) break; } return block; } --YiEDa0DAkWCtVeE4--