Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 26 Aug 2004 12:34:53 -0500
From:      Dan Nelson <dnelson@allantgroup.com>
To:        Daniel Eriksson <daniel_k_eriksson@telia.com>
Cc:        'Robert Watson' <rwatson@freebsd.org>
Subject:   Re: PLEASE TEST: IPI deadlock avoidance patch
Message-ID:  <20040826173453.GF91848@dan.emsphone.com>
In-Reply-To: <!~!UENERkVCMDkAAQACAAAAAAAAAAAAAAAAABgAAAAAAAAA0VcX9IoJqUaXPS8MjT1PdsKAAAAQAAAAWLfpAGzkoka1Igv2noS3ugEAAAAA@telia.com>
References:  <Pine.NEB.3.96L.1040826114453.82006A-100000@fledge.watson.org> <!~!UENERkVCMDkAAQACAAAAAAAAAAAAAAAAABgAAAAAAAAA0VcX9IoJqUaXPS8MjT1PdsKAAAAQAAAAWLfpAGzkoka1Igv2noS3ugEAAAAA@telia.com>

Next in thread | Previous in thread | Raw E-Mail | Index | Archive | Help

--YiEDa0DAkWCtVeE4
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

In the last episode (Aug 26), Daniel Eriksson said:
> Robert Watson wrote:
> > Could you be more specific about "one way" or "another"?  
>
> Not at this point. The machine is on the other side of town and needs
> to be online for the next couple of days.
> 
> The corrupted files are large (10-400MB) binary files, and the
> content of the files makes it hard to pinpoint exactly what is
> corrupted.
> 
> I will see what I can do about providing more information, but I
> cannot make any promises right now.

I used the following two programs to help diagnose some file corruption
problems in Linux's XFS and NFS code.  Genoffsets creates a 1.5gb file
called "offsets" with the 4-byte integers 0, 4, 8, 12, etc. in
network-byte order (so each number "n" starts at file offset "n").  You
then copy that file (or a part of it) to a remote system and run
cmpoffsets on it to verify that the file copied ok.

Cmpoffsets just reads the file and checks that the contents are
correct.  For runs of data that are incorrect, it prints the start and
end offsets (and whether they're on power-of-2 boundaries).  If the
corrupted data is zeros, it prints that.  Otherwise it assumes the data
was copied from another location and prints the start and end offsets
of the copied data (and whether they're on power-of-2 boundaries).

Run the copy and verify in a loop that exits if cmpoffsets ever returns
a nonzero result.

-- 
	Dan Nelson
	dnelson@allantgroup.com

--YiEDa0DAkWCtVeE4
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="genoffsets.c"

#if 0 /* Magic self-compiling C source code.  Run "sh genoffsets.c"   -dnelson
set -ex
gcc -g -Wall -O2 $0 -o genoffsets
exit 0
*/
#endif

#include <stdio.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <stdlib.h>

/*
 * Write the reference file "offsets" in the current directory: 1.5 GB of
 * consecutive 4-byte integers 0, 4, 8, ... in network byte order, so that
 * every word holds its own file offset.
 *
 * Returns 0 on success, 1 if the file cannot be created, written or closed.
 */
int main(int argc, char **argv)
{
	unsigned int i;
	FILE *out;

	out = fopen("offsets", "wb");
	if (!out)
	{
		perror("cannot create offsets");
		return 1;
	}

	for (i = 0; i < 1536 * 1024 * 1024; i += 4)
	{
		unsigned int o;

		o = htonl(i);
		if (fwrite(&o, sizeof o, 1, out) != 1)
		{
			perror("cannot write offsets");
			fclose(out);
			return 1;
		}
	}
	/* fclose flushes buffered data, so a late write error shows up here */
	if (fclose(out) != 0)
	{
		perror("cannot close offsets");
		return 1;
	}
	return 0;
}

--YiEDa0DAkWCtVeE4
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="cmpoffsets.c"

#if 0 /* Magic self-compiling C source code.  Run "sh cmpoffsets.c"   -dnelson
set -ex
gcc -g -Wall -O2 $0 -o cmpoffsets
exit 0
*/
#endif

#include <stdio.h>
#include <stdlib.h>
#include <sys/param.h>
#include <netinet/in.h>

unsigned int boundarycheck(int number);

/*
 * Print a report for one run of consecutive mismatched words.
 *
 * firstbad/lastbad are the file offsets of the first and last bad word;
 * firstbadoff/lastbadoff are the values actually found in those words.
 * A run whose first and last values are both zero is reported as zeros;
 * any other run is reported as data copied from the offsets the values
 * name, along with any block boundaries of 1024 bytes or more that the
 * run or its apparent source starts or ends on.
 */
static void report_bad_run(unsigned int firstbad, unsigned int lastbad,
			   unsigned int firstbadoff, unsigned int lastbadoff)
{
	if (firstbad == lastbad)
	{	/* range of one */
		printf("%u (4) (copied data from %u)\n", firstbad, firstbadoff);
	} else if (firstbadoff == 0 && lastbadoff == 0)
	{	/* run of nulls */
		printf("%u-%u (%u) are zero\n", firstbad, lastbad + 3, lastbad + 4 - firstbad);
	} else
	{	/* range > 1 */
		printf("%u-%u (%u) (copied data from %u-%u)\n", firstbad, lastbad + 3,
		       lastbad + 4 - firstbad,
		       firstbadoff, lastbadoff + 3);
		if (boundarycheck(firstbad) >= 1024)
			printf(" starts at a %u-byte block\n", boundarycheck(firstbad));
		/* report the boundary of the bad range itself, not of its source */
		if (boundarycheck(lastbad + 4) >= 1024)
			printf(" ends at a %u-byte block\n", boundarycheck(lastbad + 4));
		if (boundarycheck(firstbadoff) >= 1024)
			printf(" copied data starts at a %u-byte block\n", boundarycheck(firstbadoff));
		if (boundarycheck(lastbadoff + 4) >= 1024)
			printf(" copied data ends at a %u-byte block\n", boundarycheck(lastbadoff + 4));
	}
}

/*
 * Verify a copy of the file written by genoffsets: every 4-byte word, read
 * in network byte order, must equal its own file offset.  Each run of
 * mismatched words is reported on stdout.
 *
 * Usage: cmpoffsets file.copy
 * Returns 0 if the whole file matched, 1 on any mismatch, truncation,
 * usage error or failure to open the file.
 */
int main(int argc, char **argv)
{
	unsigned int i;
	FILE *in;
	char *iobuf;
	int tosmall = 0;
	int inrun = 0;		/* nonzero while inside a run of bad words */
	unsigned int firstbad = 0, lastbad = 0;
	unsigned int firstbadoff = 0, lastbadoff = 0;
	int failed = 0;

	if (argc != 2)
	{
		printf("Usage: cmpoffsets file.copy\n");
		exit(1);
	}
	in = fopen(argv[1], "rb");
	if (!in)
	{
		perror("cannot open file");
		exit(1);
	}
	/* a larger buffer is only an optimisation; keep the default if malloc fails */
	iobuf = malloc(65536);
	if (iobuf)
		setvbuf(in, iobuf, _IOFBF, 65536);

	for (i = 0; i < 1536 * 1024 * 1024 && !tosmall; i += 4)
	{
		unsigned int o;

		if (fread(&o, sizeof o, 1, in) != 1)
		{
			/*
			 * Short read: substitute the expected value so this word
			 * compares equal and flushes any pending bad run below.
			 */
			o = htonl(i);
			tosmall = 1;
		}
		o = ntohl(o);
		if (o != i)
		{
			failed = 1;
			if (!inrun)
			{
				inrun = 1;
				firstbad = i;
				firstbadoff = o;
			}
			lastbad = i;
			lastbadoff = o;
		} else if (inrun)
		{
			report_bad_run(firstbad, lastbad, firstbadoff, lastbadoff);
			inrun = 0;
		}
	}
	/* a bad run that reaches the end of the expected data has no good word after it */
	if (inrun)
		report_bad_run(firstbad, lastbad, firstbadoff, lastbadoff);
	if (tosmall)
	{
		printf("file is truncated\n");
		failed = 1;
	}
	fclose(in);
	free(iobuf);	/* the stream is closed, so the buffer is no longer in use */
	return failed;
}

/* if this number is on a block boundary less than 1mb, find it */
/*
 * Return the largest power of two, capped at 1 MiB (1048576), that evenly
 * divides number -- i.e. the biggest block boundary the offset sits on.
 *
 * Returns 1 for odd numbers and also for zero or negative input, for which
 * no particular boundary is reported.
 */
unsigned int boundarycheck(int number)
{
	unsigned int n;
	unsigned int block = 1;

	if (number <= 0)
		return 1;
	n = (unsigned int)number;

	/* only step up to the next power of two once we know it divides n */
	while (block < 1048576 && n % (block * 2) == 0)
		block *= 2;
	return block;
}

--YiEDa0DAkWCtVeE4--



Want to link to this message? Use this URL: <http://docs.FreeBSD.org/cgi/mid.cgi?20040826173453.GF91848>