FreeBSD Mail Archives

Date:      Wed, 24 Aug 2011 08:14:06 -0700
From:      perryh@pluto.rain.com
To:        vincepoy@gmail.com, freebsd@jdc.parodius.com
Cc:        freebsd-stable@freebsd.org, freebsd-ports@freebsd.org, utisoft@gmail.com
Subject:   ports/sysutils/diskcheckd (Re: bad sector in gmirror HDD)
Message-ID:  <4e55153e.Tj16zX3SskfuVesE%perryh@pluto.rain.com>
In-Reply-To: <4e5141dd.mmh6t9R/knnc8Jll%perryh@pluto.rain.com>
References:  <1B4FC0D8-60E6-49DA-BC52-688052C4DA51@langille.org> <20110819232125.GA4965@icarus.home.lan> <B6B0AD0F-A74C-4F2C-88B0-101443D7831A@langille.org> <20110820032438.GA21925@icarus.home.lan> <4774BC00-F32B-4BF4-A955-3728F885CAA1@langille.org> <4E4FF4D6.1090305@os2.kiev.ua> <20110820183456.GA38317@icarus.home.lan> <4e50c931.gCNlQFqn5sVQXXax%perryh@pluto.rain.com> <20110821050051.GA47415@icarus.home.lan> <4e5141dd.mmh6t9R/knnc8Jll%perryh@pluto.rain.com>

This is a multi-part message in MIME format.

--=_4e55153e.L1trZsjhfo2jeiv2tq66KJ/nqbPar7gh7CvMeIrYte3j0fod
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

> When the specified or calculated rate exceeds 64KB/sec, the
> required sleep interval between 64KB chunks is less than one
> second.  Since diskcheckd calculates the interval in whole seconds
> -- because it calls sleep() rather than usleep() or nanosleep()
> -- an interval of less than one second is calculated as zero ...
> I suspect the fix will be to calculate in microseconds, and call
> usleep() instead of sleep().

I think I may have this fixed.

Could one of you try the attached patch?  I'm especially interested
to see if this also clears up the issues reported as connected with
gmirror (http://www.freebsd.org/cgi/query-pr.cgi?pr=ports/143566),
since I haven't been able to reproduce that part here.

Summary of changes:

* Calculate delays in microseconds, so that delays of less than
  one second between reads (needed to implement rates exceeding
  64KB/sec) do not get rounded down to zero.

* Fix a reinitialization problem when handling SIGHUP.

* Additional debug messages (only with -d).

* Comment and manpage improvememts.

--=_4e55153e.L1trZsjhfo2jeiv2tq66KJ/nqbPar7gh7CvMeIrYte3j0fod
Content-Type: text/plain;
 charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="2011-0823.test.diff"

diff -ruN diskcheckd.old/files/diskcheckd.8 diskcheckd.new/files/diskcheckd.8
--- diskcheckd.old/files/diskcheckd.8	2001-08-28 22:16:22.000000000 -0700
+++ diskcheckd.new/files/diskcheckd.8	2011-08-24 07:09:16.000000000 -0700
@@ -58,15 +58,16 @@
 The second format consists of four white space separated
 fields,
 which are the full pathname of the disk device,
-the size of that disk,
-the frequency in days at which to check that disk,
+the size of this disk,
+the frequency in days at which to check this disk,
 and the rate in kilobytes per second at which to check this disk.
-Naturally,
-it would be contradictory to specify both the frequency and the rate,
-so only one of these should be specified.
-Additionally,
-the size of the disk should not be specified if the rate is specified,
-as this information is unnecessary.
+.Pp
+Either the frequency or the rate should be specified, not both,
+since a specified frequency will be internally converted to whatever
+rate will result in the disk being scanned at (approximately) that
+frequency.
+The size of the disk should not be specified if the rate is specified,
+since the size is used only to convert a specified frequency into a rate.
 .Pp
 If the disk is specified as
 .Dq * ,
@@ -89,7 +90,7 @@
 Note that
 .Nm
 always reads data from the disk in 64KB blocks,
-so the rate your specify may not be the exact rate at which the disk is
+so the rate you specify may not be the exact rate at which the disk is
 actually checked.
 Similarly,
 if you specify the third field (days for complete scan) it is unlikely
@@ -132,8 +133,14 @@
 If this flag is specified,
 .Nm
 will not fork into the background and detach from its controlling terminal
-to become a daemon.
-This flag is primarily used for debugging.
+to become a daemon,
+and it will duplicate its system log messages on its standard error output.
+.Pp
+This flag is primarily used for debugging,
+and may be specified more than once.
+Additional instances will result in additional
+debugging messages on standard error;
+these added messages will not be written to the system log.
 .It Fl f
 Specify the configuration file to use,
 instead of the default
@@ -143,6 +150,13 @@
 instead of the default
 .Pa /var/db/diskcheckd.offsets .
 .El
+.Sh PROGRESS REPORTING
+After every 5 minutes or so of sleep time between reads
+(not including time spent waiting for the reads themselves to complete),
+.Nm
+will update its command parameter space to show its progress
+in scanning each disk.  This report can be viewed using
+.Xr ps 1 .
 .Sh FILES
 .Bl -tag -width /var/db/diskcheckd.offsets -compact
 .It Pa /usr/local/etc/diskcheckd.conf
@@ -188,3 +202,6 @@
 .Sh BUGS
 .Nm
 assumes all disks have 512 byte sectors.
+.Pp
+The code that attempts to identify and report which slice and partition
+contain a detected error does not understand GPT partitions.
diff -ruN diskcheckd.old/files/diskcheckd.c diskcheckd.new/files/diskcheckd.c
--- diskcheckd.old/files/diskcheckd.c	2004-10-27 22:18:33.000000000 -0700
+++ diskcheckd.new/files/diskcheckd.c	2011-08-24 07:05:00.000000000 -0700
@@ -63,10 +63,12 @@
 	char *device;
 	off_t size;
 	int secsize;
-	int days, rate, errors, interval, next;
+	int days, rate, errors;
+	long long interval, next;
 };
 
 volatile sig_atomic_t got_sighup = 0, got_sigterm = 0;
+volatile int debug = 0;
 
 char **getdisknames(char **, int);
 off_t dseek(struct disk *, off_t, int);
@@ -85,9 +87,11 @@
 main(int argc, char *argv[]) {
 	char *buf;
 	struct disk *disks, *dp;
-	int ch, ok, minwait, nextwait;
+	int ch, ok;
+	long long minwait, nextwait;
 	struct sigaction sa;
-	int counter, debug;
+	long long counter;
+	int initial_debug;
 	const char *conf_file, *save_file;
 
 	conf_file = _PATH_CONF;
@@ -97,7 +101,10 @@
 	while ((ch = getopt(argc, argv, "df:o:")) != -1)
 		switch (ch) {
 		case 'd':
-			debug = 1;
+			if (debug)
+				debug *= 2;
+			else
+				debug = 1;
 			break;
 		case 'f':
 			conf_file = optarg;
@@ -116,7 +123,9 @@
 	if (argc != 0)
 		usage();
 
-	openlog("diskcheckd", LOG_CONS|LOG_PID, LOG_DAEMON);
+	initial_debug = debug;
+
+	openlog("diskcheckd", LOG_CONS|LOG_PID|(debug?LOG_PERROR:0), LOG_DAEMON);
 
 	if (!debug && daemon(0, 0) < 0) {
 		syslog(LOG_NOTICE, "daemon failure: %m");
@@ -150,20 +159,24 @@
 	 * each disk's 'next' field, and when that reaches zero,
 	 * that disk is read again.
 	 */
-	counter = 0;
-	minwait = 0;
+	counter = 0LL;
+	minwait = 0LL;
 	while (!got_sigterm) {
 		ok = 0;
-		nextwait = INT_MAX;
+		nextwait = LLONG_MAX;
 		for (dp = disks; dp->device != NULL; dp++)
 			if (dp->fd != -1) {
+				if (debug > 1)
+					fprintf(stderr,
+						"%s:  next(%qd) -= %qd\n",
+						dp->device, dp->next, minwait);
 				if ((dp->next -= minwait) == 0) {
 					ok = 1;
 					readchunk(dp, buf);
 				}
 
 				/* XXX debugging */
-				if (dp->next < 0) {
+				if (dp->next < 0LL) {
 					syslog(LOG_NOTICE,
 					  "dp->next < 0 for %s", dp->device);
 					abort();
@@ -178,14 +191,33 @@
 			exit(EXIT_FAILURE);
 		}
 
-		if (counter >= 300) {
+		/* 300 seconds => 5 minutes */
+		if (counter >= 300000000LL) {
+			if (debug)
+				fprintf(stderr, "counter rollover %qd => 0\n",
+					counter);
 			updateproctitle(disks);
 			writeoffsets(disks, save_file);
-			counter = 0;
+			counter = 0LL;
 		}
 
 		minwait = nextwait;
-		sleep(minwait);
+		if (debug > 1) {
+			--debug;
+			fprintf(stderr, "sleep %qd, counter %qd\n",
+				minwait, counter);
+		}
+
+		/*
+		 * Handle whole seconds and usec separately to avoid overflow
+		 * when calling usleep -- useconds_t is only 32 bits on at
+		 * least some architectures, and minwait (being long long)
+		 * may exceed INT_MAX.
+		 */
+		if (minwait > 1000000LL)
+			sleep((unsigned int)(minwait / 1000000));
+		if ((minwait % 1000000) > 0)
+			usleep((useconds_t)(minwait % 1000000));
 		counter += minwait;
 
 		if (got_sighup) {
@@ -194,6 +226,11 @@
 			 * memory used for the disk structures, and then
 			 * re-read the config file and the disk offsets.
 			 */
+			if (debug) {
+				fprintf(stderr, "got SIGHUP, counter == %qd\n",
+					counter);
+				debug = initial_debug;
+			}
 			writeoffsets(disks, save_file);
 			for (dp = disks; dp->device != NULL; dp++) {
 				free(dp->device);
@@ -202,10 +239,14 @@
 			free(disks);
 			disks = readconf(conf_file);
 			readoffsets(disks, save_file);
+			minwait = 0LL;
 			got_sighup = 0;
 		}
 	}
 
+	if (debug)
+		fprintf(stderr, "got %s, counter == %qd\n",
+			got_sigterm==SIGTERM?"SIGTERM":"SIGINT", counter);
 	writeoffsets(disks, save_file);
 	return (EXIT_SUCCESS);
 }
@@ -384,6 +425,9 @@
 	char *space, buf[1024];
 
 	if ((fp = fopen(save_file, "r")) == NULL) {
+		if (debug > 1 && errno == ENOENT)
+			fprintf(stderr, "open %s failed: %s [continuing]\n",
+				save_file, strerror(errno));
 		if (errno != ENOENT)
 			syslog(LOG_NOTICE, "open %s failed: %m", save_file);
 		return;
@@ -398,10 +442,15 @@
 		    dp->device != NULL && strcmp(dp->device, buf) != 0; dp++)
 			; /* nothing */
 
-		if (dp->device != NULL)
+		if (dp->device != NULL) {
+			if (debug)
+				fprintf(stderr, "%s: seek %s", buf, space + 1);
 			dseek(dp, (off_t)strtoq(space + 1, NULL, 0), SEEK_SET);
+		}
 	}
 	fclose(fp);
+	if (debug)
+		fprintf(stderr, "readoffsets: done\n");
 }
 
 /*
@@ -418,10 +467,21 @@
 		return;
 	}
 
+	if (debug)
+		fprintf(stderr,
+			"#\tposition/size\tdays\trate\terrors\tintvl\tnext\n");
 	for (dp = disks; dp->device != NULL; dp++)
-		if (strcmp(dp->device, "*") != 0)
+		if (strcmp(dp->device, "*") != 0) {
 			fprintf(fp, "%s %qd\n", dp->device,
 			    (quad_t)dseek(dp, 0, SEEK_CUR));
+			if (debug) {
+				fprintf(stderr, "%s %qd\n", dp->device,
+					(quad_t)dseek(dp, 0, SEEK_CUR));
+				fprintf(stderr, "#\t%qd\t%d\t%d\t%d\t%qd\t%qd\n",
+					(quad_t)dp->size, dp->days, dp->rate,
+					dp->errors, dp->interval, dp->next);
+			}
+		}
 	fclose(fp);
 }
 
@@ -577,8 +637,8 @@
 				dp->fd = -1;
 				dp->rate = -1;
 				dp->size = -1;
-				dp->interval = -1;
-				dp->next = 0;
+				dp->interval = -1LL;
+				dp->next = 0LL;
 				break;
 			case 1:
 				/* size */
@@ -688,7 +748,7 @@
 				disks[numdisks].size = dp->size;
 				disks[numdisks].days = dp->days;
 				disks[numdisks].interval = dp->interval;
-				disks[numdisks].next = 0;
+				disks[numdisks].next = 0LL;
 				disks[numdisks].device = *np;
 				numdisks++;
 			}
@@ -727,13 +787,20 @@
 		if (dp->size < 0)
 			getdisksize(dp);
 		if (dp->rate < 0)
-			dp->rate = dp->size / (dp->days * 86400);
+			dp->rate = (dp->size + dp->days * 43200)
+				   / (dp->days * 86400);
 
+		/*   * 1000000 => convert seconds to usec   */
 		if (dp->rate == 0)
 			/* paranoia, should never really happen */
-			dp->interval = READ_SIZE;
+			dp->interval = (long long)READ_SIZE * 1000000;
 		else
-			dp->interval = READ_SIZE / dp->rate;
+			dp->interval = ((long long)READ_SIZE * 1000000)
+				       / dp->rate;
+
+		if (debug > 1)
+			fprintf(stderr, "%s:  rate %d  intvl %qd  next %qd\n",
+				dp->device, dp->rate, dp->interval, dp->next);
 	}
 
 	if (numdisks == 0) {
@@ -873,8 +940,7 @@
 void
 sigterm(int sig) {
 
-	sig = sig;
-	got_sigterm = 1;
+	got_sigterm = sig;
 }
 
 void

--=_4e55153e.L1trZsjhfo2jeiv2tq66KJ/nqbPar7gh7CvMeIrYte3j0fod--

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4e55153e.Tj16zX3SskfuVesE%perryh>

Header And Logo

Peripheral Links

Site Navigation

Header And Logo

Peripheral Links

Search

Site Navigation