From owner-svn-src-projects@FreeBSD.ORG Thu Jan 27 02:00:02 2011 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 225E5106564A; Thu, 27 Jan 2011 02:00:02 +0000 (UTC) (envelope-from imp@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 0A1148FC08; Thu, 27 Jan 2011 02:00:02 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id p0R201Gp097313; Thu, 27 Jan 2011 02:00:01 GMT (envelope-from imp@svn.freebsd.org) Received: (from imp@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id p0R2016b097311; Thu, 27 Jan 2011 02:00:01 GMT (envelope-from imp@svn.freebsd.org) Message-Id: <201101270200.p0R2016b097311@svn.freebsd.org> From: Warner Losh Date: Thu, 27 Jan 2011 02:00:01 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r217918 - projects/graid/head/sys/geom/raid X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 27 Jan 2011 02:00:02 -0000 Author: imp Date: Thu Jan 27 02:00:01 2011 New Revision: 217918 URL: http://svn.freebsd.org/changeset/base/217918 Log: Checkpoint commit: o Kick off rebuild of the volume correctly. o properly create and clone the master bp for the rebuild o tweak a few logging items o add a few logging items. 
o tweak timeout goo o add a semi-snarky comment about some semi-lame code I wrote Modified: projects/graid/head/sys/geom/raid/tr_raid1.c Modified: projects/graid/head/sys/geom/raid/tr_raid1.c ============================================================================== --- projects/graid/head/sys/geom/raid/tr_raid1.c Thu Jan 27 00:36:54 2011 (r217917) +++ projects/graid/head/sys/geom/raid/tr_raid1.c Thu Jan 27 02:00:01 2011 (r217918) @@ -125,19 +125,23 @@ g_raid_tr_raid1_rebuild_some(struct g_ra struct g_raid_subdisk *sd) { struct g_raid_tr_raid1_object *trs; - struct bio *bp; + struct bio *bp, *bp2; +/* XXX need interlock here? */ trs = (struct g_raid_tr_raid1_object *)tr; trs->trso_recover_slabs = SD_REBUILD_CLUSTER_IDLE; trs->trso_fair_io = SD_REBUILD_FAIR_IO; bp = g_new_bio(); bp->bio_offset = sd->sd_rebuild_pos; + bp->bio_length = MIN(SD_REBUILD_SLAB, + sd->sd_volume->v_mediasize - sd->sd_rebuild_pos); bp->bio_data = trs->trso_buffer; bp->bio_cmd = BIO_READ; - bp->bio_cflags = G_RAID_BIO_FLAG_SYNC; - bp->bio_caller1 = trs->trso_good_sd; + bp2 = g_clone_bio(bp); + bp2->bio_cflags = G_RAID_BIO_FLAG_SYNC; + bp2->bio_caller1 = trs->trso_good_sd; g_raid_lock_range(sd->sd_volume, /* Lock callback starts I/O */ - sd->sd_rebuild_pos, SD_REBUILD_SLAB, bp); + bp2->bio_offset, bp2->bio_length, bp2); } static void @@ -170,6 +174,7 @@ g_raid_tr_raid1_rebuild_finish(struct g_ trs->trso_failed_sd = NULL; trs->trso_good_sd = NULL; trs->trso_buffer = NULL; + vol->v_timeout = 0; /* xxx transition array? 
*/ } @@ -186,6 +191,7 @@ g_raid_tr_raid1_rebuild_abort(struct g_r trs->trso_failed_sd = NULL; trs->trso_good_sd = NULL; trs->trso_buffer = NULL; + vol->v_timeout = 0; } static struct g_raid_subdisk * @@ -233,10 +239,37 @@ g_raid_tr_raid1_rebuild_start(struct g_r trs->trso_type = TR_RAID1_REBUILD; trs->trso_failed_sd->sd_rebuild_pos = 0; trs->trso_buffer = malloc(SD_REBUILD_SLAB, M_TR_raid1, M_WAITOK); + vol->v_to_arg = trs; + vol->v_timeout = g_raid_tr_raid1_idle_rebuild; /* XXX what else do I need to setup the first time? */ g_raid_tr_raid1_rebuild_some(tr, trs->trso_failed_sd); } + +static void +g_raid_tr_raid1_maybe_rebuild(struct g_raid_tr_object *tr, struct g_raid_volume *vol) +{ + struct g_raid_tr_raid1_object *trs; + int na, nr; + + /* + * If we're stopped, don't do anything. If we don't have at least + * one good disk and one bad disk, we don't do anything. And if there's + * a 'good disk' stored in the trs, then we're in progress and we punt. + * If we make it past all these checks, we need to rebuild. + */ + trs = (struct g_raid_tr_raid1_object *)tr; + if (trs->trso_stopped) + return; + na = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); + nr = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_REBUILD); + if (na == 0 || nr == 0) + return; + if (trs->trso_good_sd) + return; + g_raid_tr_raid1_rebuild_start(tr, vol); +} + static int g_raid_tr_update_state_raid1(struct g_raid_volume *vol) { @@ -261,18 +294,11 @@ g_raid_tr_update_state_raid1(struct g_ra s = G_RAID_VOLUME_S_BROKEN; } } + g_raid_tr_raid1_maybe_rebuild(vol->v_tr, vol); if (s != vol->v_state) { g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 
G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, G_RAID_EVENT_VOLUME); - if (s == G_RAID_VOLUME_S_DEGRADED) { - g_raid_tr_raid1_rebuild_start(vol->v_tr, vol); - vol->v_to_arg = trs; - vol->v_timeout = g_raid_tr_raid1_idle_rebuild; - } else { - vol->v_timeout = 0; - vol->v_to_arg = 0; - } g_raid_change_volume_state(vol, s); } return (0); @@ -289,8 +315,9 @@ g_raid_tr_event_raid1(struct g_raid_tr_o vol = tr->tro_volume; switch (event) { case G_RAID_SUBDISK_E_NEW: - // XXX do I need to start a rebuild here? -// g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); + printf("Current disk state is %d\n", sd->sd_state); + if (sd->sd_state == G_RAID_SUBDISK_S_NEW) + g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_REBUILD); break; case G_RAID_SUBDISK_E_FAILED: // XXX do I need to stop a rebuild here? @@ -517,13 +544,16 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ * inactive ones, we do 50MB. */ if (trs->trso_type == TR_RAID1_REBUILD) { + printf("Rebuild BIO\n"); vol = tr->tro_volume; + pbp->bio_inbed++; if (bp->bio_cmd == BIO_READ) { /* * The read operation finished, queue the * write and get out. */ - pbp->bio_inbed++; + G_RAID_LOGREQ(1, bp, + "rebuild read done. Error %d", bp->bio_error); if (bp->bio_error != 0) { g_raid_tr_raid1_rebuild_abort(tr, vol); goto out; @@ -533,6 +563,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ cbp->bio_cflags = G_RAID_BIO_FLAG_SYNC; cbp->bio_offset = bp->bio_offset; /* Necessary? */ cbp->bio_length = bp->bio_length; + G_RAID_LOGREQ(1, bp, "Queueing reguild write."); g_raid_subdisk_iostart(trs->trso_failed_sd, cbp); return; } else { @@ -546,15 +577,20 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ * SD_REBUILD_CLUSTER should be small, that * shouldn't be a problem. */ - pbp->bio_inbed++; + G_RAID_LOGREQ(1, bp, + "rebuild write done. 
Error %d", bp->bio_error); if (bp->bio_error != 0) { g_raid_tr_raid1_rebuild_abort(tr, vol); goto out; } - /* A lot of the following is needed when we kick of the work -- refactor */ +/* XXX A lot of the following is needed when we kick of the work -- refactor */ nsd = trs->trso_failed_sd; + if (nsd == NULL) { + printf("WTF? nsd is null\n"); + goto out; + } g_raid_unlock_range(sd->sd_volume, - nsd->sd_rebuild_pos, SD_REBUILD_SLAB); + bp->bio_offset, bp->bio_length); nsd->sd_rebuild_pos += pbp->bio_length; if (nsd->sd_rebuild_pos >= vol->v_mediasize) { g_raid_tr_raid1_rebuild_finish(tr, vol); @@ -570,8 +606,10 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ cbp->bio_offset = nsd->sd_rebuild_pos; cbp->bio_length = MIN(SD_REBUILD_SLAB, vol->v_mediasize - nsd->sd_rebuild_pos); cbp->bio_caller1 = trs->trso_good_sd; + G_RAID_LOGREQ(1, bp, + "Rebuild read at %jd.", cbp->bio_offset); g_raid_lock_range(sd->sd_volume, /* Lock callback starts I/O */ - nsd->sd_rebuild_pos, SD_REBUILD_SLAB, cbp); + cbp->bio_offset, cbp->bio_length, cbp); goto out; } } else if (trs->trso_type == TR_RAID1_RESYNC) { @@ -584,9 +622,9 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ panic("Somehow, we think we're doing a resync"); } } + printf("Woof. bp is %p pbp is %p\n", bp, pbp); if (bp->bio_error != 0 && bp->bio_cmd == BIO_READ && - pbp->bio_children == 1) { - + pbp->bio_children == 1 && bp->bio_cflags == 0) { /* * Read failed on first drive. Retry the read error on * another disk drive, if available, before erroring out the @@ -594,7 +632,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ */ vol = tr->tro_volume; sd->sd_read_errs++; - G_RAID_LOGREQ(3, bp, + G_RAID_LOGREQ(1, bp, "Read failure, attempting recovery. %d total read errs", sd->sd_read_errs); @@ -628,10 +666,11 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ * We don't need to fail the raid, since its actual state is * based on the state of the subdisks. 
*/ - G_RAID_LOGREQ(3, bp, "Couldn't retry read, failing it"); + G_RAID_LOGREQ(2, bp, "Couldn't retry read, failing it"); } pbp->bio_inbed++; - if (pbp->bio_cmd == BIO_READ && pbp->bio_children == 2) { + if (pbp->bio_cmd == BIO_READ && pbp->bio_children == 2 && + bp->bio_cflags == 0) { /* * If it was a read, and bio_children is 2, then we just * recovered the data from the second drive. We should try to @@ -663,6 +702,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ * failed, the parent's bio isn't failed since the recovered * read for that actually succeeded. */ + G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error); g_raid_unlock_range(sd->sd_volume, bp->bio_offset, bp->bio_length); if (bp->bio_error) {