From owner-p4-projects@FreeBSD.ORG Wed Jun 13 22:30:10 2007 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 5BF4D16A469; Wed, 13 Jun 2007 22:30:10 +0000 (UTC) X-Original-To: perforce@FreeBSD.org Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 169BB16A477 for ; Wed, 13 Jun 2007 22:30:10 +0000 (UTC) (envelope-from lulf@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.freebsd.org (Postfix) with ESMTP id 04A6413C489 for ; Wed, 13 Jun 2007 22:30:10 +0000 (UTC) (envelope-from lulf@FreeBSD.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.8/8.13.8) with ESMTP id l5DMU9Ri025976 for ; Wed, 13 Jun 2007 22:30:09 GMT (envelope-from lulf@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.8/8.13.8/Submit) id l5DMU9Ug025969 for perforce@freebsd.org; Wed, 13 Jun 2007 22:30:09 GMT (envelope-from lulf@FreeBSD.org) Date: Wed, 13 Jun 2007 22:30:09 GMT Message-Id: <200706132230.l5DMU9Ug025969@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to lulf@FreeBSD.org using -f From: Ulf Lilleengen To: Perforce Change Reviews Cc: Subject: PERFORCE change 121605 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 13 Jun 2007 22:30:10 -0000 http://perforce.freebsd.org/chv.cgi?CH=121605 Change 121605 by lulf@lulf_carrot on 2007/06/13 22:29:57 - Integrate the gv_sync routine so that a plex can be synced from another plex; for example, if you add another plex to create a mirror, you can now bring it in sync with the original. What is still missing is delaying write requests while syncing. 
- Modify gv_sync to operate the same way as rebuild etc: Send down a read BIO, and when it is done, send down the write BIO to the one stored as the receiver (use bio_caller2 for that, but I need to make sure later that RAID5 mirrors are not affected by this). gv_sync_request is used to send down new BIOs, while gv_sync_completed is called by gv_bio_done. - Set gv_done in the rebuild/check BIOs to make sure they always return to the correct place. Affected files ... .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.h#13 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_init.c#6 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_plex.c#11 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_volume.c#2 edit Differences ... ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.h#13 (text+ko) ==== @@ -109,7 +109,8 @@ void gv_plex_done(struct gv_plex *, struct bio *); void gv_bio_done(struct gv_softc *, struct bio *); void gv_cleanup(struct gv_softc *); - +int gv_sync_request(struct gv_plex *, struct gv_plex *, + off_t, off_t, int, caddr_t); void gv_create_drive(struct gv_softc *, struct gv_drive *); void gv_create_volume(struct gv_softc *, struct gv_volume *); void gv_create_plex(struct gv_softc *, struct gv_plex *); ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_init.c#6 (text+ko) ==== @@ -118,8 +118,8 @@ error = 0; v = p->vol_sc; - if ((v != NULL) && (v->plexcount > 1)); -/* error = gv_sync(v);*/ + if ((v != NULL) && (v->plexcount > 1)) + error = gv_sync(v); else if (p->org == GV_PLEX_RAID5) { if (p->state == GV_PLEX_DEGRADED) error = gv_rebuild_plex(p); @@ -170,13 +170,14 @@ return (error); } +#endif static int gv_sync(struct gv_volume *v) { struct gv_softc *sc; struct gv_plex *p, *up; - struct gv_sync_args *sync; + int error; KASSERT(v != NULL, ("gv_sync: NULL v")); sc = v->vinumconf; @@ -193,6 +194,17 @@ if (up == NULL) 
return (ENXIO); + g_topology_lock(); + error = gv_access(v->provider, 1, 1, 0); + if (error) { + g_topology_unlock(); + printf("VINUM: sync from '%s' failed to access volume: %d\n", + up->name, error); + return (error); + } + g_topology_unlock(); + + /* Go through the good plex, and issue BIO's to all other plexes. */ LIST_FOREACH(p, &v->plexes, in_volume) { if ((p == up) || (p->state == GV_PLEX_UP)) continue; @@ -200,24 +212,23 @@ return (EINPROGRESS); } p->flags |= GV_PLEX_SYNCING; - sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); - sync->v = v; - sync->from = up; - sync->to = p; - sync->syncsize = GV_DFLT_SYNCSIZE; - kthread_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'", - p->name); + printf("VINUM: starting sync of plex %s\n", p->name); + error = gv_sync_request(up, p, 0, GV_DFLT_SYNCSIZE, BIO_READ, + g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK)); + if (error) { + printf("VINUM: error syncing plex %s\n", p->name); + break; + } } return (0); } -#endif static int gv_rebuild_plex(struct gv_plex *p) { -/* XXX: Is this safe? (Allows for mounted rebuild) +/* XXX: Is this safe? (Allows for mounted rebuild)*/ /* if (gv_provider_is_open(p->vol_sc->provider)) return (EBUSY);*/ ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_plex.c#11 (text+ko) ==== @@ -372,8 +372,9 @@ } else if (pbp->bio_cmd == BIO_WRITE && (pbp->bio_cflags & GV_BIO_REBUILD)) { gv_rebuild_completed(p, pbp); - } else + } else { g_io_deliver(pbp, pbp->bio_error); + } } /* Clean up what we allocated. 
*/ @@ -498,7 +499,7 @@ g_topology_unlock(); bp->bio_cmd = BIO_WRITE; - bp->bio_done = NULL; + bp->bio_done = gv_done; bp->bio_error = 0; bp->bio_length = p->stripesize; ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_volume.c#2 (text+ko) ==== @@ -41,6 +41,8 @@ #include #include +static void gv_sync_completed(struct gv_plex *, struct bio *); + void gv_volume_start(struct gv_softc *sc, struct bio *bp) { @@ -127,11 +129,107 @@ pbp->bio_completed += bp->bio_completed; g_destroy_bio(bp); pbp->bio_inbed++; - if (pbp->bio_children == pbp->bio_inbed) - g_io_deliver(pbp, pbp->bio_error); + if (pbp->bio_children == pbp->bio_inbed) { + if (pbp->bio_cflags & GV_BIO_SYNCREQ) + gv_sync_completed(p, pbp); + else + g_io_deliver(pbp, pbp->bio_error); + } break; case GV_PLEX_RAID5: gv_plex_done(p, bp); break; } } + +/* + * Handle a finished plex sync bio. + */ +static void +gv_sync_completed(struct gv_plex *to, struct bio *bp) +{ + struct gv_plex *from, *p; + struct gv_volume *v; + int err; + + g_topology_assert_not(); + + from = bp->bio_caller2; + v = to->vol_sc; + + err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length, + bp->bio_cmd, bp->bio_data); + if (err) { + if (bp->bio_cflags & GV_BIO_MALLOC) + g_free(bp->bio_data); + g_destroy_bio(bp); + return; + } + /* Free data if we're writing and destroy bio. */ + if (bp->bio_cmd == BIO_WRITE && bp->bio_cflags & GV_BIO_MALLOC) + g_free(bp->bio_data); + g_destroy_bio(bp); + + /* Check if all plexes are synced, and lower refcounts. */ + g_topology_lock(); + LIST_FOREACH(p, &v->plexes, in_volume) { + if (p->flags & GV_PLEX_SYNCING) + goto cleanup; + } + /* If we came here, all plexes are synced, and we're free. 
*/ + gv_access(v->provider, -1, -1, 0); + printf("VINUM: plex sync completed\n"); +cleanup: + g_topology_unlock(); +} + +int +gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset, off_t + length, int type, caddr_t data) +{ + struct bio *bp; + + bp = g_new_bio(); + if (bp == NULL) { + printf("VINUM: sync from '%s' failed at offset " + " %jd; out of memory\n", from->name, offset); + return (ENOMEM); + } + bp->bio_length = length; + bp->bio_done = gv_done; + bp->bio_cflags |= GV_BIO_SYNCREQ; + bp->bio_offset = offset; + bp->bio_caller2 = to; /* Reverse the caller. */ + + /* If it was a read, write it to the destination. */ + if (type == BIO_READ) { + /*printf("We just read %s at %jd, and now write %s at %jd\n", + to->name, offset, from->name, offset);*/ + bp->bio_cmd = BIO_WRITE; + bp->bio_data = data; + /* If it was a write, read the next one. */ + } else if (type == BIO_WRITE) { + bp->bio_cmd = BIO_READ; + bp->bio_offset += bp->bio_length; + bp->bio_data = g_malloc(bp->bio_length, M_WAITOK); + bp->bio_cflags |= GV_BIO_MALLOC; + + /*printf("We just wrote %s at %jd, and now read %s at %jd\n", + to->name, bp->bio_offset, from->name, bp->bio_offset); + */ + /* If we're finished, clean up. */ + if (bp->bio_offset >= from->size) { + printf("VINUM: syncing of %s from %s completed\n", + to->name, from->name); + to->flags &= ~GV_PLEX_SYNCING; + return (0); + } + } + +/* printf("Sending next bio: "); + g_print_bio(bp); + printf("\n");*/ + /* Send down next. */ + gv_plex_start(from, bp); + return (0); +}