From owner-p4-projects@FreeBSD.ORG Sun Aug 16 22:06:17 2009
Date: Sun, 16 Aug 2009 22:06:16 GMT
Message-Id: <200908162206.n7GM6GZ6035923@repoman.freebsd.org>
From: Fabio Checconi
To: Perforce Change Reviews
Subject: PERFORCE change 167421 for review

http://perforce.freebsd.org/chv.cgi?CH=167421

Change 167421 by fabio@fabio_granpasso on 2009/08/16 22:05:59

	Resync with the private repo: (hopefully) safer proxy
	insertion/deletion, a new proportional share scheduler,
	revised heuristics.

Affected files ...

.. //depot/projects/soc2009/fabio_gsched/geom_sched/geom_sched.c#2 edit
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/g_sched.c#3 edit
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_bfq.c#1 add
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_rr.c#3 edit
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_scheduler.h#3 edit
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/modules/geom/geom_sched/Makefile#2 edit
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/modules/geom/geom_sched/gsched_bfq/Makefile#1 add

Differences ...

==== //depot/projects/soc2009/fabio_gsched/geom_sched/geom_sched.c#2 (text+ko) ====

@@ -69,7 +69,7 @@
 	else
 		reqalgo = algo;
 
-	snprintf(name, sizeof(name), "gsched_%s", algo);
+	snprintf(name, sizeof(name), "gsched_%s", reqalgo);
 	/*
 	 * Do not complain about errors here, gctl_issue()
 	 * will fail anyway.

==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/g_sched.c#3 (text+ko) ====

@@ -410,7 +410,8 @@
 }
 
 static void
-g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask)
+g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask,
+    struct g_gsched *gsp, void *data)
 {
 	struct g_sched_class *cp, *cp2;
 	int i;
@@ -418,6 +419,9 @@
 	if (!hp)
 		return;
 
+	if (data && gsp->gs_hash_unref)
+		gsp->gs_hash_unref(data);
+
 	for (i = 0; i < G_SCHED_HASH_SIZE; i++) {
 		LIST_FOREACH_SAFE(cp, &hp[i], gsc_clist, cp2)
 			g_sched_put_class(gp, cp->gsc_priv);
@@ -468,58 +472,70 @@
  *
  * Must be called with the gp mutex held and topology locked.
 */
-static void
+static int
 g_sched_wait_pending(struct g_geom *gp)
 {
 	struct g_sched_softc *sc = gp->softc;
+	int endticks = ticks + hz;
 
 	g_topology_assert();
 
-	while (sc->sc_pending)
+	while (sc->sc_pending && endticks - ticks >= 0)
 		msleep(gp, &sc->sc_mtx, 0, "sched_wait_pending", hz / 4);
+
+	return (sc->sc_pending ? ETIMEDOUT : 0);
 }
 
-static void
+static int
 g_sched_remove_locked(struct g_geom *gp, struct g_gsched *gsp)
 {
 	struct g_sched_softc *sc = gp->softc;
+	int error;
 
 	/* Set the flushing flag: new bios will not enter the scheduler. */
 	sc->sc_flags |= G_SCHED_FLUSHING;
 
 	g_sched_forced_dispatch(gp);
-	g_sched_wait_pending(gp);
-
+	error = g_sched_wait_pending(gp);
+	if (error)
+		goto failed;
+
 	/* No more requests pending or in flight from the old gsp. */
-	g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask);
+	g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data);
 	sc->sc_hash = NULL;
 
 	/*
-	 * XXX avoid deadlock here by releasing the gp mutex and
-	 * reacquiring it once done.
-	 * It should be safe, since no reconfiguration
-	 * or destruction can take place due to the geom topology
-	 * lock; no new request can use the current sc_data since
-	 * we flagged the geom as being flushed.
+	 * Avoid deadlock here by releasing the gp mutex and reacquiring
+	 * it once done.  It should be safe, since no reconfiguration or
+	 * destruction can take place due to the geom topology lock; no
+	 * new request can use the current sc_data since we flagged the
+	 * geom as being flushed.
	 */
 	g_sched_unlock(gp);
 	gsp->gs_fini(sc->sc_data);
 	g_sched_lock(gp);
 
 	sc->sc_gsched = NULL;
+	sc->sc_data = NULL;
+	g_gsched_unref(gsp);
+
+failed:
 	sc->sc_flags &= ~G_SCHED_FLUSHING;
-	g_gsched_unref(gsp);
+
+	return (error);
 }
 
-static void
+static int
 g_sched_remove(struct g_geom *gp, struct g_gsched *gsp)
 {
+	int error;
 
 	g_sched_lock(gp);
-	g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */
+	error = g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */
 	g_sched_unlock(gp);
+
+	return (error);
 }
 
 /*
@@ -607,6 +623,7 @@
 	struct g_gsched *gsp = parm->gup_gsp, *cur, *tmp;
 	struct g_sched_softc *sc;
 	struct g_geom *gp, *gp_tmp;
+	int error;
 
 	parm->gup_error = 0;
 
@@ -622,8 +639,11 @@
 			continue;	/* Should not happen. */
 
 		sc = gp->softc;
-		if (sc->sc_gsched == gsp)
-			g_sched_remove(gp, gsp);
+		if (sc->sc_gsched == gsp) {
+			error = g_sched_remove(gp, gsp);
+			if (error)
+				goto failed;
+		}
 	}
 
 	LIST_FOREACH_SAFE(cur, &me.gs_scheds, glist, tmp) {
@@ -647,6 +667,7 @@
 			parm->gup_error = ENOENT;
 	}
 
+failed:
 	mtx_unlock(&me.gs_mtx);
 }
 
@@ -878,6 +899,65 @@
 	mtx_unlock(&me.gs_mtx);
 }
 
+static void
+g_sched_flush_pending(g_start_t *start)
+{
+	struct bio *bp;
+
+	while ((bp = gs_bioq_takefirst(&me.gs_pending)))
+		start(bp);
+}
+
+static int
+g_insert_proxy(struct g_geom *gp, struct g_provider *newpp,
+    struct g_geom *dstgp, struct g_provider *pp, struct g_consumer *cp)
+{
+	struct g_sched_softc *sc = gp->softc;
+	g_start_t *saved_start, *flush = g_sched_start;
+	int error = 0, endticks = ticks + hz;
+
+	g_cancel_event(newpp); /* prevent taste() */
+	/* copy private fields */
+	newpp->private = pp->private;
+	newpp->index = pp->index;
+
+	/* Queue all the early requests coming for us. */
+	me.gs_npending = 0;
+	saved_start = pp->geom->start;
+	dstgp->start = g_sched_temporary_start;
+
+	while (pp->nstart - pp->nend != me.gs_npending &&
+	    endticks - ticks >= 0)
+		tsleep(pp, PRIBIO, "-", hz/10);
+
+	if (pp->nstart - pp->nend != me.gs_npending) {
+		flush = saved_start;
+		error = ETIMEDOUT;
+		goto fail;
+	}
+
+	/* link pp to this geom */
+	LIST_REMOVE(pp, provider);
+	pp->geom = gp;
+	LIST_INSERT_HEAD(&gp->provider, pp, provider);
+
+	/*
+	 * replicate the counts from the parent in the
+	 * new provider and consumer nodes
+	 */
+	cp->acr = newpp->acr = pp->acr;
+	cp->acw = newpp->acw = pp->acw;
+	cp->ace = newpp->ace = pp->ace;
+	sc->sc_flags |= G_SCHED_PROXYING;
+
+fail:
+	dstgp->start = saved_start;
+
+	g_sched_flush_pending(flush);
+
+	return (error);
+}
+
 /*
  * Create a geom node for the device passed as *pp.
  * If successful, add a reference to this gsp.
@@ -886,12 +966,10 @@
 g_sched_create(struct gctl_req *req, struct g_class *mp,
     struct g_provider *pp, struct g_gsched *gsp, int proxy)
 {
-	struct g_sched_softc *sc;
-	struct g_geom *gp, *dst_gp;
+	struct g_sched_softc *sc = NULL;
+	struct g_geom *gp, *dstgp;
 	struct g_provider *newpp = NULL;
 	struct g_consumer *cp = NULL;
-	void (*saved_start)(struct bio *bp);
-	struct bio *bp;
 	char name[64];
 	int error;
 
@@ -907,7 +985,7 @@
 	}
 
 	gp = g_new_geomf(mp, name);
-	dst_gp = proxy ? pp->geom : gp; /* where do we link the provider */
+	dstgp = proxy ? pp->geom : gp; /* where do we link the provider */
 	if (gp == NULL) {
 		gctl_error(req, "Cannot create geom %s.", name);
 		error = ENOMEM;
@@ -937,7 +1015,7 @@
 	gp->access = g_sched_access;
 	gp->dumpconf = g_sched_dumpconf;
 
-	newpp = g_new_providerf(dst_gp, gp->name);
+	newpp = g_new_providerf(dstgp, gp->name);
 	if (newpp == NULL) {
 		gctl_error(req, "Cannot create provider %s.", name);
 		error = ENOMEM;
@@ -962,44 +1040,16 @@
 		goto fail;
 	}
 
-	g_gsched_ref(gsp);
-
 	g_error_provider(newpp, 0);
 	if (proxy) {
-		g_cancel_event(newpp); /* prevent taste() */
-		/* copy private fields */
-		newpp->private = pp->private;
-		newpp->index = pp->index;
-
-		/* Queue all the early requests coming for us. */
-		me.gs_npending = 0;
-		saved_start = pp->geom->start;
-		dst_gp->start = g_sched_temporary_start;
-
-		while (pp->nstart - pp->nend != me.gs_npending)
-			tsleep(pp, PRIBIO, "-", hz/10);
-
-		/* link pp to this geom */
-		LIST_REMOVE(pp, provider);
-		pp->geom = gp;
-		LIST_INSERT_HEAD(&gp->provider, pp, provider);
-
-		dst_gp->start = saved_start;
-
-		/*
-		 * replicate the counts from the parent in the
-		 * new provider and consumer nodes
-		 */
-		cp->acr = newpp->acr = pp->acr;
-		cp->acw = newpp->acw = pp->acw;
-		cp->ace = newpp->ace = pp->ace;
-		sc->sc_flags |= G_SCHED_PROXYING;
-
-		while ((bp = gs_bioq_takefirst(&me.gs_pending)))
-			g_sched_start(bp);
+		error = g_insert_proxy(gp, newpp, dstgp, pp, cp);
+		if (error)
+			goto fail;
 	}
 
 	G_SCHED_DEBUG(0, "Device %s created.", gp->name);
 
+	g_gsched_ref(gsp);
+
 	return (0);
 
 fail:
@@ -1012,13 +1062,14 @@
 	if (newpp != NULL)
 		g_destroy_provider(newpp);
 
-	if (pp->geom == gp) {
-		/* Link pp back to the original geom */
-		LIST_REMOVE(pp, provider);
-		pp->geom = dst_gp;
-		LIST_INSERT_HEAD(&pp->geom->provider, pp, provider);
+	if (sc && sc->sc_hash) {
+		g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask,
+		    gsp, sc->sc_data);
 	}
 
+	if (sc && sc->sc_data)
+		gsp->gs_fini(sc->sc_data);
+
 	if (gp != NULL) {
 		if (gp->softc != NULL)
 			g_free(gp->softc);
@@ -1043,6 +1094,7 @@
 	struct g_hash *newh;
 	void *data;
 	u_long mask;
+	int error = 0;
 
 	gp = pp->geom;
 	sc = gp->softc;
@@ -1052,14 +1104,18 @@
 		return (ENOMEM);
 
 	newh = g_sched_hash_init(gsp, &mask, HASH_WAITOK);
-	if (gsp->gs_priv_size && newh == NULL) {
-		gsp->gs_fini(data);
-		return (ENOMEM);
+	if (gsp->gs_priv_size && !newh) {
+		error = ENOMEM;
+		goto fail;
 	}
 
 	g_sched_lock(gp);
-	if (sc->sc_gsched)	/* can be NULL in some cases */
-		g_sched_remove_locked(gp, sc->sc_gsched);
+	if (sc->sc_gsched) {	/* can be NULL in some cases */
+		error = g_sched_remove_locked(gp, sc->sc_gsched);
+		if (error)
+			goto fail;
+	}
+
 	g_gsched_ref(gsp);
 	sc->sc_gsched = gsp;
 	sc->sc_data = data;
@@ -1069,8 +1125,23 @@
 	g_sched_unlock(gp);
 
 	return (0);
+
+fail:
+	if (newh)
+		g_sched_hash_fini(gp, newh, mask, gsp, data);
+
+	if (data)
+		gsp->gs_fini(data);
+
+	g_sched_unlock(gp);
+
+	return (error);
 }
 
+/*
+ * Stop the request flow directed to the proxy, redirecting the new
+ * requests to the me.gs_pending queue.
+ */
 static struct g_provider *
g_detach_proxy(struct g_geom *gp)
 {
@@ -1091,13 +1162,6 @@
 		me.gs_npending = 0;
 		pp->geom->start = g_sched_temporary_start;
 
-		/* Link provider to the original geom. */
-		LIST_REMOVE(pp, provider);
-		pp->private = newpp->private;
-		pp->index = newpp->index;
-		pp->geom = newpp->geom;
-		LIST_INSERT_HEAD(&pp->geom->provider, pp, provider);
-
 		return (pp);
 	} while (0);
 	printf("%s error detaching proxy %s\n", __FUNCTION__, gp->name);
@@ -1106,11 +1170,51 @@
 }
 
 static void
-g_destroy_proxy(struct g_geom *gp, struct g_provider *old_pp)
+g_sched_blackhole(struct bio *bp)
+{
+
+	g_io_deliver(bp, ENXIO);
+}
+
+static inline void
+g_reparent_provider(struct g_provider *pp, struct g_geom *gp,
+    struct g_provider *newpp)
+{
+
+	LIST_REMOVE(pp, provider);
+	if (newpp) {
+		pp->private = newpp->private;
+		pp->index = newpp->index;
+	}
+	pp->geom = gp;
+	LIST_INSERT_HEAD(&gp->provider, pp, provider);
+}
+
+static inline void
+g_unproxy_provider(struct g_provider *oldpp, struct g_provider *newpp)
+{
+	struct g_geom *gp = oldpp->geom;
+
+	g_reparent_provider(oldpp, newpp->geom, newpp);
+
+	/*
+	 * Hackish: let the system destroy the old provider for us, just
+	 * in case someone attached a consumer to it, in which case a
+	 * direct call to g_destroy_provider() would not work.
+	 */
+	g_reparent_provider(newpp, gp, NULL);
+}
+
+/*
+ * Complete the proxy destruction, linking the old provider to its
+ * original geom, and destroying the proxy provider.  Also take care
+ * of issuing the pending requests collected in me.gs_pending (if any).
+ */
+static int
+g_destroy_proxy(struct g_geom *gp, struct g_provider *oldpp)
 {
 	struct g_consumer *cp;
 	struct g_provider *newpp;
-	struct bio *bp;
 
 	do {
 		cp = LIST_FIRST(&gp->consumer);
@@ -1119,52 +1223,58 @@
 		newpp = cp->provider;
 		if (newpp == NULL)
 			break;
-		/* detach consumer from provider, and destroy provider. */
+
+		/* Relink the provider to its original geom. */
+		g_unproxy_provider(oldpp, newpp);
+
+		/* Detach consumer from provider, and destroy provider. */
 		cp->acr = newpp->acr = 0;
 		cp->acw = newpp->acw = 0;
 		cp->ace = newpp->ace = 0;
 		g_detach(cp);
-		printf("destroying provider %s\n", newpp->name);
-		g_destroy_provider(newpp);
-		while ((bp = gs_bioq_takefirst(&me.gs_pending)))
-			old_pp->geom->start(bp);
+
+		/* Send the pending bios through the right start function. */
+		g_sched_flush_pending(oldpp->geom->start);
 
-		return;
+		return (0);
 	} while (0);
 	printf("%s error destroying proxy %s\n", __FUNCTION__, gp->name);
+
+	/* We cannot send the pending bios anywhere... */
+	g_sched_flush_pending(g_sched_blackhole);
+
+	return (EINVAL);
 }
 
 static int
 g_sched_destroy(struct g_geom *gp, boolean_t force)
 {
-	struct g_provider *pp, *old_pp = NULL;
+	struct g_provider *pp, *oldpp = NULL;
 	struct g_sched_softc *sc;
 	struct g_gsched *gsp;
+	int error;
 
 	g_topology_assert();
 	sc = gp->softc;
 	if (sc == NULL)
 		return (ENXIO);
 
-	if ((sc->sc_flags & G_SCHED_PROXYING)) {
-		old_pp = g_detach_proxy(gp);
-		goto flush;
-	}
-
-	pp = LIST_FIRST(&gp->provider);
-	if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
-		const char *msg = force ?
-		    "but we force removal" : "cannot remove";
+	if (!(sc->sc_flags & G_SCHED_PROXYING)) {
+		pp = LIST_FIRST(&gp->provider);
+		if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
+			const char *msg = force ?
+ "but we force removal" : "cannot remove"; - G_SCHED_DEBUG(!force, - "Device %s is still open (r%dw%de%d), %s.", - pp->name, pp->acr, pp->acw, pp->ace, msg); - if (!force) - return (EBUSY); - } else { - G_SCHED_DEBUG(0, "Device %s removed.", gp->name); - } + G_SCHED_DEBUG(!force, + "Device %s is still open (r%dw%de%d), %s.", + pp->name, pp->acr, pp->acw, pp->ace, msg); + if (!force) + return (EBUSY); + } else { + G_SCHED_DEBUG(0, "Device %s removed.", gp->name); + } + } else + oldpp = g_detach_proxy(gp); -flush: gsp = sc->sc_gsched; if (gsp) { /* @@ -1174,26 +1284,69 @@ */ g_sched_lock(gp); /* - * We are dying here, no new request should enter + * We are dying here, no new requests should enter * the scheduler. This is granted by the topolgy, - * either in case we were proxying (the provider we - * stole now returned home) or not (see the access - * check above). + * either in case we were proxying (new bios are + * being redirected) or not (see the access check + * above). */ g_sched_forced_dispatch(gp); - g_sched_wait_pending(gp); + error = g_sched_wait_pending(gp); + + if (error) { + /* + * Not all the requests came home: this might happen + * under heavy load, or if we were waiting for any + * bio which is served in the event path (see + * geom_slice.c for an example of how this can + * happen). Try to restore a working configuration + * if we can fail. + */ + if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { + g_sched_flush_pending(force ? + g_sched_blackhole : g_sched_start); + } + + /* + * In the forced destroy case there is not so much + * we can do, we have pending bios that will call + * g_sched_done() somehow, and we don't want them + * to crash the system using freed memory. We tell + * the user that something went wrong, and leak some + * memory here. + * Note: the callers using force = 1 ignore the + * return value. + */ + if (force) { + G_SCHED_DEBUG(0, "Pending requests while " + " destroying geom, some memory leaked."); + } + + return (error); + } + g_sched_unlock(gp); - - g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask); + g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, + gsp, sc->sc_data); sc->sc_hash = NULL; - gsp->gs_fini(sc->sc_data); - g_gsched_unref(gsp); + sc->sc_gsched = NULL; } - if ((sc->sc_flags & G_SCHED_PROXYING) && old_pp) - g_destroy_proxy(gp, old_pp); + if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { + error = g_destroy_proxy(gp, oldpp); + + if (error) { + if (force) { + G_SCHED_DEBUG(0, "Unrecoverable error while " + "destroying a proxy geom, leaking some " + " memory."); + } + + return (error); + } + } mtx_destroy(&sc->sc_mtx); @@ -1201,7 +1354,7 @@ gp->softc = NULL; g_wither_geom(gp, ENXIO); - return (0); + return (error); } static int @@ -1372,7 +1525,7 @@ g_ioreq_restore(); } -#else /* !HAVE_BIO_CLASSIFIER */ +#else /* HAVE_BIO_CLASSIFIER */ static int g_sched_tag(void *arg, struct bio *bp) { ==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_rr.c#3 (text+ko) ==== @@ -96,6 +96,8 @@ struct g_savg q_thinktime; /* Thinktime average. */ struct g_savg q_seekdist; /* Seek distance average. */ + int q_bionum; /* Number of requests. */ + off_t q_lastoff; /* Last submitted req. offset. */ int q_lastsub; /* Last submitted req. time. 
@@ -182,7 +184,7 @@
 	.sc_head = LIST_HEAD_INITIALIZER(&me.sc_head),
 	.w_anticipate = 1,
 	.queue_depth =	{ 1, 1, 50 },
-	.wait_ms =	{ 1, 5, 30 },
+	.wait_ms =	{ 1, 10, 30 },
 	.quantum_ms =	{ 1, 100, 500 },
 	.quantum_kb =	{ 16, 8192, 65536 },
 };
@@ -375,14 +377,14 @@
 		return (0);
 
 	if (g_savg_valid(&qp->q_seekdist) &&
-	    g_savg_read(&qp->q_seekdist) > 2048)
+	    g_savg_read(&qp->q_seekdist) > 8192)
 		return (0);
 
 	return (1);
 }
 
 /*
- * called on a request arrival, timeout or completion.
+ * Called on a request arrival, timeout or completion.
  * Try to serve a request among those queued.
  */
 static struct bio *
@@ -468,7 +470,7 @@
 			sc->sc_active = NULL;
 		}
 	}
-	/* if sc_active != NULL, its q_status is always correct */
+	/* If sc_active != NULL, its q_status is always correct. */
 
 	sc->sc_in_flight++;
 
@@ -480,9 +482,13 @@
 {
 	int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2);
 
+	if (qp->q_sc->sc_active != qp)
+		return;
+
 	qp->q_lastsub = ticks;
 	delta = (delta > 2 * wait) ? 2 * wait : delta;
-	g_savg_add_sample(&qp->q_thinktime, delta);
+	if (qp->q_bionum > 7)
+		g_savg_add_sample(&qp->q_thinktime, delta);
 }
 
 static inline void
@@ -495,8 +501,13 @@
 	else
 		dist = bp->bio_offset - qp->q_lastoff;
 
+	if (dist > (8192 * 8))
+		dist = 8192 * 8;
+
 	qp->q_lastoff = bp->bio_offset + bp->bio_length;
-	g_savg_add_sample(&qp->q_seekdist, qp->q_seekdist.gs_smpl ? dist : 0);
+
+	if (qp->q_bionum > 7)
+		g_savg_add_sample(&qp->q_seekdist, dist);
 }
 
 /*
@@ -536,6 +547,8 @@
 		}
 	}
 
+	qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3);
+
 	g_rr_update_thinktime(qp);
 	g_rr_update_seekdist(qp, bp);

==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_scheduler.h#3 (text+ko) ====

@@ -79,6 +79,10 @@
  *
  * gs_init_class() is called when a new client (as determined by
  * the classifier) starts being used.
+ *
+ * gs_hash_unref() is called right before the class hashtable is
+ * destroyed; after this call, the scheduler is supposed to hold no
+ * more references to the elements in the table.
  */
 
 /* Forward declarations for prototypes. */
@@ -92,6 +96,7 @@
 typedef struct bio *gs_next_t (void *data, int force);
 typedef int gs_init_class_t (void *data, void *priv);
 typedef void gs_fini_class_t (void *data, void *priv);
+typedef void gs_hash_unref_t (void *data);
 
 struct g_gsched {
 	const char	*gs_name;
@@ -107,6 +112,7 @@
 
 	gs_init_class_t		*gs_init_class;
 	gs_fini_class_t		*gs_fini_class;
+	gs_hash_unref_t		*gs_hash_unref;
 
 	LIST_ENTRY(g_gsched) glist;
 };

==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/modules/geom/geom_sched/Makefile#2 (text+ko) ====

@@ -1,6 +1,6 @@
 # $FreeBSD: $
 
-SUBDIR=	gs_sched gsched_as gsched_rr
+SUBDIR=	gs_sched gsched_as gsched_rr gsched_bfq
 
 .if defined(WITH_SSD)
 SUBDIR += gsched_ssd
 .endif
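
To illustrate the new gs_hash_unref() hook declared in gs_scheduler.h above: it runs right before g_sched_hash_fini() releases every class in the hashtable, so an algorithm that caches a pointer into the table can drop it there.  The sketch below is hypothetical; the gs_foo_* names and the gs_foo_queue_put() helper are made up for illustration, only the gs_hash_unref field itself comes from this change.

/* Hypothetical per-device data of a scheduler called "foo". */
struct gs_foo_softc {
	struct gs_foo_queue *sc_active;	/* class currently being served */
};

/*
 * Invoked just before the class hashtable is destroyed: forget the
 * cached active class so that no reference into the table survives
 * g_sched_hash_fini().
 */
static void
gs_foo_hash_unref(void *data)
{
	struct gs_foo_softc *sc = data;

	if (sc->sc_active != NULL) {
		gs_foo_queue_put(sc->sc_active);	/* made-up release helper */
		sc->sc_active = NULL;
	}
}

/* Wired into the algorithm descriptor next to the other methods. */
static struct g_gsched gs_foo = {
	.gs_name = "foo",
	.gs_hash_unref = gs_foo_hash_unref,
	/* gs_init, gs_fini, gs_start, ... omitted from this sketch. */
};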
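
g_sched_wait_pending() and g_insert_proxy() now bound their sleeps with endticks = ticks + hz and keep waiting while endticks - ticks >= 0, rather than comparing ticks against the deadline directly.  The signed subtraction keeps the test meaningful when the tick counter wraps around.  A small userland demonstration of the idea, using an 8-bit counter so the wrap is easy to see; the cast assumes the usual two's complement wrap, which is also what the kernel idiom relies on.

#include <stdio.h>

int
main(void)
{
	unsigned char now = 250;	/* tick counter about to wrap at 256 */
	unsigned char end = now + 20;	/* deadline wraps around to 14 */
	int i;

	for (i = 0; i < 26; i++, now++) {
		signed char left = (signed char)(end - now);

		/* The direct comparison goes wrong as soon as "end" wraps. */
		printf("now=%3d end=%3d direct=%d wrap-safe=%d\n",
		    now, end, now <= end, left >= 0);
	}
	return (0);
}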
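
On the revised gs_rr.c heuristics: the new per-queue counter is updated as q_bionum = 1 + q_bionum - (q_bionum >> 3) on every request, and the think-time and seek-distance averages only take samples once q_bionum > 7.  Starting from zero the counter reaches 8 after eight requests and then stays there (any value from 8 to 15 is a fixed point of the update), so a freshly created queue must submit a handful of requests before its statistics start influencing anticipation.  A tiny demonstration of the recurrence, just to make the saturation visible:

#include <stdio.h>

int
main(void)
{
	int q_bionum = 0;
	int i;

	/* Same update as in g_rr_start(): saturates at 8 after 8 requests. */
	for (i = 1; i <= 12; i++) {
		q_bionum = 1 + q_bionum - (q_bionum >> 3);
		printf("request %2d: q_bionum = %d, sampling %s\n",
		    i, q_bionum, q_bionum > 7 ? "on" : "off");
	}
	return (0);
}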