Date: Mon, 6 Jul 2009 14:38:02 GMT
From: Fabio Checconi <fabio@FreeBSD.org>
To: Perforce Change Reviews <perforce@FreeBSD.org>
Subject: PERFORCE change 165689 for review
Message-ID: <200907061438.n66Ec2I8067152@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=165689 Change 165689 by fabio@fabio_granpasso on 2009/07/06 14:37:41 Snapshot of private repo: add seekiness and thinktime heuristics, improve async writeout accounting, update the bio classification code to use the hooks committed in HEAD (when configured). Affected files ... .. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/g_sched.c#2 edit .. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_rr.c#2 edit .. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_scheduler.h#2 edit Differences ... ==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/g_sched.c#2 (text+ko) ==== @@ -114,6 +114,7 @@ #include "gs_scheduler.h" #include "g_sched.h" /* geom hooks */ +#define HAVE_BIO_CLASSIFIER /* * Size of the per-geom hash table storing traffic classes. * We may decide to change it at a later time, it has no ABI @@ -178,14 +179,6 @@ .gs_expire_secs = 10, }; -/* - * What kind of classifier we want to use ? - * (not supported yet) - */ -#define G_CLASS_PID 0 - -static const int g_sched_classifier = G_CLASS_PID; - SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0, "GEOM_SCHED stuff"); @@ -321,31 +314,24 @@ * so we do not make assumptions on the return value which for * us is just an opaque identifier. */ -static inline struct thread * -g_sched_issuer(struct bio *bp) +#ifndef HAVE_BIO_CLASSIFIER +static inline u_long +g_sched_classify(struct bio *bp) { while (bp->bio_parent != NULL) bp = bp->bio_parent; - return (bp->bio_caller1); + return ((u_long)bp->bio_caller1); } - -/* - * Fetch the actual field used for classification, among the - * ones available in the credentials associated with the bio. - * Not much to do so far. 
- */ -static u_long -g_sched_classify(struct thread *tp) +#else +static inline u_long +g_sched_classify(struct bio *bp) { - switch (g_sched_classifier) { - case G_CLASS_PID: - default: - return (tp->td_tid); - } + return ((u_long)bp->bio_classifier1); } +#endif /* Return the hash chain for the given key. */ static inline struct g_hash * @@ -369,12 +355,10 @@ struct g_sched_class *gsc; struct g_gsched *gsp; struct g_hash *bucket; - struct thread *tp; u_long key; sc = gp->softc; - tp = g_sched_issuer(bp); - key = g_sched_classify(tp); + key = g_sched_classify(bp); bucket = g_sched_hash(sc, key); LIST_FOREACH(gsc, bucket, gsc_clist) { if (key == gsc->gsc_key) { @@ -389,7 +373,7 @@ if (!gsc) return (NULL); - if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv, tp)) { + if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) { free(gsc, M_GEOM_SCHED); return (NULL); } @@ -1258,6 +1242,7 @@ * code in g_ioreq_patch() for the details. */ +#ifndef HAVE_BIO_CLASSIFIER #if defined(__i386__) #define CODE_SIZE 29 #define STORE_SIZE 5 @@ -1374,6 +1359,47 @@ } } +static inline void +g_classifier_ini(void) +{ + + g_ioreq_patch(); +} + +static inline void +g_classifier_fini(void) +{ + + g_ioreq_restore(); +} +#else /* !HAVE_BIO_CLASSIFIER */ +static int +g_sched_tag(void *arg, struct bio *bp) +{ + + bp->bio_classifier1 = curthread; + return (1); +} + +static struct g_classifier_hook g_sched_classifier = { + .func = g_sched_tag, +}; + +static inline void +g_classifier_ini(void) +{ + + g_register_classifier(&g_sched_classifier); +} + +static inline void +g_classifier_fini(void) +{ + + g_unregister_classifier(&g_sched_classifier); +} +#endif + static void g_sched_init(struct g_class *mp) { @@ -1384,14 +1410,14 @@ mp, &g_sched_class); /* Patch g_io_request to store classification info in the bio. 
*/ - g_ioreq_patch(); + g_classifier_ini(); } static void g_sched_fini(struct g_class *mp) { - g_ioreq_restore(); + g_classifier_fini(); G_SCHED_DEBUG(0, "Unloading..."); ==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_rr.c#2 (text+ko) ==== @@ -65,6 +65,11 @@ G_QUEUE_IDLING /* Waiting for a new request. */ }; +/* possible queue flags */ +enum g_rr_flags { + G_FLAG_COMPLETED = 1, /* Completed a req. in the current budget. */ +}; + struct g_rr_softc; /* @@ -79,6 +84,7 @@ enum g_rr_state q_status; unsigned int q_service; /* service received so far */ int q_slice_end; /* actual slice end in ticks */ + enum g_rr_flags q_flags; /* queue flags */ struct bio_queue_head q_bioq; /* Scheduling parameters */ @@ -86,6 +92,13 @@ unsigned int q_slice_duration; /* slice size in ticks */ unsigned int q_wait_ticks; /* wait time for anticipation */ + /* Stats to drive the various heuristics. */ + struct g_savg q_thinktime; /* Thinktime average. */ + struct g_savg q_seekdist; /* Seek distance average. */ + + off_t q_lastoff; /* Last submitted req. offset. */ + int q_lastsub; /* Last submitted req. time. */ + /* Expiration deadline for an empty queue. 
*/ int q_expire; @@ -289,7 +302,7 @@ } static int -g_rr_init_class(void *data, void *priv, struct thread *tp) +g_rr_init_class(void *data, void *priv) { struct g_rr_softc *sc = data; struct g_rr_queue *qp = priv; @@ -339,8 +352,33 @@ g_rr_queue_expired(struct g_rr_queue *qp) { - return (qp->q_service >= qp->q_budget || - ticks - qp->q_slice_end >= 0); + if (qp->q_service >= qp->q_budget) + return (1); + + if ((qp->q_flags & G_FLAG_COMPLETED) && + ticks - qp->q_slice_end >= 0) + return (1); + + return (0); +} + +static inline int +g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp) +{ + int wait = get_bounded(&me.wait_ms, 2); + + if (!me.w_anticipate && (bp->bio_cmd & BIO_WRITE)) + return (0); + + if (g_savg_valid(&qp->q_thinktime) && + g_savg_read(&qp->q_thinktime) > wait) + return (0); + + if (g_savg_valid(&qp->q_seekdist) && + g_savg_read(&qp->q_seekdist) > 2048) + return (0); + + return (1); } /* @@ -389,9 +427,7 @@ TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq); sc->sc_active = qp; qp->q_service = 0; - /* in case we want to make the slice adaptive */ - qp->q_slice_duration = get_bounded(&me.quantum_ms, 2); - qp->q_slice_end = ticks + qp->q_slice_duration; + qp->q_flags &= ~G_FLAG_COMPLETED; } bp = gs_bioq_takefirst(&qp->q_bioq); /* surely not NULL */ @@ -412,7 +448,7 @@ * on read or writes (e.g., anticipate only on reads). */ expired = g_rr_queue_expired(qp); /* are we expired ? */ - next = gs_bioq_first(&qp->q_bioq); /* do we have one more ? */ + next = gs_bioq_first(&qp->q_bioq); /* do we have one more ? */ if (expired) { sc->sc_active = NULL; /* Either requeue or release reference. 
*/ @@ -423,7 +459,7 @@ } else if (next != NULL) { qp->q_status = G_QUEUE_READY; } else { - if (!force && (me.w_anticipate || bp->bio_cmd & BIO_READ)) { + if (!force && g_rr_should_anticipate(qp, bp)) { /* anticipate */ qp->q_status = G_QUEUE_BUSY; } else { @@ -439,6 +475,30 @@ return (bp); } +static inline void +g_rr_update_thinktime(struct g_rr_queue *qp) +{ + int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2); + + qp->q_lastsub = ticks; + delta = (delta > 2 * wait) ? 2 * wait : delta; + g_savg_add_sample(&qp->q_thinktime, delta); +} + +static inline void +g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp) +{ + off_t dist; + + if (qp->q_lastoff > bp->bio_offset) + dist = qp->q_lastoff - bp->bio_offset; + else + dist = bp->bio_offset - qp->q_lastoff; + + qp->q_lastoff = bp->bio_offset + bp->bio_length; + g_savg_add_sample(&qp->q_seekdist, qp->q_seekdist.gs_smpl ? dist : 0); +} + /* * Called when a real request for disk I/O arrives. * Locate the queue associated with the client. @@ -476,6 +536,9 @@ } } + g_rr_update_thinktime(qp); + g_rr_update_seekdist(qp, bp); + /* Inherit the reference returned by g_rr_queue_get(). */ bp->bio_caller1 = qp; gs_bioq_disksort(&qp->q_bioq, bp); @@ -559,6 +622,13 @@ qp = bp->bio_caller1; if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) { + if (!(qp->q_flags & G_FLAG_COMPLETED)) { + qp->q_flags |= G_FLAG_COMPLETED; + /* in case we want to make the slice adaptive */ + qp->q_slice_duration = get_bounded(&me.quantum_ms, 2); + qp->q_slice_end = ticks + qp->q_slice_duration; + } + /* The queue is trying anticipation, start the timer. 
*/ qp->q_status = G_QUEUE_IDLING; /* may make this adaptive */ ==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_scheduler.h#2 (text+ko) ==== @@ -90,7 +90,7 @@ typedef int gs_start_t (void *data, struct bio *bio); typedef void gs_done_t (void *data, struct bio *bio); typedef struct bio *gs_next_t (void *data, int force); -typedef int gs_init_class_t (void *data, void *priv, struct thread *tp); +typedef int gs_init_class_t (void *data, void *priv); typedef void gs_fini_class_t (void *data, void *priv); struct g_gsched { @@ -175,6 +175,40 @@ void g_sched_dispatch(struct g_geom *geom); /* + * Simple gathering of statistical data, used by schedulers to collect + * info on process history. Just keep an exponential average of the + * samples, with some extra bits of precision. + */ +struct g_savg { + uint64_t gs_avg; + unsigned int gs_smpl; +}; + +static inline void +g_savg_add_sample(struct g_savg *ss, uint64_t sample) +{ + + /* EMA with alpha = 0.125, fixed point, 3 bits of precision. */ + ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3); + ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3); +} + +static inline int +g_savg_valid(struct g_savg *ss) +{ + + /* We want at least 8 samples to deem an average as valid. */ + return (ss->gs_smpl > 7); +} + +static inline uint64_t +g_savg_read(struct g_savg *ss) +{ + + return (ss->gs_avg / ss->gs_smpl); +} + +/* * Declaration of a scheduler module. */ int g_gsched_modevent(module_t mod, int cmd, void *arg);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200907061438.n66Ec2I8067152>