Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 20 Aug 1999 10:22:33 +0900
From:      Akira Wada <a-wada@mars.dti.ne.jp>
To:        seiwald@perforce.com (Christopher Seiwald)
Cc:        freebsd-hackers@FreeBSD.ORG
Subject:   Re: anybody love qsort.c?
Message-ID:  <199908200122.AA00048@a.mars.dti.ne.jp>

next in thread | raw e-mail | index | archive | help

Christopher Seiwald writes:
> The FreeBSD qsort implementation has a rather nasty degenerate case.
> If you have data that partitions exactly about the median pivot, yet
> which is unsorted in either partition, you get get treated to an insertion
> sort.  Example:
>
>       1 2 3 4 5 6 7 8 9 10 19 18 17 16 14 14 13 12 11
>
> qsort picks 10 as the median, does no swaps on its first pass, and so
> decides to switch to an insertion sort.   The idea is that if no swaps
> occur, the data is likely to be nearly already sorted and a good candidate
> for insertion sort.  This turns out to be a (very) bad idea if you have
> some unsorted data buried in the middle of a (very) large dataset.
>
> The implementation claims to come from Bentley and McIlroy's paper,
> "Engineering a Sort Function," and this is largely true, but the insertion
> sort optimization(?) isn't in that paper.  The GNU qsort.c only does an
> insertion sort if it is below a certain threshhold in size, and so never
> attempts such a sort on the whole dataset.  (The GNU qsort does a number
> of other things pooh-poohed in the Bentley paper.)
>
> It's a pretty straightforward change to bypass the insertion sort for
> large subsets of the data.  If no one has a strong love for qsort, I'll
> educate myself on how to make and contribute this change.

How about the code following sig. ?
And the other codes and information on:

  http://www.mars.dti.ne.jp/~a-wada/qsortlib.html

--
Akira Wada <a-wada@mars.dti.ne.jp>
---------------------------------------------------------------------------------
/*  a-wada/qsortlib/ccn/qsorts.c                                Mar. 30, 1999 */
/*  Copyright (c) 1999  Akira Wada <a-wada@mars.dti.ne.jp>                    */

/*      Quick sort compatible with "qsort (); in <stdlib.h>"                  */
/*          efficiency by reducing the times of key comparisons and           */
/*              word_mode_swapping for word_aligned object                    */
/*          prevention from the degeneration caused by peculiar input         */
/*      This qsort is not stable, but could avoid the bad behavior by many    */
/*      equal sort-keys. For stability, add a unique-key to compare-function. */

/*      If the object to be sorted is an array of pointers to structs with    */
/*      an unsigned integer key in ascending order, set the 3'rd argument     */
/*      to the key position in bytes and the 4'th to NULL, then               */
/*      radixsort would be applied.  for example :                            */
/*                                   kps = 16; qsort (pv, n, kps, (ftp) 0);   */

#include <time.h>
#include <stdlib.h>
void radixspi (void *, size_t, size_t);
void srandom (unsigned long);                         /* if not in <stdlib.h> */
unsigned long random ();                              /* if not in <stdlib.h> */

#define MDT     16  /* threshold for median of the 5 or the more,    MDT >= 8 */
#define MDA      2  /* adjusting threshold slightly,           MDT + MDA >= 4 */
#define MDM      2  /* multiplier for threshold,                     MDM >= 2 */

#define DGT    256  /* threshold for checking degeneration                    */
#define DGV     16  /* degeneration assumed if the smaller < DGL              */
#define DGM      0  /* threshold for selecting no median                      */
#define DGS      2  /* threshold for samples distributed uniformly            */
#define DGU      4  /* threshold for sampling at random                       */
#define DGL     ((unsigned) psz / DGV) * rl
#define defdgn  int dgn = 0, nsp, itv
#define ckdgnr(s, e) if (psz >= DGT && s + DGL > e) dgn++
#define no_med  (psz > MDT && DGM < dgn && dgn <= DGS)
#define symptm  (psz > MDT && dgn > DGS)
#define xcsmpl() do {                                                          \
        if (dgn <= DGU) {                   /* samples distributed uniformly */\
            nsp = 3; while (psz > thr) thr *= MDM, nsp += 2;                   \
            itv = ((unsigned) psz / (nsp - 1)) * rl; k = l, n = r;             \
            for (thr = MDT; psz > thr; thr *= MDM) {                           \
                i += rl, k += itv; swap (i, k);                                \
                j -= rl, n -= itv; swap (n, j);}}                              \
        else {                                          /* samples at random */\
            if (dgn == DGU + 1) dgn++, srandom (time (0));                     \
            for (thr = (unsigned) thr / MDM; psz > thr; thr *= MDM) {          \
                rdmsmpl (i, i, j); i += rl;                                    \
                rdmsmpl (j, i, j); j -= rl;}                                   \
            rdmsmpl (m, i, j);}                                                \
        i = l, j = r, thr = MDT;} while (0)
#define rdmsmpl(x, s, e) if (x != (n = s + rl * (                              \
        random () % ((unsigned) (e - (s)) / rl + 1)))) swap (x, n)

#define swpwrk int t, w, z = sizeof (int), alg = ((int) av | rl) & (z - 1)
#define swap(i, j) do {                                                        \
        w = rl; if (alg == 0) do                                               \
            w -= z,          t = *((int *) (i + w)),                           \
            *((int *) (i + w)) = *((int *) (j + w)),                           \
            *((int *) (j + w)) = t; while (w > 0);                             \
        else do                                                                \
            --w,   t = *(i + w), *(i + w) = *(j + w), *(j + w) = t;            \
            while (w > 0);} while (0)
#define rotate(i, j, k) do {                                                   \
        w = rl; if (alg == 0) do                                               \
            w -= z,          t = *((int *) (i + w)),                           \
            *((int *) (i + w)) = *((int *) (j + w)),                           \
            *((int *) (j + w)) = *((int *) (k + w)),                           \
            *((int *) (k + w)) = t; while (w > 0);                             \
        else do                                                                \
            --w,   t = *(i + w), *(i + w) = *(j + w),                          \
            *(j + w) = *(k + w), *(k + w) = t; while (w > 0);} while (0)

#define findxc(minmax, x, s, e, m) do {                                        \
        n = x, k = s; do                                                       \
            if ((*qcmp) minmax > 0) n = k; while ((k += rl) <= e);             \
        if (n == x) swap (m, x); else rotate (m, n, x);} while (0)
#define MIN    (n, k)
#define MAX    (k, n)

#define cksrtd(l, r) if (psz > MDT) do {                                       \
            k = l; while (k < r && (*qcmp)(k, k + rl) <= 0) k += rl;           \
            if (k >= r) i = j; else if (k >= m) i = m;} while (0)

#define stack       ptrtyp s[stksz], *p = s
#define stksz       sizeof (size_t) * 8 * 2
#define push(x, y)  *(p++) = x, *(p++) = y
#define empty       (s >= p)
#define pop(y, x)   y = *(--p), x = *(--p)

#define defwrk  ptrtyp k, n; defdgn; swpwrk; stack
typedef int (*ftp) (const void *, const void *);
typedef char *  ptrtyp;

void qsort (void *av, size_t rn, size_t rl, ftp qcmp) {
    ptrtyp l, r, m, i, j;  int psz, thr, cst, c;  defwrk;
    if (qcmp == (ftp) 0) radixspi (av, rn, rl);   else     /* call RADIX SORT */
    if (rn > 1 && rl > 0) {
        l = (char *) av, r = l + (rn - 1) * rl;

        for ( ; ; ) {                     /* initialize for current partition */
            m = l + ((unsigned) (psz = (r - l) / rl) / 2) * rl,
            i = l, j = r, thr = MDT, psz -= (MDA), cst = 1;
            if (!no_med) {
                if (symptm) xcsmpl ();

                for ( ; ; ) {            /* bring median of samples to middle */
                    if ((*qcmp)(m, j) <= 0) {
                        if (i <  m && (*qcmp)(i, m) > 0) {
                            findxc (MIN, i, j, r, m); cst = 0;}}
                    else {
                        if (i >= m || (*qcmp)(i, m) > 0) swap (i, j); else
                            findxc (MAX, j, l, i, m); cst = 0;}
                    i += rl, j -= rl;
                    if (psz > thr) thr *= MDM; else break;}}

            if (cst != 0) cksrtd (l, r);
            if (i < j) {                /* do partitioning by middle as pivot */
                for (n = m; ; ) {      /* gathering equal keys close to pivot */
                    while (i < m) {
                        if ((c = (*qcmp)(i, m)) < 0) i += rl; else
                        if (c > 0)      break; else {
                            while (i < (m -= rl) && (c = (*qcmp)(m, i)) == 0);
                            if (i >= m) break; swap (i, m);
                            if (c > 0)  break; else  i += rl;}}
                    while (n < j) {
                        if ((c = (*qcmp)(n, j)) < 0) j -= rl; else
                        if (c > 0)      break; else {
                            while ((n += rl) < j && (c = (*qcmp)(j, n)) == 0);
                            if (n >= j) break; swap (n, j);
                            if (c > 0)  break; else  j -= rl;}}
                    if (i >= m && n >= j)      break;
                    if (n == j) {
                        if (m == n) {
                            swap (i, j); j -= rl, n = m = i;} else
                        if (i < (m -= rl)) {
                            rotate (i, m, j); n = j -= rl;}   else {
                        swap (i, j); j -= rl;  break;}}           else
                    if (i == m) {
                        if (n == m) {
                            swap (i, j); i += rl; m = n = j;} else
                        if ((n += rl) < j) {
                            rotate (j, n, i); m = i += rl;}   else {
                        swap (i, j); i += rl;  break;}}           else {
                    swap (i, j); j -= rl, i += rl;}}

                                    /* select subpartition for next iteration */
                if ((i -= rl) - l < r - (j += rl)) {
                    ckdgnr (l, j); if (l >= i) l = j; else {
                        push (j, r), r = i; continue;}}
                else {
                    ckdgnr (i, r); if (j >= r) r = i; else {
                        push (l, i), l = j; continue;}}
                if (l < r) continue;}
                                          /* pop up next partition from stack */
            if (!empty) pop (r, l); else break;}}}

#include "radixspi-ar.c"

/*  a-wada/qsortlib/ccn/qsorts.c end  */
/*----------------------------------------------------------------------------**
 *source codes on :
    radix_ar.c (original for array of unsigned longs by Andre Reinald)
      http://www.cubic.org/~submissive/sourcerer/download/radix_ar.c  25-May-97
    radixspi-ar.c (modified for array of pointers to structs with u_int key)
      http://www.mars.dti.ne.jp/~a-wada/qsortlib/ccn/radixspi-ar.c
**----------------------------------------------------------------------------*/



To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199908200122.AA00048>