Date: Wed, 28 Nov 2007 19:09:07 GMT From: Peter Wemm <peter@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 129713 for review Message-ID: <200711281909.lASJ977e026972@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=129713 Change 129713 by peter@peter_overcee on 2007/11/28 19:08:51 IFC @129711 Affected files ... .. //depot/projects/hammer/ObsoleteFiles.inc#41 integrate .. //depot/projects/hammer/UPDATING#111 integrate .. //depot/projects/hammer/etc/defaults/periodic.conf#21 integrate .. //depot/projects/hammer/etc/gss/mech#2 integrate .. //depot/projects/hammer/gnu/usr.bin/groff/tmac/mdoc.local#29 integrate .. //depot/projects/hammer/lib/libc/gen/Symbol.map#6 integrate .. //depot/projects/hammer/lib/libc/stdlib/malloc.3#16 integrate .. //depot/projects/hammer/lib/libc/stdlib/malloc.c#37 integrate .. //depot/projects/hammer/lib/libgssapi/gss_acquire_cred.c#2 integrate .. //depot/projects/hammer/lib/libkse/kse.map#2 integrate .. //depot/projects/hammer/lib/libkse/sys/lock.c#2 integrate .. //depot/projects/hammer/lib/libkse/sys/lock.h#2 integrate .. //depot/projects/hammer/lib/libkse/thread/thr_cond.c#2 integrate .. //depot/projects/hammer/lib/libkse/thread/thr_init.c#2 integrate .. //depot/projects/hammer/lib/libkse/thread/thr_kern.c#2 integrate .. //depot/projects/hammer/lib/libkse/thread/thr_mutex.c#3 integrate .. //depot/projects/hammer/lib/libkse/thread/thr_rtld.c#2 integrate .. //depot/projects/hammer/lib/libthr/pthread.map#14 integrate .. //depot/projects/hammer/lib/libthr/thread/thr_mutex.c#36 integrate .. //depot/projects/hammer/release/doc/en_US.ISO8859-1/relnotes/article.sgml#26 integrate .. //depot/projects/hammer/release/doc/share/sgml/release.ent#22 integrate .. //depot/projects/hammer/sbin/mdconfig/mdconfig.8#21 integrate .. //depot/projects/hammer/sbin/mount/mount.8#24 integrate .. //depot/projects/hammer/sbin/newfs/newfs.8#13 integrate .. //depot/projects/hammer/sbin/newfs/newfs.c#16 integrate .. //depot/projects/hammer/sbin/newfs/newfs.h#9 integrate .. //depot/projects/hammer/sbin/sysctl/sysctl.8#15 integrate .. //depot/projects/hammer/share/man/man4/agp.4#8 integrate .. //depot/projects/hammer/share/man/man4/ichsmb.4#4 integrate .. //depot/projects/hammer/share/man/man4/nfe.4#6 integrate .. //depot/projects/hammer/share/man/man4/rl.4#18 integrate .. //depot/projects/hammer/share/man/man9/Makefile#75 integrate .. //depot/projects/hammer/share/man/man9/stack.9#1 branch .. //depot/projects/hammer/share/misc/iso3166#8 integrate .. //depot/projects/hammer/share/mk/sys.mk#27 integrate .. //depot/projects/hammer/sys/amd64/amd64/busdma_machdep.c#46 integrate .. //depot/projects/hammer/sys/amd64/conf/GENERIC#100 integrate .. //depot/projects/hammer/sys/arm/arm/busdma_machdep.c#26 integrate .. //depot/projects/hammer/sys/arm/include/atomic.h#18 integrate .. //depot/projects/hammer/sys/conf/NOTES#129 integrate .. //depot/projects/hammer/sys/conf/options#116 integrate .. //depot/projects/hammer/sys/dev/an/if_an.c#38 integrate .. //depot/projects/hammer/sys/dev/sound/pci/hda/hdac.c#12 integrate .. //depot/projects/hammer/sys/dev/wpi/if_wpi.c#3 integrate .. //depot/projects/hammer/sys/dev/wpi/if_wpireg.h#2 integrate .. //depot/projects/hammer/sys/i386/conf/GENERIC#58 integrate .. //depot/projects/hammer/sys/i386/conf/XBOX#6 integrate .. //depot/projects/hammer/sys/i386/i386/busdma_machdep.c#35 integrate .. //depot/projects/hammer/sys/ia64/ia64/busdma_machdep.c#22 integrate .. //depot/projects/hammer/sys/ia64/include/atomic.h#6 integrate .. //depot/projects/hammer/sys/kern/kern_mutex.c#51 integrate .. //depot/projects/hammer/sys/kern/kern_rwlock.c#14 integrate .. //depot/projects/hammer/sys/netinet/tcp_output.c#47 integrate .. //depot/projects/hammer/sys/netinet/tcp_subr.c#79 integrate .. //depot/projects/hammer/sys/powerpc/include/atomic.h#11 integrate .. //depot/projects/hammer/sys/sparc64/conf/GENERIC#55 integrate .. //depot/projects/hammer/sys/sun4v/conf/GENERIC#6 integrate .. //depot/projects/hammer/sys/sys/signal.h#17 integrate .. //depot/projects/hammer/usr.sbin/newsyslog/newsyslog.conf.5#6 integrate Differences ... ==== //depot/projects/hammer/ObsoleteFiles.inc#41 (text+ko) ==== @@ -1,5 +1,5 @@ # -# $FreeBSD: src/ObsoleteFiles.inc,v 1.120 2007/11/21 10:49:33 ru Exp $ +# $FreeBSD: src/ObsoleteFiles.inc,v 1.121 2007/11/27 13:58:25 brix Exp $ # # This file lists old files (OLD_FILES), libraries (OLD_LIBS) and # directories (OLD_DIRS) which should get removed at an update. Recently @@ -3964,9 +3964,10 @@ # - usr/share/tmac/mm/se_locale # - var/yp/Makefile -# 20071119: shared library version bump +# 20071120: shared library version bump OLD_LIBS+=usr/lib/libasn1.so.8 OLD_LIBS+=usr/lib/libgssapi.so.8 +OLD_LIBS+=usr/lib/libgssapi_krb5.so.8 OLD_LIBS+=usr/lib/libhdb.so.8 OLD_LIBS+=usr/lib/libkadm5clnt.so.8 OLD_LIBS+=usr/lib/libkadm5srv.so.8 ==== //depot/projects/hammer/UPDATING#111 (text+ko) ==== @@ -21,6 +21,10 @@ developers choose to disable these features on build machines to maximize performance. +20071128: + The ADAPTIVE_GIANT kernel option has been retired because its + functionality is the default now. + 20071118: The AT keyboard emulation of sunkbd(4) has been turned on by default. In order to make the special symbols of the Sun @@ -945,4 +949,4 @@ Contact Warner Losh if you have any questions about your use of this document. -$FreeBSD: src/UPDATING,v 1.512 2007/11/18 18:11:16 marius Exp $ +$FreeBSD: src/UPDATING,v 1.513 2007/11/28 13:04:11 matteo Exp $ ==== //depot/projects/hammer/etc/defaults/periodic.conf#21 (text+ko) ==== @@ -13,7 +13,7 @@ # For a more detailed explanation of all the periodic.conf variables, please # refer to the periodic.conf(5) manual page. # -# $FreeBSD: src/etc/defaults/periodic.conf,v 1.44 2007/05/29 06:22:13 dougb Exp $ +# $FreeBSD: src/etc/defaults/periodic.conf,v 1.45 2007/11/28 17:31:11 jhb Exp $ # # What files override these defaults ? @@ -45,7 +45,9 @@ daily_clean_tmps_enable="NO" # Delete stuff daily daily_clean_tmps_dirs="/tmp" # Delete under here daily_clean_tmps_days="3" # If not accessed for -daily_clean_tmps_ignore=".X*-lock quota.user quota.group" # Don't delete these +daily_clean_tmps_ignore=".X*-lock .X11-unix .ICE-unix .font-unix .XIM-unix" +daily_clean_tmps_ignore="$daily_clean_tmps_ignore quota.user quota.group" + # Don't delete these daily_clean_tmps_verbose="YES" # Mention files deleted # 120.clean-preserve ==== //depot/projects/hammer/etc/gss/mech#2 (text+ko) ==== @@ -1,4 +1,4 @@ -# $FreeBSD: src/etc/gss/mech,v 1.1 2005/12/29 14:40:18 dfr Exp $ +# $FreeBSD: src/etc/gss/mech,v 1.2 2007/11/27 21:47:56 jhb Exp $ # # Name OID Library name Kernel module -kerberosv5 1.2.840.113554.1.2.2 /usr/lib/libgssapi_krb5.so.8 - +kerberosv5 1.2.840.113554.1.2.2 /usr/lib/libgssapi_krb5.so.9 - ==== //depot/projects/hammer/gnu/usr.bin/groff/tmac/mdoc.local#29 (text+ko) ==== @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" $FreeBSD: src/gnu/usr.bin/groff/tmac/mdoc.local,v 1.61 2007/10/22 10:01:58 ru Exp $ +.\" $FreeBSD: src/gnu/usr.bin/groff/tmac/mdoc.local,v 1.62 2007/11/27 10:00:33 jkoshy Exp $ .\" .\" %beginstrip% . @@ -52,7 +52,7 @@ .ds doc-str-Lb-libmd Message Digest (MD4, MD5, etc.) Support Library (libmd, \-lmd) .ds doc-str-Lb-libmemstat Kernel Memory Allocator Statistics Library (libmemstat, \-lmemstat) .ds doc-str-Lb-libnetgraph Netgraph User Library (libnetgraph, \-lnetgraph) -.ds doc-str-Lb-libpmc Performance Monitoring Counters API (libpmc, \-lpmc) +.ds doc-str-Lb-libpmc Performance Monitoring Counters Interface Library (libpmc, \-lpmc) .ds doc-str-Lb-librpcsvc RPC Service Library (librpcsvc, \-lrpcsvc) .ds doc-str-Lb-libsdp Bluetooth Service Discovery Protocol User Library (libsdp, \-lsdp) .ds doc-str-Lb-libthr 1:1 Threading Library (libthr, \-lthr) ==== //depot/projects/hammer/lib/libc/gen/Symbol.map#6 (text) ==== @@ -1,5 +1,5 @@ /* - * $FreeBSD: src/lib/libc/gen/Symbol.map,v 1.6 2007/05/31 13:01:33 deischen Exp $ + * $FreeBSD: src/lib/libc/gen/Symbol.map,v 1.7 2007/11/27 16:22:21 jasone Exp $ */ FBSD_1.0 { @@ -378,6 +378,7 @@ _pthread_kill; _pthread_main_np; _pthread_mutex_destroy; + _pthread_mutex_init_calloc_cb; _pthread_mutex_init; _pthread_mutex_lock; _pthread_mutex_trylock; ==== //depot/projects/hammer/lib/libc/stdlib/malloc.3#16 (text+ko) ==== @@ -30,9 +30,9 @@ .\" SUCH DAMAGE. .\" .\" @(#)malloc.3 8.1 (Berkeley) 6/4/93 -.\" $FreeBSD: src/lib/libc/stdlib/malloc.3,v 1.73 2007/06/15 22:32:33 jasone Exp $ +.\" $FreeBSD: src/lib/libc/stdlib/malloc.3,v 1.74 2007/11/27 03:18:26 jasone Exp $ .\" -.Dd June 15, 2007 +.Dd October 1, 2007 .Dt MALLOC 3 .Os .Sh NAME @@ -177,6 +177,19 @@ The process will call .Xr abort 3 in these cases. +.It B +Increase/decrease the per-arena lock contention threshold at which a thread is +randomly re-assigned to an arena. +This dynamic load balancing tends to push threads away from highly contended +arenas, which avoids worst case contention scenarios in which threads +disproportionately utilize arenas. +However, due to the highly dynamic load that applications may place on the +allocator, it is impossible for the allocator to know in advance how sensitive +it should be to contention over arenas. +Therefore, some applications may benefit from increasing or decreasing this +threshold parameter. +This option is not available for some configurations (non-PIC). +This option can be specified multiple times. .It H Use .Xr madvise 2 @@ -204,6 +217,18 @@ Increase/decrease the virtual memory chunk size by a factor of two. The default chunk size is 1 MB. This option can be specified multiple times. +.It L +Increase/decrease the per-arena number of slots for lazy deallocation. +Lazy deallocation can decrease lock contention, especially for programs that use +the producer/consumer model. +The default is 256 slots per arena (so +.Ev MALLOC_OPTIONS=lllllllll +will disable lazy deallocation), but note that due to algorithmic details, the +cache is typically flushed well before it completely fills. +This option has no impact unless there are multiple CPUs, and lazy deallocation +does not activate unless the program uses multiple threads. +This option is not available for some configurations (non-PIC). +This option can be specified multiple times. .It N Increase/decrease the number of arenas by a factor of two. The default number of arenas is four times the number of CPUs, or one if there ==== //depot/projects/hammer/lib/libc/stdlib/malloc.c#37 (text+ko) ==== @@ -98,14 +98,14 @@ * defaults the A and J runtime options to off. These settings are appropriate * for production systems. */ -/* #define MALLOC_PRODUCTION */ +/* #define MALLOC_PRODUCTION */ #ifndef MALLOC_PRODUCTION # define MALLOC_DEBUG #endif #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); +__FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.152 2007/11/28 00:17:34 jasone Exp $"); #include "libc_private.h" #ifdef MALLOC_DEBUG @@ -171,6 +171,7 @@ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 2 # define USE_BRK +# define CPU_SPINWAIT __asm__ volatile("pause") #endif #ifdef __ia64__ # define QUANTUM_2POW_MIN 4 @@ -189,6 +190,7 @@ #ifdef __amd64__ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 3 +# define CPU_SPINWAIT __asm__ volatile("pause") #endif #ifdef __arm__ # define QUANTUM_2POW_MIN 3 @@ -202,9 +204,9 @@ # define USE_BRK #endif -#define SIZEOF_PTR (1 << SIZEOF_PTR_2POW) +#define SIZEOF_PTR (1U << SIZEOF_PTR_2POW) -/* sizeof(int) == (1 << SIZEOF_INT_2POW). */ +/* sizeof(int) == (1U << SIZEOF_INT_2POW). */ #ifndef SIZEOF_INT_2POW # define SIZEOF_INT_2POW 2 #endif @@ -226,7 +228,7 @@ * negatively affect performance. */ #define CACHELINE_2POW 6 -#define CACHELINE ((size_t)(1 << CACHELINE_2POW)) +#define CACHELINE ((size_t)(1U << CACHELINE_2POW)) /* Smallest size class to support. */ #define TINY_MIN_2POW 1 @@ -237,7 +239,7 @@ * power of 2. */ #define SMALL_MAX_2POW_DEFAULT 9 -#define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT) +#define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT) /* * Maximum desired run header overhead. Runs are sized as small as possible @@ -252,18 +254,69 @@ * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of * overhead for which RUN_MAX_OVRHD is relaxed. */ -#define RUN_MAX_OVRHD 0.015 -#define RUN_MAX_OVRHD_RELAX 1.5 +#define RUN_MAX_OVRHD 0.015 +#define RUN_MAX_OVRHD_RELAX 1.5 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */ -#define RUN_MAX_SMALL_2POW 15 -#define RUN_MAX_SMALL (1 << RUN_MAX_SMALL_2POW) +#define RUN_MAX_SMALL_2POW 15 +#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW) + +/* Default size of each arena's lazy free cache. */ +#define LAZY_FREE_2POW_DEFAULT 8 +/* + * Number of pseudo-random probes to conduct before considering the cache to be + * overly full. It takes on average n probes to detect fullness of (n-1)/n. + * However, we are effectively doing multiple non-independent trials (each + * deallocation is a trial), so the actual average threshold for clearing the + * cache is somewhat lower. + */ +#define LAZY_FREE_NPROBES 5 + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. If no such instruction is defined + * above, make CPU_SPINWAIT a no-op. + */ +#ifndef CPU_SPINWAIT +# define CPU_SPINWAIT +#endif + +/* + * Adaptive spinning must eventually switch to blocking, in order to avoid the + * potential for priority inversion deadlock. Backing off past a certain point + * can actually waste time. + */ +#define SPIN_LIMIT_2POW 11 + +/* + * Conversion from spinning to blocking is expensive; we use (1U << + * BLOCK_COST_2POW) to estimate how many more times costly blocking is than + * worst-case spinning. + */ +#define BLOCK_COST_2POW 4 + +/* + * We use an exponential moving average to track recent lock contention, where + * the size of the history window is N, and alpha=2/(N+1). + * + * Due to integer math rounding, very small values here can cause substantial + * degradation in accuracy, thus making the moving average decay faster than it + * would with precise calculation. + */ +#define BALANCE_ALPHA_INV_2POW 9 + +/* + * Threshold value for the exponential moving contention average at which to + * re-assign a thread. + */ +#define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4)) /******************************************************************************/ /* - * Mutexes based on spinlocks. We can't use normal pthread mutexes, because - * they require malloc()ed memory. + * Mutexes based on spinlocks. We can't use normal pthread spinlocks in all + * places, because they require malloc()ed memory, which causes bootstrapping + * issues in some cases. */ typedef struct { spinlock_t lock; @@ -319,6 +372,11 @@ size_t allocated_large; uint64_t nmalloc_large; uint64_t ndalloc_large; + +#ifndef NO_TLS + /* Number of times this arena reassigned a thread due to contention. */ + uint64_t nbalance; +#endif }; typedef struct chunk_stats_s chunk_stats_t; @@ -374,17 +432,27 @@ typedef struct arena_chunk_map_s arena_chunk_map_t; struct arena_chunk_map_s { - /* Number of pages in run. */ + /* + * Number of pages in run. For a free run that has never been touched, + * this is NPAGES_EMPTY for the central pages, which allows us to avoid + * zero-filling untouched pages for calloc(). + */ +#define NPAGES_EMPTY ((uint32_t)0x0U) uint32_t npages; /* - * Position within run. For a free run, this is POS_FREE for the first - * and last pages. The POS_FREE special value makes it possible to - * quickly coalesce free runs. + * Position within run. For a free run, this is POS_EMPTY/POS_FREE for + * the first and last pages. The special values make it possible to + * quickly coalesce free runs. POS_EMPTY indicates that the run has + * never been touched, which allows us to avoid zero-filling untouched + * pages for calloc(). * * This is the limiting factor for chunksize; there can be at most 2^31 * pages in a run. + * + * POS_EMPTY is assumed by arena_run_dalloc() to be less than POS_FREE. */ -#define POS_FREE ((uint32_t)0xffffffffU) +#define POS_EMPTY ((uint32_t)0xfffffffeU) +#define POS_FREE ((uint32_t)0xffffffffU) uint32_t pos; }; @@ -496,8 +564,8 @@ # define ARENA_MAGIC 0x947d3d24 #endif - /* All operations on this arena require that mtx be locked. */ - malloc_mutex_t mtx; + /* All operations on this arena require that lock be locked. */ + pthread_mutex_t lock; #ifdef MALLOC_STATS arena_stats_t stats; @@ -517,7 +585,23 @@ * order to avoid interactions between multiple threads that could make * a single spare inadequate. */ - arena_chunk_t *spare; + arena_chunk_t *spare; + +#ifndef NO_TLS + /* + * The arena load balancing machinery needs to keep track of how much + * lock contention there is. This value is exponentially averaged. + */ + uint32_t contention; + + /* + * Deallocation of small objects can be lazy, in which case free_cache + * stores pointers to those objects that have not yet been deallocated. + * In order to avoid lock contention, slots are chosen randomly. Empty + * slots contain NULL. + */ + void **free_cache; +#endif /* * bins is used to store rings of free regions of the following sizes, @@ -650,9 +734,9 @@ static arena_t **arenas; static unsigned narenas; #ifndef NO_TLS -static unsigned next_arena; +static unsigned narenas_2pow; #endif -static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */ +static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */ #ifndef NO_TLS /* @@ -681,6 +765,10 @@ static bool opt_junk = false; #endif static bool opt_hint = false; +#ifndef NO_TLS +static int opt_lazy_free_2pow = LAZY_FREE_2POW_DEFAULT; +static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT; +#endif static bool opt_print_stats = false; static size_t opt_quantum_2pow = QUANTUM_2POW_MIN; static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT; @@ -689,7 +777,7 @@ static bool opt_sysv = false; static bool opt_xmalloc = false; static bool opt_zero = false; -static int32_t opt_narenas_lshift = 0; +static int opt_narenas_lshift = 0; typedef struct { void *p; @@ -709,6 +797,7 @@ */ static void malloc_mutex_init(malloc_mutex_t *a_mutex); +static bool malloc_spin_init(pthread_mutex_t *lock); static void wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4); #ifdef MALLOC_STATS @@ -717,6 +806,7 @@ static char *umax2s(uintmax_t x, char *s); static bool base_pages_alloc(size_t minsize); static void *base_alloc(size_t size); +static void *base_calloc(size_t number, size_t size); static chunk_node_t *base_chunk_node_alloc(void); static void base_chunk_node_dealloc(chunk_node_t *node); #ifdef MALLOC_STATS @@ -729,15 +819,16 @@ #ifndef NO_TLS static arena_t *choose_arena_hard(void); #endif -static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size); +static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, + bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); -static arena_run_t *arena_run_alloc(arena_t *arena, size_t size); +static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool zero); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); -static void *arena_malloc(arena_t *arena, size_t size); +static void *arena_malloc(arena_t *arena, size_t size, bool zero); static void *arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size); static size_t arena_salloc(const void *ptr); @@ -745,7 +836,7 @@ static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); static bool arena_new(arena_t *arena); static arena_t *arenas_extend(unsigned ind); -static void *huge_malloc(size_t size); +static void *huge_malloc(size_t size, bool zero); static void *huge_palloc(size_t alignment, size_t size); static void *huge_ralloc(void *ptr, size_t size, size_t oldsize); static void huge_dalloc(void *ptr); @@ -763,7 +854,9 @@ */ /******************************************************************************/ /* - * Begin mutex. + * Begin mutex. We can't use normal pthread mutexes in all places, because + * they require malloc()ed memory, which causes bootstrapping issues in some + * cases. */ static void @@ -795,6 +888,86 @@ */ /******************************************************************************/ /* + * Begin spin lock. Spin locks here are actually adaptive mutexes that block + * after a period of spinning, because unbounded spinning would allow for + * priority inversion. + */ + +/* + * We use an unpublished interface to initialize pthread mutexes with an + * allocation callback, in order to avoid infinite recursion. + */ +int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, + void *(calloc_cb)(size_t, size_t)); + +__weak_reference(_pthread_mutex_init_calloc_cb_stub, + _pthread_mutex_init_calloc_cb); + +int +_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex, + void *(calloc_cb)(size_t, size_t)) +{ + + return (0); +} + +static bool +malloc_spin_init(pthread_mutex_t *lock) +{ + + if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0) + return (true); + + return (false); +} + +static inline unsigned +malloc_spin_lock(pthread_mutex_t *lock) +{ + unsigned ret = 0; + + if (__isthreaded) { + if (_pthread_mutex_trylock(lock) != 0) { + unsigned i; + volatile unsigned j; + + /* Exponentially back off. */ + for (i = 1; i <= SPIN_LIMIT_2POW; i++) { + for (j = 0; j < (1U << i); j++) + ret++; + + CPU_SPINWAIT; + if (_pthread_mutex_trylock(lock) == 0) + return (ret); + } + + /* + * Spinning failed. Block until the lock becomes + * available, in order to avoid indefinite priority + * inversion. + */ + _pthread_mutex_lock(lock); + assert((ret << BLOCK_COST_2POW) != 0); + return (ret << BLOCK_COST_2POW); + } + } + + return (ret); +} + +static inline void +malloc_spin_unlock(pthread_mutex_t *lock) +{ + + if (__isthreaded) + _pthread_mutex_unlock(lock); +} + +/* + * End spin lock. + */ +/******************************************************************************/ +/* * Begin Utility functions/macros. */ @@ -840,6 +1013,63 @@ return (x); } +#ifndef NO_TLS +/* + * Use a simple linear congruential pseudo-random number generator: + * + * prn(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example. the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. + */ +#define PRN_DEFINE(suffix, var, a, c) \ +static inline void \ +sprn_##suffix(uint32_t seed) \ +{ \ + var = seed; \ +} \ + \ +static inline uint32_t \ +prn_##suffix(uint32_t lg_range) \ +{ \ + uint32_t ret, x; \ + \ + assert(lg_range > 0); \ + assert(lg_range <= 32); \ + \ + x = (var * (a)) + (c); \ + var = x; \ + ret = x >> (32 - lg_range); \ + \ + return (ret); \ +} +#define SPRN(suffix, seed) sprn_##suffix(seed) +#define PRN(suffix, lg_range) prn_##suffix(lg_range) + +/* + * Define PRNGs, one for each purpose, in order to avoid auto-correlation + * problems. + */ + +/* Define the per-thread PRNG used for lazy deallocation. */ +static __thread uint32_t lazy_free_x; +PRN_DEFINE(lazy_free, lazy_free_x, 12345, 12347) + +/* Define the PRNG used for arena assignment. */ +static __thread uint32_t balance_x; +PRN_DEFINE(balance, balance_x, 1297, 1301); +#endif + static void wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4) { @@ -876,7 +1106,7 @@ * integer printing functionality, so that malloc_printf() use can be limited to * MALLOC_STATS code. */ -#define UMAX2S_BUFSIZE 21 +#define UMAX2S_BUFSIZE 21 static char * umax2s(uintmax_t x, char *s) { @@ -995,6 +1225,17 @@ return (ret); } +static void * +base_calloc(size_t number, size_t size) +{ + void *ret; + + ret = base_alloc(number * size); + memset(ret, 0, number * size); + + return (ret); +} + static chunk_node_t * base_chunk_node_alloc(void) { @@ -1202,6 +1443,11 @@ && (uintptr_t)chunk < (uintptr_t)brk_max) { /* Re-use a previously freed brk chunk. */ ret = chunk; + /* + * Maintain invariant that all newly allocated + * chunks are untouched or zero-filled. + */ + memset(ret, 0, size); goto RETURN; } #endif @@ -1444,8 +1690,10 @@ } ret = arenas_map; - if (ret == NULL) + if (ret == NULL) { ret = choose_arena_hard(); + assert(ret != NULL); + } #else if (__isthreaded) { unsigned long ind; @@ -1479,12 +1727,12 @@ * Avoid races with another thread that may have already * initialized arenas[ind]. */ - malloc_mutex_lock(&arenas_mtx); + malloc_spin_lock(&arenas_lock); if (arenas[ind] == NULL) ret = arenas_extend((unsigned)ind); else ret = arenas[ind]; - malloc_mutex_unlock(&arenas_mtx); + malloc_spin_unlock(&arenas_lock); } } else ret = arenas[0]; @@ -1506,21 +1754,39 @@ assert(__isthreaded); - /* Assign one of the arenas to this thread, in a round-robin fashion. */ - malloc_mutex_lock(&arenas_mtx); - ret = arenas[next_arena]; - if (ret == NULL) - ret = arenas_extend(next_arena); - if (ret == NULL) { - /* - * Make sure that this function never returns NULL, so that - * choose_arena() doesn't have to check for a NULL return - * value. - */ + /* + * Seed the PRNG used for lazy deallocation. Since seeding only occurs + * on the first allocation by a thread, it is possible for a thread to + * deallocate before seeding. This is not a critical issue though, + * since it is extremely unusual for an application to to use threads + * that deallocate but *never* allocate, and because even if seeding + * never occurs for multiple threads, they will tend to drift apart + * unless some aspect of the application forces deallocation + * synchronization. + */ + SPRN(lazy_free, (uint32_t)(uintptr_t)(_pthread_self())); + + /* + * Seed the PRNG used for arena load balancing. We can get away with + * using the same seed here as for the lazy_free PRNG without + * introducing autocorrelation because the PRNG parameters are + * distinct. + */ + SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self())); + + if (narenas > 1) { + unsigned ind; + + ind = PRN(balance, narenas_2pow); + if ((ret = arenas[ind]) == NULL) { + malloc_spin_lock(&arenas_lock); + if ((ret = arenas[ind]) == NULL) + ret = arenas_extend(ind); + malloc_spin_unlock(&arenas_lock); + } + } else ret = arenas[0]; - } - next_arena = (next_arena + 1) % narenas; - malloc_mutex_unlock(&arenas_mtx); + arenas_map = ret; return (ret); @@ -1588,7 +1854,7 @@ + (bin->reg_size * regind)); /* Clear bit. */ - mask ^= (1 << bit); + mask ^= (1U << bit); run->regs_mask[i] = mask; return (ret); @@ -1605,7 +1871,7 @@ + (bin->reg_size * regind)); /* Clear bit. */ - mask ^= (1 << bit); + mask ^= (1U << bit); run->regs_mask[i] = mask; /* @@ -1635,8 +1901,8 @@ * * (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT */ -#define SIZE_INV_SHIFT 21 -#define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1) +#define SIZE_INV_SHIFT 21 +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1) static const unsigned size_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), @@ -1718,32 +1984,73 @@ if (elm < run->regs_minelm) run->regs_minelm = elm; bit = regind - (elm << (SIZEOF_INT_2POW + 3)); - assert((run->regs_mask[elm] & (1 << bit)) == 0); - run->regs_mask[elm] |= (1 << bit); + assert((run->regs_mask[elm] & (1U << bit)) == 0); + run->regs_mask[elm] |= (1U << bit); #undef SIZE_INV #undef SIZE_INV_SHIFT } static void -arena_run_split(arena_t *arena, arena_run_t *run, size_t size) +arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool zero) { arena_chunk_t *chunk; unsigned run_ind, map_offset, total_pages, need_pages, rem_pages; unsigned i; + uint32_t pos_beg, pos_end; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> pagesize_2pow); total_pages = chunk->map[run_ind].npages; need_pages = (size >> pagesize_2pow); + assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; /* Split enough pages from the front of run to fit allocation size. */ map_offset = run_ind; - for (i = 0; i < need_pages; i++) { + pos_beg = chunk->map[map_offset].pos; + pos_end = chunk->map[map_offset + total_pages - 1].pos; + if (zero == false) { + for (i = 0; i < need_pages; i++) { + chunk->map[map_offset + i].npages = need_pages; + chunk->map[map_offset + i].pos = i; + } + } else { + /* + * Handle first page specially, since we need to look for + * POS_EMPTY rather than NPAGES_EMPTY. + */ + i = 0; + if (chunk->map[map_offset + i].pos != POS_EMPTY) { + memset((void *)((uintptr_t)chunk + ((map_offset + i) << + pagesize_2pow)), 0, pagesize); + } chunk->map[map_offset + i].npages = need_pages; chunk->map[map_offset + i].pos = i; + + /* Handle central pages. */ + for (i++; i < need_pages - 1; i++) { + if (chunk->map[map_offset + i].npages != NPAGES_EMPTY) { + memset((void *)((uintptr_t)chunk + ((map_offset + + i) << pagesize_2pow)), 0, pagesize); + } + chunk->map[map_offset + i].npages = need_pages; + chunk->map[map_offset + i].pos = i; + } + + /* + * Handle last page specially, since we need to look for + * POS_EMPTY rather than NPAGES_EMPTY. + */ + if (i < need_pages) { + if (chunk->map[map_offset + i].npages != POS_EMPTY) { + memset((void *)((uintptr_t)chunk + ((map_offset + + i) << pagesize_2pow)), 0, pagesize); + } + chunk->map[map_offset + i].npages = need_pages; + chunk->map[map_offset + i].pos = i; + } } /* Keep track of trailing unused pages for later use. */ @@ -1751,9 +2058,9 @@ /* Update map for trailing pages. */ map_offset += need_pages; chunk->map[map_offset].npages = rem_pages; - chunk->map[map_offset].pos = POS_FREE; + chunk->map[map_offset].pos = pos_beg; chunk->map[map_offset + rem_pages - 1].npages = rem_pages; - chunk->map[map_offset + rem_pages - 1].pos = POS_FREE; + chunk->map[map_offset + rem_pages - 1].pos = pos_end; } chunk->pages_used += need_pages; @@ -1770,6 +2077,8 @@ RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk); } else { + unsigned i; + chunk = (arena_chunk_t *)chunk_alloc(chunksize); if (chunk == NULL) return (NULL); @@ -1794,12 +2103,20 @@ /* * Initialize enough of the map to support one maximal free run. */ - chunk->map[arena_chunk_header_npages].npages = chunk_npages - - arena_chunk_header_npages; - chunk->map[arena_chunk_header_npages].pos = POS_FREE; - chunk->map[chunk_npages - 1].npages = chunk_npages - - arena_chunk_header_npages; - chunk->map[chunk_npages - 1].pos = POS_FREE; + i = arena_chunk_header_npages; + chunk->map[i].npages = chunk_npages - arena_chunk_header_npages; + chunk->map[i].pos = POS_EMPTY; + + /* Mark the free run's central pages as untouched. */ + for (i++; i < chunk_npages - 1; i++) + chunk->map[i].npages = NPAGES_EMPTY; + + /* Take care when (chunk_npages == 2). */ + if (i < chunk_npages) { + chunk->map[i].npages = chunk_npages - + arena_chunk_header_npages; + chunk->map[i].pos = POS_EMPTY; + } } return (chunk); @@ -1833,7 +2150,7 @@ } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size) +arena_run_alloc(arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -1867,14 +2184,14 @@ arena_chunk_header_npages); for (i = chunk->min_frun_ind; i < chunk_npages;) { mapelm = &chunk->map[i]; - if (mapelm->pos == POS_FREE) { + if (mapelm->pos >= POS_EMPTY) { if (mapelm->npages >= need_npages) { run = (arena_run_t *) ((uintptr_t)chunk + (i << pagesize_2pow)); /* Update page map. */ arena_run_split(arena, run, - size); + size, zero); return (run); } if (mapelm->npages > @@ -1908,7 +2225,7 @@ run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages << >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200711281909.lASJ977e026972>