From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Mon, 8 Apr 2013 19:10:45 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r249264 - in head: share/man/man9 sys/vm
Message-Id: <201304081910.r38JAj9t087353@svn.freebsd.org>

Author: glebius
Date: Mon Apr  8 19:10:45 2013
New Revision: 249264
URL: http://svnweb.freebsd.org/changeset/base/249264

Log:
  Merge from projects/counters: UMA_ZONE_PCPU zones.

  These zones have a slab size of sizeof(struct pcpu), but request enough
  pages from the VM to fit uk_slabsize * mp_ncpus bytes.  An item
  allocated from such a zone has a separate twin for each CPU in the
  system, and these twins sit at a distance of sizeof(struct pcpu) from
  each other.  This fixed stride makes some later optimizations possible.

  To address a CPU's private copy of an item, simple arithmetic is used:

      item = (type *)((char *)base + sizeof(struct pcpu) * curcpu)

  This arithmetic is available as the zpcpu_get() macro in pcpu.h.

  To support slabs that are not page-sized, a new field, uk_slabsize, was
  added to uma_keg.  This pushed some frequently used fields of uma_keg
  into the fourth cache line on amd64.  To mitigate this pessimization,
  the uma_keg fields were rearranged: the least frequently used, uk_name
  and uk_link, moved down to the fourth cache line, and all other
  frequently dereferenced fields now fit into the first three cache
  lines.

  Sponsored by:	Nginx, Inc.

Modified:
  head/share/man/man9/zone.9
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h
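The stride arithmetic above is wrapped in an accessor macro.  A minimal
sketch of it, assuming the definition that landed in sys/sys/pcpu.h as
part of this series (consult that header for the authoritative version):

    /*
     * Resolve a UMA_ZONE_PCPU allocation to the twin belonging to the
     * CPU we are currently running on.  The caller must stay pinned
     * (e.g. inside critical_enter()/critical_exit()) while the
     * resulting pointer is in use, since curcpu may change otherwise.
     */
    #define	zpcpu_get(base)						\
	((__typeof(base))((char *)(base) + sizeof(struct pcpu) * curcpu))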
Modified: head/share/man/man9/zone.9
==============================================================================
--- head/share/man/man9/zone.9	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/share/man/man9/zone.9	Mon Apr  8 19:10:45 2013	(r249264)
@@ -153,6 +153,23 @@ See
 .Fn uma_find_refcnt .
 .It Dv UMA_ZONE_NODUMP
 Pages belonging to the zone will not be included into mini-dumps.
+.It Dv UMA_ZONE_PCPU
+An allocation from the zone has
+.Va mp_ncpus
+shadow copies, privately assigned to CPUs.
+A CPU can address its private copy using the base allocation address plus
+a multiple of the current CPU id and
+.Fn sizeof "struct pcpu" :
+.Bd -literal -offset indent
+foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
+ ...
+foo_base = uma_zalloc(foo_zone, ...);
+ ...
+critical_enter();
+foo_pcpu = (foo_t *)zpcpu_get(foo_base);
+/* do something with foo_pcpu */
+critical_exit();
+.Ed
 .It Dv UMA_ZONE_OFFPAGE
 By default book-keeping of items within a slab is done in the slab page itself.
 This flag explicitly tells subsystem that book-keeping structure should be

Modified: head/sys/vm/uma.h
==============================================================================
--- head/sys/vm/uma.h	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/sys/vm/uma.h	Mon Apr  8 19:10:45 2013	(r249264)
@@ -252,6 +252,10 @@ int uma_zsecond_add(uma_zone_t zone, uma
 					 * Zone's pages will not be included in
 					 * mini-dumps.
 					 */
+#define	UMA_ZONE_PCPU		0x8000	/*
+					 * Allocates mp_ncpus slabs sized to
+					 * sizeof(struct pcpu).
+					 */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
@@ -260,7 +264,7 @@ int uma_zsecond_add(uma_zone_t zone, uma
  */
 #define	UMA_ZONE_INHERIT						\
     (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |		\
-     UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
+     UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
 
 /* Definitions for align */
 #define UMA_ALIGN_PTR	(sizeof(void *) - 1)	/* Alignment fit for ptr */
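Spelling the zone.9 example out as plain C may help: the sketch below
builds a per-CPU 64-bit counter on a UMA_ZONE_PCPU zone.  The names
(foo_zone, foo_base, foo_*) are the hypothetical ones from the man page,
and the read side sums the twins with the same sizeof(struct pcpu)
stride the allocator guarantees:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/malloc.h>
    #include <sys/pcpu.h>
    #include <sys/smp.h>
    #include <vm/uma.h>

    static uma_zone_t foo_zone;
    static uint64_t *foo_base;

    static void
    foo_init(void)
    {
	    int cpu;

	    /* One allocation hands back mp_ncpus twins behind one pointer. */
	    foo_zone = uma_zcreate("foo pcpu", sizeof(uint64_t),
	        NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
	    foo_base = uma_zalloc(foo_zone, M_WAITOK);

	    /* Zero every CPU's copy, walking with the fixed stride. */
	    CPU_FOREACH(cpu)
		    *(uint64_t *)((char *)foo_base +
		        sizeof(struct pcpu) * cpu) = 0;
    }

    static void
    foo_increment(void)
    {
	    /* Pin to this CPU; touch only its private copy. */
	    critical_enter();
	    (*zpcpu_get(foo_base))++;
	    critical_exit();
    }

    static uint64_t
    foo_read(void)
    {
	    uint64_t sum;
	    int cpu;

	    sum = 0;
	    CPU_FOREACH(cpu)
		    sum += *(uint64_t *)((char *)foo_base +
		        sizeof(struct pcpu) * cpu);
	    return (sum);
    }

Because each CPU writes only its own cache lines, the increment path
needs no locking and causes no cache-line bouncing between CPUs.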
Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/sys/vm/uma_core.c	Mon Apr  8 19:10:45 2013	(r249264)
@@ -765,9 +765,9 @@ finished:
 		    SKIP_NONE, ZFREE_STATFREE);
 #ifdef UMA_DEBUG
 		printf("%s: Returning %d bytes.\n",
-		    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
+		    keg->uk_name, PAGE_SIZE * keg->uk_ppera);
 #endif
-		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
+		keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
 	}
 }
 
@@ -865,7 +865,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
 		wait |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
-	mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
+	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL,
@@ -927,7 +927,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
 			if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 				zone_free_item(keg->uk_slabzone, slab, NULL,
 				    SKIP_NONE, ZFREE_STATFREE);
-			keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
+			keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera,
 			    flags);
 			KEG_LOCK(keg);
 			return (NULL);
@@ -1138,16 +1138,27 @@ keg_small_init(uma_keg_t keg)
 	u_int wastedspace;
 	u_int shsize;
 
-	KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
+	if (keg->uk_flags & UMA_ZONE_PCPU) {
+		keg->uk_slabsize = sizeof(struct pcpu);
+		keg->uk_ppera = howmany(mp_ncpus * sizeof(struct pcpu),
+		    PAGE_SIZE);
+	} else {
+		keg->uk_slabsize = UMA_SLAB_SIZE;
+		keg->uk_ppera = 1;
+	}
+
 	rsize = keg->uk_size;
 
-	if (rsize < UMA_SMALLEST_UNIT)
-		rsize = UMA_SMALLEST_UNIT;
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
+	if (rsize < keg->uk_slabsize / 256)
+		rsize = keg->uk_slabsize / 256;
 
 	keg->uk_rsize = rsize;
-	keg->uk_ppera = 1;
+
+	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
+	    keg->uk_rsize < sizeof(struct pcpu),
+	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		shsize = 0;
@@ -1159,10 +1170,12 @@ keg_small_init(uma_keg_t keg)
 			shsize = sizeof(struct uma_slab);
 	}
 
-	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
-	KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
+	keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
+	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
+
 	memused = keg->uk_ipers * rsize + shsize;
-	wastedspace = UMA_SLAB_SIZE - memused;
+	wastedspace = keg->uk_slabsize - memused;
 
 	/*
 	 * We can't do OFFPAGE if we're internal or if we've been
@@ -1175,24 +1188,26 @@ keg_small_init(uma_keg_t keg)
 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
 		return;
 
-	if ((wastedspace >= UMA_MAX_WASTE) &&
-	    (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
-		keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
-		KASSERT(keg->uk_ipers <= 255,
-		    ("keg_small_init: keg->uk_ipers too high!"));
+	if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
+	    (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
+		keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
+		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 #ifdef UMA_DEBUG
 		printf("UMA decided we need offpage slab headers for "
 		    "keg: %s, calculated wastedspace = %d, "
 		    "maximum wasted space allowed = %d, "
 		    "calculated ipers = %d, "
 		    "new wasted space = %d\n", keg->uk_name, wastedspace,
-		    UMA_MAX_WASTE, keg->uk_ipers,
-		    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
+		    keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
+		    keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
 #endif
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
-		if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
-			keg->uk_flags |= UMA_ZONE_HASH;
 	}
+
+	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
+	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
+		keg->uk_flags |= UMA_ZONE_HASH;
 }
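To make the new keg_small_init() sizing concrete, a worked example.
The numbers are illustrative assumptions (sizeof(struct pcpu) is
machine-dependent), not measurements:

    /*
     * Assume mp_ncpus = 8, sizeof(struct pcpu) = 512, PAGE_SIZE = 4096,
     * and a keg of 8-byte items with off-page headers (shsize = 0).
     *
     * uk_slabsize = sizeof(struct pcpu)     = 512
     * uk_ppera    = howmany(8 * 512, 4096)  = 1 page per backend alloc
     * rsize floor = uk_slabsize / 256       = 2 (no effect on 8 bytes)
     * uk_ipers    = (512 - 0) / 8           = 64 items per slab
     *
     * Each of the 64 items has eight twins, one per CPU, spaced 512
     * bytes (one struct pcpu) apart within the page -- exactly the
     * stride zpcpu_get() applies.
     */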
@@ -1209,19 +1224,15 @@ keg_small_init(uma_keg_t keg)
 static void
 keg_large_init(uma_keg_t keg)
 {
-	int pages;
 
 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
 	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
+	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 
-	pages = keg->uk_size / UMA_SLAB_SIZE;
-
-	/* Account for remainder */
-	if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
-		pages++;
-
-	keg->uk_ppera = pages;
+	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
+	keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
 	keg->uk_ipers = 1;
 	keg->uk_rsize = keg->uk_size;
 
@@ -1242,6 +1253,9 @@ keg_cachespread_init(uma_keg_t keg)
 	int pages;
 	int rsize;
 
+	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
+
 	alignsize = keg->uk_align + 1;
 	rsize = keg->uk_size;
 	/*
@@ -1259,6 +1273,7 @@ keg_cachespread_init(uma_keg_t keg)
 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
 	keg->uk_rsize = rsize;
 	keg->uk_ppera = pages;
+	keg->uk_slabsize = UMA_SLAB_SIZE;
 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
 	KASSERT(keg->uk_ipers <= uma_max_ipers,
@@ -1308,6 +1323,13 @@ keg_ctor(void *mem, int size, void *udat
 	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
 
+	if (arg->flags & UMA_ZONE_PCPU)
+#ifdef SMP
+		keg->uk_flags |= UMA_ZONE_OFFPAGE;
+#else
+		keg->uk_flags &= ~UMA_ZONE_PCPU;
+#endif
+
 	/*
 	 * The +UMA_FRITM_SZ added to uk_size is to account for the
 	 * linkage that is added to the size in keg_small_init().  If
@@ -1385,7 +1407,7 @@ keg_ctor(void *mem, int size, void *udat
 		if (totsize & UMA_ALIGN_PTR)
 			totsize = (totsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
-		keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
+		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
 
 		if (keg->uk_flags & UMA_ZONE_REFCNT)
 			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
@@ -1401,7 +1423,7 @@ keg_ctor(void *mem, int size, void *udat
 		 * mathematically possible for all cases, so we make
 		 * sure here anyway.
 		 */
-		if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
+		if (totsize > PAGE_SIZE * keg->uk_ppera) {
 			printf("zone %s ipers %d rsize %d size %d\n",
 			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
 			    keg->uk_size);
@@ -1676,7 +1698,8 @@ uma_startup(void *bootmem, int boot_page
 	 * that we need to go to offpage slab headers.  Or, if we do,
 	 * then we trap that condition below and panic in the INVARIANTS case.
	 */
-	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
+	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) -
+	    (UMA_SLAB_SIZE / UMA_MAX_WASTE);
 	totsize = wsize;
 	objsize = UMA_SMALLEST_UNIT;
 	while (totsize >= wsize) {
@@ -1689,7 +1712,8 @@ uma_startup(void *bootmem, int boot_page
 		objsize--;
 	uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
 
-	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
+	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
+	    (UMA_SLAB_SIZE / UMA_MAX_WASTE);
 	totsize = wsize;
 	objsize = UMA_SMALLEST_UNIT;
 	while (totsize >= wsize) {
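The uma_startup() hunks above already use the new reading of
UMA_MAX_WASTE, which the uma_int.h change below makes official: the
constant turns from an absolute byte budget into a divisor, so the
waste threshold scales with each keg's slab size.  Illustrative
arithmetic, assuming a 4096-byte UMA_SLAB_SIZE:

    /*
     * Old: wastedspace >= UMA_MAX_WASTE                 (fixed 409 bytes)
     * New: wastedspace >= uk_slabsize / UMA_MAX_WASTE   (10% of the slab)
     *
     * A 4096-byte slab still tolerates 409 wasted bytes, but a 512-byte
     * UMA_ZONE_PCPU slab now tolerates only 51 before the keg switches
     * to off-page slab headers.
     */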
Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/sys/vm/uma_int.h	Mon Apr  8 19:10:45 2013	(r249264)
@@ -120,8 +120,8 @@
 
 #define UMA_BOOT_PAGES		64	/* Pages allocated for startup */
 
-/* Max waste before going to off page slab management */
-#define UMA_MAX_WASTE	(UMA_SLAB_SIZE / 10)
+/* Max waste percentage before going to off page slab management */
+#define UMA_MAX_WASTE	10
 
 /*
  * I doubt there will be many cases where this is exceeded. This is the initial
@@ -197,12 +197,9 @@ typedef struct uma_cache * uma_cache_t;
 *
 */
 struct uma_keg {
-	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
-
 	struct mtx	uk_lock;	/* Lock for the keg */
 	struct uma_hash	uk_hash;
 
-	const char	*uk_name;		/* Name of creating zone. */
 	LIST_HEAD(,uma_zone)	uk_zones;	/* Keg's zones */
 	LIST_HEAD(,uma_slab)	uk_part_slab;	/* partially allocated slabs */
 	LIST_HEAD(,uma_slab)	uk_free_slab;	/* empty slab list */
@@ -225,10 +222,15 @@ struct uma_keg {
 	vm_offset_t	uk_kva;		/* Zone base KVA */
 	uma_zone_t	uk_slabzone;	/* Slab zone backing us, if OFFPAGE */
 
+	u_int16_t	uk_slabsize;	/* Slab size for this keg */
 	u_int16_t	uk_pgoff;	/* Offset to uma_slab struct */
 	u_int16_t	uk_ppera;	/* pages per allocation from backend */
 	u_int16_t	uk_ipers;	/* Items per slab */
 	u_int32_t	uk_flags;	/* Internal flags */
+
+	/* Least used fields go to the last cache line. */
+	const char	*uk_name;	/* Name of creating zone. */
+	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
 };
 
 typedef struct uma_keg	* uma_keg_t;
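A closing note on the cache-line rearrangement: nothing in the commit
enforces the layout, but a compile-time check along these lines, placed
next to the struct in uma_core.c, could catch future regressions.  This
is an illustrative sketch, not part of the change; it assumes the
standard kernel CTASSERT and CACHE_LINE_SIZE facilities:

    #include <sys/param.h>
    #include <sys/systm.h>	/* CTASSERT, offsetof */

    /*
     * uk_flags is the last of the hot uma_keg fields; on amd64 with
     * 64-byte cache lines it should land within the first three lines,
     * leaving only uk_name and uk_link on the fourth.
     */
    CTASSERT(offsetof(struct uma_keg, uk_flags) +
        sizeof(u_int32_t) <= 3 * CACHE_LINE_SIZE);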