Date:      Fri, 23 Nov 2012 20:11:54 +0000 (GMT)
From:      Robert Watson <rwatson@FreeBSD.org>
To:        Gleb Smirnoff <glebius@FreeBSD.org>
Cc:        svn-src-projects@freebsd.org, src-committers@freebsd.org
Subject:   Re: svn commit: r243433 - projects/counters/sys/vm
Message-ID:  <alpine.BSF.2.00.1211232011010.38595@fledge.watson.org>
In-Reply-To: <201211230735.qAN7ZoDK064418@svn.freebsd.org>
References:  <201211230735.qAN7ZoDK064418@svn.freebsd.org>


Do you have any thoughts about potential future interactions with NUMA here? 
E.g., some accessor code to round up the size of struct pcpu to the nearest 
page, and to allocate each CPU's per-cpu entry from a page local to the CPU 
where it will be used?
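
Concretely, I have in mind something like the following (a rough sketch only;
numa_alloc_page_for_cpu() is a made-up placeholder for whatever domain-aware
allocator we would actually grow):

	/*
	 * Sketch only: pad each CPU's pcpu-sized item out to a whole page
	 * and back it with memory local to the CPU that will touch it.
	 */
	vm_offset_t	stride;
	void		*item[MAXCPU];
	int		cpu;

	stride = round_page(sizeof(struct pcpu));
	for (cpu = 0; cpu < mp_ncpus; cpu++)
		item[cpu] = numa_alloc_page_for_cpu(stride, cpu);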

Robert N M Watson
Computer Laboratory
University of Cambridge

On Fri, 23 Nov 2012, Gleb Smirnoff wrote:

> Author: glebius
> Date: Fri Nov 23 07:35:50 2012
> New Revision: 243433
> URL: http://svnweb.freebsd.org/changeset/base/243433
>
> Log:
>    Introduce UMA_ZONE_PCPU zones.
>
>    These zones have a slab size equal to sizeof(struct pcpu), but request
>  enough pages from the VM to fit (uk_slabsize * mp_ncpus). An item allocated
>  from such a zone has a separate twin for each CPU in the system, and
>  these twins lie sizeof(struct pcpu) bytes apart. This fixed distance
>  will allow us to make some optimizations later.
>
>    To address the item private to a given CPU, simple arithmetic is used:
>
>    item = (type *)((char *)base + sizeof(struct pcpu) * curcpu)
>
>    To introduce non-page-sized slabs, a new field, uk_slabsize, has been
>  added to uma_keg. This pushed some frequently used fields of uma_keg onto
>  the fourth cache line on amd64. To mitigate this pessimization, the uma_keg
>  fields were rearranged slightly: the least frequently used ones, uk_name
>  and uk_link, moved down to the fourth cache line, while all other,
>  frequently dereferenced fields fit into the first three cache lines.
>
>    Right now kegs of this kind may waste a lot of space, since
>  sizeof(struct pcpu) is not a divisor of PAGE_SIZE. We plan to pad
>  struct pcpu appropriately to reduce this waste.
>
> Modified:
>  projects/counters/sys/vm/uma.h
>  projects/counters/sys/vm/uma_core.c
>  projects/counters/sys/vm/uma_int.h
>
> Modified: projects/counters/sys/vm/uma.h
> ==============================================================================
> --- projects/counters/sys/vm/uma.h	Fri Nov 23 07:21:35 2012	(r243432)
> +++ projects/counters/sys/vm/uma.h	Fri Nov 23 07:35:50 2012	(r243433)
> @@ -252,6 +252,10 @@ int uma_zsecond_add(uma_zone_t zone, uma
> 					 * Zone's pages will not be included in
> 					 * mini-dumps.
> 					 */
> +#define	UMA_ZONE_PCPU		0x8000	/*
> +					 * Allocates mp_ncpus slabs sized to
> +					 * sizeof(struct pcpu).
> +					 */
>
> /*
>  * These flags are shared between the keg and zone.  In zones wishing to add
> @@ -260,7 +264,7 @@ int uma_zsecond_add(uma_zone_t zone, uma
>  */
> #define	UMA_ZONE_INHERIT						\
>     (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |		\
> -    UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
> +    UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
>
> /* Definitions for align */
> #define UMA_ALIGN_PTR	(sizeof(void *) - 1)	/* Alignment fit for ptr */
>
> Modified: projects/counters/sys/vm/uma_core.c
> ==============================================================================
> --- projects/counters/sys/vm/uma_core.c	Fri Nov 23 07:21:35 2012	(r243432)
> +++ projects/counters/sys/vm/uma_core.c	Fri Nov 23 07:35:50 2012	(r243433)
> @@ -1125,7 +1125,21 @@ keg_small_init(uma_keg_t keg)
> 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
>
> 	keg->uk_rsize = rsize;
> -	keg->uk_ppera = 1;
> +
> +	if (keg->uk_flags & UMA_ZONE_PCPU) {
> +
> +		KASSERT(keg->uk_rsize < sizeof(struct pcpu),
> +		    ("%s: size %u too large", __func__, keg->uk_rsize));
> +
> +		keg->uk_slabsize = sizeof(struct pcpu);
> +		keg->uk_ppera = mp_ncpus/(PAGE_SIZE/sizeof(struct pcpu));
> +		/* Account for remainder. */
> +		if (mp_ncpus * sizeof(struct pcpu) > PAGE_SIZE * keg->uk_ppera)
> +			keg->uk_ppera++;
> +	} else {
> +		keg->uk_slabsize = UMA_SLAB_SIZE;
> +		keg->uk_ppera = 1;
> +	}
>
> 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
> 		shsize = 0;
> @@ -1137,10 +1151,10 @@ keg_small_init(uma_keg_t keg)
> 		shsize = sizeof(struct uma_slab);
> 	}
>
> -	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
> +	keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
> 	KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
> 	memused = keg->uk_ipers * rsize + shsize;
> -	wastedspace = UMA_SLAB_SIZE - memused;
> +	wastedspace = keg->uk_slabsize - memused;
>
> 	/*
> 	 * We can't do OFFPAGE if we're internal or if we've been
> @@ -1154,8 +1168,8 @@ keg_small_init(uma_keg_t keg)
> 		return;
>
> 	if ((wastedspace >= UMA_MAX_WASTE) &&
> -	    (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
> -		keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
> +	    (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
> +		keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
> 		KASSERT(keg->uk_ipers <= 255,
> 		    ("keg_small_init: keg->uk_ipers too high!"));
> #ifdef UMA_DEBUG
> @@ -1165,7 +1179,7 @@ keg_small_init(uma_keg_t keg)
> 		    "calculated ipers = %d, "
> 		    "new wasted space = %d\n", keg->uk_name, wastedspace,
> 		    UMA_MAX_WASTE, keg->uk_ipers,
> -		    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
> +		    keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
> #endif
> 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
> 	}
> @@ -1194,6 +1208,8 @@ keg_large_init(uma_keg_t keg)
> 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
> 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
> 	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
> +	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
> +	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
>
> 	pages = keg->uk_size / PAGE_SIZE;
>
> @@ -1202,6 +1218,7 @@ keg_large_init(uma_keg_t keg)
> 		pages++;
>
> 	keg->uk_ppera = pages;
> +	keg->uk_slabsize = pages * PAGE_SIZE;
> 	keg->uk_ipers = 1;
> 	keg->uk_rsize = keg->uk_size;
>
> @@ -1222,6 +1239,9 @@ keg_cachespread_init(uma_keg_t keg)
> 	int pages;
> 	int rsize;
>
> +	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
> +	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
> +
> 	alignsize = keg->uk_align + 1;
> 	rsize = keg->uk_size;
> 	/*
> @@ -1239,6 +1259,7 @@ keg_cachespread_init(uma_keg_t keg)
> 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
> 	keg->uk_rsize = rsize;
> 	keg->uk_ppera = pages;
> +	keg->uk_slabsize = UMA_SLAB_SIZE;
> 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
> 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
> 	KASSERT(keg->uk_ipers <= uma_max_ipers,
> @@ -1288,6 +1309,13 @@ keg_ctor(void *mem, int size, void *udat
> 	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
> 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
>
> +	if (arg->flags & UMA_ZONE_PCPU)
> +#ifdef SMP
> +		keg->uk_flags |= UMA_ZONE_OFFPAGE;
> +#else
> +		keg->uk_flags &= ~UMA_ZONE_PCPU;
> +#endif
> +
> 	/*
> 	 * The +UMA_FRITM_SZ added to uk_size is to account for the
> 	 * linkage that is added to the size in keg_small_init().  If
>
> Modified: projects/counters/sys/vm/uma_int.h
> ==============================================================================
> --- projects/counters/sys/vm/uma_int.h	Fri Nov 23 07:21:35 2012	(r243432)
> +++ projects/counters/sys/vm/uma_int.h	Fri Nov 23 07:35:50 2012	(r243433)
> @@ -197,12 +197,9 @@ typedef struct uma_cache * uma_cache_t;
>  *
>  */
> struct uma_keg {
> -	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
> -
> 	struct mtx	uk_lock;	/* Lock for the keg */
> 	struct uma_hash	uk_hash;
>
> -	const char	*uk_name;		/* Name of creating zone. */
> 	LIST_HEAD(,uma_zone)	uk_zones;	/* Keg's zones */
> 	LIST_HEAD(,uma_slab)	uk_part_slab;	/* partially allocated slabs */
> 	LIST_HEAD(,uma_slab)	uk_free_slab;	/* empty slab list */
> @@ -225,10 +222,15 @@ struct uma_keg {
> 	vm_offset_t	uk_kva;		/* Base kva for zones with objs */
> 	uma_zone_t	uk_slabzone;	/* Slab zone backing us, if OFFPAGE */
>
> +	u_int16_t	uk_slabsize;	/* Slab size for this keg */
> 	u_int16_t	uk_pgoff;	/* Offset to uma_slab struct */
> 	u_int16_t	uk_ppera;	/* pages per allocation from backend */
> 	u_int16_t	uk_ipers;	/* Items per slab */
> 	u_int32_t	uk_flags;	/* Internal flags */
> +
> +	/* Least used fields go to the last cache line. */
> +	const char	*uk_name;		/* Name of creating zone. */
> +	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
> };
> typedef struct uma_keg	* uma_keg_t;
>
>
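
To make the addressing arithmetic from the log message concrete, an invented
usage example (the zone name and the counter type are mine, not from the
commit) would look roughly like this:

	uma_zone_t	zone;
	uint64_t	*base, *c;

	/* Create a zone whose items have one twin per CPU. */
	zone = uma_zcreate("example_pcpu_counter", sizeof(uint64_t),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
	base = uma_zalloc(zone, M_WAITOK | M_ZERO);

	/* Step from the base item to the current CPU's private twin. */
	critical_enter();
	c = (uint64_t *)((char *)base + sizeof(struct pcpu) * curcpu);
	(*c)++;
	critical_exit();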


