Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 8 Oct 2014 19:14:45 +0400
From:      Dmitry Sivachenko <trtrmitya@gmail.com>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        "hackers@freebsd.org" <hackers@freebsd.org>
Subject:   Re: mmap() question
Message-ID:  <5C10922E-7030-4C89-9FD3-DA770E462067@gmail.com>
In-Reply-To: <20131012095919.GI41229@kib.kiev.ua>
References:  <95E0B821-BF9B-4EBF-A1E5-1DDCBB1C3D1B@gmail.com> <20131011051702.GE41229@kib.kiev.ua> <A5E3C0A2-F0D5-47B1-8992-4B9DA347C275@gmail.com> <20131012095919.GI41229@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
On 12 окт. 2013 г., at 13:59, Konstantin Belousov <kostikbel@gmail.com> wrote:
>=20
> I was not able to reproduce the situation locally. I even tried to start
> a lot of threads accessing the mapped regions, to try to outrun the
> pagedaemon. The user threads sleep on the disk read, while pagedaemon
> has a lot of time to rebalance the queues. It might be a case when SSD
> indeed makes a difference.
>=20
> Still, I see how this situation could appear. The code, which triggers
> OOM, never fires if there is a free space in the swapfile, so the
> absense of swap is neccessary condition to trigger the bug.  Next, OOM
> calculation does not account for a possibility that almost all pages on
> the queues can be reused. It just fires if free pages depleted too much
> or free target cannot be reached.
>=20
> IMO one of the possible solution is to account the queued pages in
> addition to the swap space.  This is not entirely accurate, since some
> pages on the queues cannot be reused, at least transiently.  Most precise
> algorithm would count the hold and busy pages globally, and substract
> this count from queues length, but it is probably too costly.
>=20
> Instead, I think we could rely on the numbers which are counted by
> pagedaemon threads during the passes.  Due to the transient nature of the
> pagedaemon failures, this should be fine.
>=20
> Below is the prototype patch, against HEAD.  It is not applicable to
> stable, please use HEAD kernel for test.



Hello,

any chance to commit this patch?

Thanks!



>=20
> diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
> index d2ad920..ee5159a 100644
> --- a/sys/sys/vmmeter.h
> +++ b/sys/sys/vmmeter.h
> @@ -93,9 +93,10 @@ struct vmmeter {
> 	u_int v_free_min;	/* (c) pages desired free */
> 	u_int v_free_count;	/* (f) pages free */
> 	u_int v_wire_count;	/* (a) pages wired down */
> -	u_int v_active_count;	/* (q) pages active */
> +	u_int v_active_count;	/* (a) pages active */
> 	u_int v_inactive_target; /* (c) pages desired inactive */
> -	u_int v_inactive_count;	/* (q) pages inactive */
> +	u_int v_inactive_count;	/* (a) pages inactive */
> +	u_int v_queue_sticky;	/* (a) pages on queues but cannot process */
> 	u_int v_cache_count;	/* (f) pages on cache queue */
> 	u_int v_cache_min;	/* (c) min pages desired on cache queue */
> 	u_int v_cache_max;	/* (c) max pages in cached obj (unused) */
> diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
> index 713a2be..4bb1f1f 100644
> --- a/sys/vm/vm_meter.c
> +++ b/sys/vm/vm_meter.c
> @@ -316,6 +316,7 @@ VM_STATS_VM(v_active_count, "Active pages");
> VM_STATS_VM(v_inactive_target, "Desired inactive pages");
> VM_STATS_VM(v_inactive_count, "Inactive pages");
> VM_STATS_VM(v_cache_count, "Pages on cache queue");
> +VM_STATS_VM(v_queue_sticky, "Pages which cannot be moved from queues");
> VM_STATS_VM(v_cache_min, "Min pages on cache queue");
> VM_STATS_VM(v_cache_max, "Max pages on cached queue");
> VM_STATS_VM(v_pageout_free_min, "Min pages reserved for kernel");
> diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
> index 7846702..6943a0e 100644
> --- a/sys/vm/vm_page.h
> +++ b/sys/vm/vm_page.h
> @@ -226,6 +226,7 @@ struct vm_domain {
> 	long vmd_segs;	/* bitmask of the segments */
> 	boolean_t vmd_oom;
> 	int vmd_pass;	/* local pagedaemon pass */
> +	int vmd_queue_sticky;	/* pages on queues which cannot be processed */
> 	struct vm_page vmd_marker; /* marker for pagedaemon private use */
> };
>=20
> diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
> index 5660b56..a62cf97 100644
> --- a/sys/vm/vm_pageout.c
> +++ b/sys/vm/vm_pageout.c
> @@ -896,7 +896,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> {
> 	vm_page_t m, next;
> 	struct vm_pagequeue *pq;
> -	int page_shortage, maxscan, pcount;
> +	int failed_scan, page_shortage, maxscan, pcount;
> 	int addl_page_shortage;
> 	vm_object_t object;
> 	int act_delta;
> @@ -960,6 +960,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> 	 */
> 	pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
> 	maxscan = pq->pq_cnt;
> +	failed_scan = 0;
> 	vm_pagequeue_lock(pq);
> 	queues_locked = TRUE;
> 	for (m = TAILQ_FIRST(&pq->pq_pl);
> @@ -1012,6 +1013,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> 			vm_page_unlock(m);
> 			VM_OBJECT_WUNLOCK(object);
> 			addl_page_shortage++;
> +			failed_scan++;
> 			continue;
> 		}
>=20
> @@ -1075,6 +1077,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> 			 * loop over the active queue below.
> 			 */
> 			addl_page_shortage++;
> +			failed_scan++;
> 			goto relock_queues;
> 		}
>=20
> @@ -1229,6 +1232,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> 				 */
> 				if (vm_page_busied(m)) {
> 					vm_page_unlock(m);
> +					failed_scan++;
> 					goto unlock_and_continue;
> 				}
>=20
> @@ -1241,6 +1245,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> 					vm_page_requeue_locked(m);
> 					if (object->flags & OBJ_MIGHTBEDIRTY)
> 						vnodes_skipped++;
> +					failed_scan++;
> 					goto unlock_and_continue;
> 				}
> 				vm_pagequeue_unlock(pq);
> @@ -1386,6 +1391,11 @@ relock_queues:
> 		m = next;
> 	}
> 	vm_pagequeue_unlock(pq);
> +
> +	atomic_add_int(&cnt.v_queue_sticky, failed_scan -
> +	    vmd->vmd_queue_sticky);
> +	vmd->vmd_queue_sticky = failed_scan;
> +
> #if !defined(NO_SWAPPING)
> 	/*
> 	 * Idle process swapout -- run once per second.
> @@ -1433,10 +1443,15 @@ static int vm_pageout_oom_vote;
> static void
> vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
> {
> +	u_int queues_count;
> 	int old_vote;
>=20
> -	if (pass <= 1 || !((swap_pager_avail < 64 && vm_page_count_min()) ||
> -	    (swap_pager_full && vm_paging_target() > 0))) {
> +	queues_count = cnt.v_active_count + cnt.v_inactive_count -
> +	    cnt.v_queue_sticky;
> +	if (pass <= 1 || !((swap_pager_avail < 64 && vm_page_count_min() &&
> +	    queues_count <= cnt.v_free_min) ||
> +	    (swap_pager_full && vm_paging_target() > 0 &&
> +	    queues_count <= vm_paging_target()))) {
> 		if (vmd->vmd_oom) {
> 			vmd->vmd_oom = FALSE;
> 			atomic_subtract_int(&vm_pageout_oom_vote, 1);




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?5C10922E-7030-4C89-9FD3-DA770E462067>