Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 12 Oct 2001 13:26:22 -0700 (PDT)
From:      Matt Dillon <dillon@earth.backplane.com>
To:        Mike Silbersack <silby@silby.com>
Cc:        <cvs-committers@FreeBSD.ORG>, <cvs-all@FreeBSD.ORG>
Subject:   Re: cvs commit: src/sys/vm vnode_pager.c
Message-ID:  <200110122026.f9CKQMS35969@earth.backplane.com>
References:   <20011012145142.L29945-100000@achilles.silby.com>

next in thread | previous in thread | raw e-mail | index | archive | help

:
:
:On Fri, 12 Oct 2001, Matt Dillon wrote:
:
:> dillon      2001/10/12 11:17:34 PDT
:>
:>   Modified files:
:>     sys/vm               vnode_pager.c
:>   Log:
:>   Finally fix the VM bug where a file whos EOF occurs in the middle of a page
:>   would sometimes prevent a dirty page from being cleaned, even when synced,
:>   resulting in the dirty page being re-flushed to disk every 30-60 seconds or
:>   so, forever.  The problem is that when the filesystem flushes a page to
:
:How commonly did this occur?
:
:Mike "Silby" Silbersack

    It depends heavily on the situation.  A file typically must be written
    through an mmap(), or a recently-write()en file must be mmap()'d and
    then accessed via the mmap() before the dirty buffer is flushed.  Only
    the last page of a file can get into this state and typically only
    if it is stored as a single fragment by the filesystem - i.e. the
    physical I/O executed by the filesystem is less than a page.

    The biggest effect occurs when people are manipulating a large number of
    small files.  Once a vm_page gets into this state it's stuck in it
    until the associated file is either removed or further extended.
    Nothing else will clean up the state of the vm_page, which means that
    vm_pages in this state can accumulate over time until you have hundreds
    or even thousands of them (if your activity is operating on hundreds or
    thousands of files).  When you get to that point the syncer generates
    a huge amount of repeated disk I/O every 30-60 seconds and, of course,
    the system is unable to reclaim the affected pages for other uses.

    In recent years the use of mmap() has increased hugely.  For example,
    'cp', 'install', and 'tail' now use it, as does 'samba' and 'apache'.
    I only know of one or two 'severe' cases.  99% of installations will
    not be affected because they simply do not access a large enough number
    of files or do not access files in a way that hits the bug for it to
    become a problem.  For example, my home system has been up 69 days
    and not a single page is in this state.  One of our production servers
    with an uptime of 4 days has one page in this state.  Another production
    server up 185 days has 0 pages in this state.

    You can test your own boxes by running the vm pagelist dump program
    included below and grepping for 'dirty' values in weird states (not 00
    and not fc) that don't get cleaned up when you do a 'sync'.

						-Matt


#!/bin/tcsh -f
#
# Compiles pagelist.c into /usr/local/bin/pagelist and links it with libkvm.
#
# MODIFY AS APPROPRIATE.  Must point to a compile/XXX directory to get
# the various config option header files.

cc pagelist.c -o /usr/local/bin/pagelist -I/usr/src/sys -I/usr/src/sys/compile/APOLLO -lkvm

/*
 * PAGELIST.C
 *
 *   ./pagelist | egrep -v 'drty 00|drty ff'
 */

#include <sys/param.h>
/*#include <sys/systm.h>*/
/*#include <sys/kernel.h>*/
#include <sys/proc.h>
/*#include <sys/resourcevar.h>*/
#include <sys/malloc.h>
/*#include <sys/kernel.h>*/
#include <sys/signalvar.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
/*#include <vm/vm_pageout.h>*/
#include <vm/vm_kern.h>
#include <vm/swap_pager.h>
#include <vm/vnode_pager.h>


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <kvm.h>
#include <nlist.h>

/*
 * Kernel symbols handed to kvm_nlist() in main().
 *
 * main() refers to the resolved entries by position, so the order of the
 * entries must not be changed:
 *   Nl[0].n_value - address read into the local array of page queue heads
 *   Nl[1].n_value - address of a pointer to the first vm_page to dump
 *   Nl[2].n_value - address of an int giving the number of pages to dump
 * The trailing { NULL } entry marks the end of the list.
 */
struct nlist Nl[] = {
    { "_vm_page_queues" },
    { "_vm_page_array" },
    { "_vm_page_array_size" },
    { NULL }
};

/* Read nbytes at kernel address addr into buf; exits the program on failure. */
void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes);
/* Print a one-line summary of the vm_page copy 'page' that was read from kva. */
void showpage(long kva, vm_page_t page);

/*
 * Walk one kernel page queue and print every page on it.
 *
 * kd   - open kvm descriptor used for the reads.
 * head - kernel address of the first vm_page on the queue (the tqh_first
 *        value copied out of the kernel's queue head).
 *
 * Each page is copied into a local struct vm_page with kkread(), printed
 * with showpage(), and the walk follows the copied pageq.tqe_next link until
 * it is NULL.  An empty line is written after the last page.
 */
static void
dumpqueue(kvm_t *kd, vm_page_t head)
{
    vm_page_t vptr;

    for (vptr = head; vptr != NULL; ) {
	struct vm_page vm_page;

	kkread(kd, (u_long)vptr, &vm_page, sizeof(vm_page));
	showpage((long)vptr, &vm_page);
	/* The next link is a kernel address taken from the copy just read. */
	vptr = vm_page.pageq.tqe_next;
    }
    puts("");
}

/*
 * Dump vm_page structures from the kernel through libkvm.
 *
 * Arguments are matched by prefix and may be combined:
 *   "ina..."  dump the PQ_INACTIVE queue
 *   "act..."  dump the PQ_ACTIVE queue
 *   "cac..."  dump the PQ_CACHE queue
 *   "fre..."  dump the PQ_FREE queue
 *   "0x..."   dump the single vm_page at that kernel address
 * With no arguments at all, every page reachable from the page array
 * symbols (Nl[1] / Nl[2]) is dumped.
 *
 * Output order is fixed: the explicit address first, then the inactive,
 * active, cache and free queues, then the full array.
 *
 * Returns 0 on success.  Exits with status 1 if kvm_open() or kvm_nlist()
 * fails (kkread() likewise exits on a failed read).
 */
int
main(int ac, char **av)
{
    struct pglist vm_page_queues[PQ_COUNT];
    vm_page_t vptr;
    kvm_t *kd;
    short ShowAll = 0;
    short ShowInactive = 0;
    short ShowActive = 0;
    short ShowCache = 0;
    short ShowFree = 0;
    long ShowAddr = 0;

    {
	int i;

	for (i = 1; i < ac; ++i) {
	    if (strncmp(av[i], "ina", 3) == 0) {
		ShowInactive = 1;
	    }
	    if (strncmp(av[i], "act", 3) == 0) {
		ShowActive = 1;
	    }
	    if (strncmp(av[i], "cac", 3) == 0) {
		ShowCache = 1;
	    }
	    if (strncmp(av[i], "fre", 3) == 0) {
		ShowFree = 1;
	    }
	    if (strncmp(av[i], "0x", 2) == 0) {
		/* Base 0 lets strtoul() honour the 0x prefix. */
		ShowAddr = (long)strtoul(av[i], NULL, 0);
	    }
	}
	if (ac == 1) {
		ShowAll = 1;
	}
    }

    if ((kd = kvm_open(NULL, NULL, NULL, O_RDONLY, "kvm:")) == NULL) {
	perror("kvm_open");
	exit(1);
    }
    if (kvm_nlist(kd, Nl) != 0) {
	perror("kvm_nlist");
	exit(1);
    }

    /* Copy the queue heads once; the queue walks start from this copy. */
    kkread(kd, Nl[0].n_value, &vm_page_queues[0], sizeof(vm_page_queues));

    if (ShowAddr) {
	struct vm_page vm_page;

	kkread(kd, (u_long)ShowAddr, &vm_page, sizeof(vm_page));
	showpage((long)ShowAddr, &vm_page);
    }

    if (ShowInactive) {
	dumpqueue(kd, vm_page_queues[PQ_INACTIVE].tqh_first);
    }

    if (ShowActive) {
	dumpqueue(kd, vm_page_queues[PQ_ACTIVE].tqh_first);
    }

    if (ShowCache) {
	dumpqueue(kd, vm_page_queues[PQ_CACHE].tqh_first);
    }

    if (ShowFree) {
	dumpqueue(kd, vm_page_queues[PQ_FREE].tqh_first);
    }

    if (ShowAll) {
	int count;

	kkread(kd, Nl[1].n_value, &vptr, sizeof(vptr));
	kkread(kd, Nl[2].n_value, &count, sizeof(count));

	/*
	 * vptr holds a kernel address; ++vptr only advances it by
	 * sizeof(struct vm_page) and it is never dereferenced locally.
	 * "count > 0" keeps a negative value from turning into a runaway loop.
	 */
	while (count > 0) {
		struct vm_page vm_page;

		kkread(kd, (u_long)vptr, &vm_page, sizeof(vm_page));
		showpage((long)vptr, &vm_page);

		--count;
		++vptr;
	}
    }

    kvm_close(kd);
    return(0);
}

/*
 * Print a one-line summary of a vm_page to stdout.
 *
 * kva  - kernel address the structure was read from; printed as the first
 *        column.
 * page - local copy of the structure whose fields are printed.
 *
 * The line shows the physical address, pc, owning object, page index, busy,
 * hold and wire counts, the dirty and valid masks, the activity count, a
 * three-character b/v/d summary (busy / valid / dirty non-zero), and the
 * names of the PG_* flag bits that are set.
 *
 * Every variadic argument is cast to the type its conversion expects, so the
 * call stays well defined whatever integer width the structure fields are
 * declared with.  NOTE(review): if pindex is wider than int on some
 * platform, the (int) cast truncates it -- confirm against vm_page.h.
 */
void
showpage(long kva, vm_page_t page)
{
    printf("%08lx phys %08lx pc %02x obj %08lx pi %-5d bsy %d hld %d wir %2d drty %02x val %02x act %-3d %c%c%c flags:%s%s%s%s%s%s%s%s%s\n",
	(unsigned long)kva,
	(unsigned long)page->phys_addr,
	(unsigned int)page->pc,
	(unsigned long)page->object,
	(int)page->pindex,
	(int)page->busy,
	(int)page->hold_count,
	(int)page->wire_count,
	(unsigned int)page->dirty,
	(unsigned int)page->valid,
	(int)page->act_count,
	((page->busy) ? 'b' : '-'),
	((page->valid) ? 'v' : '-'),
	((page->dirty) ? 'd' : '-'),
	((page->flags & PG_BUSY) ? " BSY" : ""),
	((page->flags & PG_WANTED) ? " WNT" : ""),
	((page->flags & PG_FICTITIOUS) ? " FIC" : ""),
	((page->flags & PG_WRITEABLE) ? " WRT" : ""),
	((page->flags & PG_MAPPED) ? " MAP" : ""),
	((page->flags & PG_ZERO) ? " ZRO" : ""),
	((page->flags & PG_REFERENCED) ? " REF" : ""),
	((page->flags & PG_CLEANCHK) ? " CCHK" : ""),
	((page->flags & PG_SWAPINPROG) ? " SWP" : "")
    );
}

/*
 * Copy nbytes from kernel address addr into buf through kvm_read().
 *
 * Returns only when kvm_read() reports exactly nbytes transferred.  Any
 * other result is reported with perror("kvm_read") and the program exits
 * with status 1, so callers never see a failed or short read.
 */
void
kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes)
{
    if (kvm_read(kd, addr, buf, nbytes) == nbytes) {
	return;
    }

    perror("kvm_read");
    exit(1);
}


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe cvs-all" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200110122026.f9CKQMS35969>