Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 5 Jan 1998 18:22:37 -0800 (PST)
From:      Matt Dillon <dillon@best.net>
To:        FreeBSD-gnats-submit@FreeBSD.ORG
Subject:   kern/5442: FreeBSD loses track of swap objects (I think) under heavy load overtime
Message-ID:  <199801060222.SAA03741@flea.best.net>
Resent-Message-ID: <199801082231.OAA09474@hub.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         5442
>Category:       kern
>Synopsis:       FreeBSD 2.2.5 (relatively recent cvs) loses track of swap space
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    freebsd-bugs
>State:          open
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Jan  8 14:31:24 PST 1998
>Last-Modified:
>Originator:     Matt Dillon
>Organization:
Best Internet Communications
>Release:        FreeBSD 2.2.5-STABLE i386
>Environment:
	
	FreeBSD 2.2.5, Dec 16 kernel (from cvs tree), 256MB ram, 917MB swap,
	general shell machine 160 online users at peak, heavily loaded,
	nominal paging occurs.

>Description:

	Usually around 200 to 300 MB is swapped out.  We have noticed with
	recent kernels that FreeBSD appears to be losing track of swap space.

	shell3:/var/crash# pstat -s
	Device      1K-blocks     Used    Avail Capacity  Type
	/dev/sd0b      524288   327448   196776    62%    Interleaved
	/dev/sd1b      131072   130484      524   100%    Interleaved
	/dev/sd2b      131072   130436      572   100%    Interleaved
	/dev/sd3b      131072   130352      656    99%    Interleaved
	Total          917248   718720   198528    78%
				(718MB!!!  This should be 200-300MB!!!)
	shell3:/var/crash# uptime
	 6:21PM  up 19 days, 18:36, 124 users, load averages: 2.53, 2.98, 3.12

	moderate but reasonable paging:

	    shell3:/var/crash# vmstat 1
	     procs   memory     page                    disks         faults      cpu
	     r b w   avm   fre  flt  re  pi  po  fr  sr s0 s1 s2 s3   in   sy  cs us sy id
	     5 8 0 1109.1  27.9 2237   5   1   2 426 605 22  5 24 21  487 1492 453 28 22 50
	     8 8 0 1102.5  28.0  982   0   9   0 804   0 73  0 30 14  675 5683 836 26 29 45
	    10 8 0 1087.4  28.7 1326   0   1   0 1262   0 74  4 54 26  750 7062 875 31 44 25
	     2 7 0 1085.9  29.0 1587   0   7   0 1641   0 69  3 22 12  664 4515 734 16 37 47
	     713 0 1078.2  27.7 1931  10  26   0 1798   0 91 23 42 15  676 4885 762 25 46 30
	     610 0 1060.7  27.5  873   2  30   0 737   0 83 17 37 13  788 3709 779 18 38 44
	     312 0 1066.9  26.5  780   1  12   0 345   0 83  1 43 12  700 5240 731 27 30 43
	     810 0 1065.9  34.3  552  97   2  35 447 110421 65  4 17  3  593 2801 476 19 67 13
	     610 0 1068.4  32.9 1055 115   0   0 630   0 104  4 36 16  699 4572 768 34 34 33
	     7 7 0 1067.6  32.5 1451 104   0   0 1189   0 98  1 60 10  764 5513 874 39 37 25
	     5 8 0 1068.3  32.6  529   2   0   0 433   0 62  1 53 13  659 2980 580 14 26 60
	     4 8 0 1065.0  33.2  299   1   2   0 396   0 31  0 58  8  704 2351 580 10 24 67
	     2 7 0 1069.1  32.0 1138  41   7   0 709   0 60  9 33 10  687 4454 854 35 31 34
	     2 8 0 1084.2  31.3  562   6   2   0 249   0 81  1 23 15  664 3812 759 20 28 52
	     8 7 0 1090.7  31.3  721   1   0   0 592   0 82  0 42 10  703 3984 680 15 33 52
	     810 0 1095.7  30.2 1118   1   1   0 600   0 37  0 31 21  731 6724 1230 56 38  6
	     3 7 0 1101.3  31.0  450  12   2   0 591   0 19  5 35 20  754 4152 629 21 37 42
	     511 0 1118.2  29.5  831  36  17   0 683   0 47 10 57 19  854 5448 863 27 37 36
	    10 9 0 1144.3  28.8  619   1  25   0 471   0 37  8 50 11  724 2988 749 49 28 24
	     523 0 1146.2  28.4  452  17   1  49 220   0 74  0 27 14  658 2272 560 37 24 39
	     515 0 1134.6  28.5  474   0   1   0 424   0 141 18 99 57  849 2873 550 14 25 61

	I spent an hour and wrote a program to track the active and cached
	vm_object queues.  The summary results are shown below:

vm_object_list
    summary def 4406
    summary swp 13033
    summary vno 2906
    swap-numblocks 46715 (721148K used)
    swap-allocsize 1442296 (721148K)

vm_object_cached_list
    summary def 1045
    summary swp 202
    summary vno 2257
    swap-numblocks 3889 (17808K used)
    swap-allocsize 35616 (17808K)

	I am not positive, but I believe that individual vm objects are
	slowly getting lost somehow with their reference count left at 1, 
	preventing the object from getting freed.  For example, the active
	object:

    obj f36e5a80 bking 00000000 type swp, size 14 , refcnt 1 nblocks 2(56K used), allocsize 112(56K)

	does not appear to be referenced as backing store by any other
	object.  There are many examples of this.  I am not sure whether
	this is proper operation or not.

	I have included my summarizing program in the How-To-Repeat section.
	The -v option prints out full info.  Without -v, just a summary is
	reported.

>How-To-Repeat:

/*
 * VMOBJSTAT.C
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/dkstat.h>
#include <sys/buf.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/signal.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/swap_pager.h>
#include <time.h>
#include <nlist.h>
#include <kvm.h>
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <paths.h>
#include <limits.h>

/*
 * Queue-head type for a tail queue of vm_object structures.  Declared
 * here so the program can name the type of the kernel's object queues.
 */
TAILQ_HEAD(object_q, vm_object);

void kread(kvm_t *kd, void *addr, void *buf, int bytes);
void scanvmobjq(kvm_t *kd, struct object_q *q, const char *str);
const char *vmobjtype(int type);

/*
 * Kernel symbols looked up with kvm_nlist().  Each X_* macro is the
 * index of the entry that follows it, so the macros must be kept in
 * step with the order of the table.  The empty name ends the list.
 */
struct nlist namelist[] = {
#define X_VM_OBJECT_LIST	0
	{ "_vm_object_list" },
#define X_VM_OBJECT_CACHED_LIST	1
	{ "_vm_object_cached_list" },
	{ "" }
};

int VerboseOpt = 0;	/* non-zero (-v): print one line per object */
int BlkSize;		/* PAGE_SIZE / btodb(PAGE_SIZE), set in main() */

/*
 * Parse the command line, open the running kernel through libkvm,
 * look up the two object-queue symbols and print statistics for each
 * queue.
 *
 * Options:
 *	-v	print a line for every object as well as the summary
 *
 * Exits with status 1 on an argument error or a libkvm failure;
 * returns 0 once both queues have been reported.
 */
int
main(int ac, char **av)
{
    kvm_t *kd;
    char errbuf[_POSIX2_LINE_MAX];
    struct object_q *q1;
    struct object_q *q2;
    int i;

    BlkSize = PAGE_SIZE / btodb(PAGE_SIZE);

    /*
     * Only the character after the '-' is examined.  Anything that is
     * not a recognised option stops the scan early, which the check
     * below turns into an argument error.
     */
    for (i = 1; i < ac; ++i) {
	if (av[i][0] != '-')
	    break;
	if (av[i][1] != 'v')
	    break;
	VerboseOpt = 1;
    }
    if (i != ac) {
	fprintf(stderr, "Argument error\n");
	exit(1);
    }

    kd = kvm_openfiles(NULL, NULL, NULL, O_RDONLY, errbuf);
    if (kd == NULL) {
	fprintf(stderr, "kvm_openfiles() failed: %s\n", errbuf);
	exit(1);
    }
    if (kvm_nlist(kd, namelist) != 0) {
	fprintf(stderr, "kvm_nlist() failed\n");
	exit(1);
    }

    /*
     * NOTE(review): each kread() below fetches the pointer-sized value
     * stored at the symbol's address and hands that value to
     * scanvmobjq() as the queue head, where it is read through once
     * more.  If the kernel symbols are the queue heads themselves
     * rather than pointers to them, this is one indirection too many
     * and the symbol address should be passed directly -- confirm
     * against the kernel's declarations before relying on the output.
     */
    kread(kd, (void *)namelist[X_VM_OBJECT_LIST].n_value, &q1, sizeof(q1));
    kread(kd, (void *)namelist[X_VM_OBJECT_CACHED_LIST].n_value, &q2, sizeof(q2));

    scanvmobjq(kd, q1, "vm_object_list");
    scanvmobjq(kd, q2, "vm_object_cached_list");

    /* Release the descriptor and report success explicitly. */
    kvm_close(kd);
    return(0);
}

void
scanvmobjq(kvm_t *kd, struct object_q *q, const char *str)
{
    struct vm_object *obj;
    int ObjSizeAry[16];
    int swp_nblocks = 0;
    int swp_ublocks = 0;
    int swp_allocsize = 0;

    bzero(ObjSizeAry, sizeof(ObjSizeAry));
    printf("%s\n", str);

    kread(kd, (void *)&q->tqh_first, &obj, sizeof(void *));
    while (obj) {
	struct vm_object vmobj;

	bzero(&vmobj, sizeof(vmobj));
	kread(kd, (void *)obj, &vmobj, sizeof(vmobj));
	if (VerboseOpt) {
	    printf("    obj %08x bking %08x type %s, size %-3d, refcnt %d",
		(int)obj,
		(int)vmobj.backing_object,
		vmobjtype(vmobj.type),
		(int)vmobj.size,
		(int)vmobj.ref_count
	    );
	}
	if (vmobj.type ==  OBJT_SWAP) {
	    int ublocks = 0;
	    int nblocks = vmobj.un_pager.swp.swp_nblocks;
	    struct swblock *sw = vmobj.un_pager.swp.swp_blocks;

	    while (sw < vmobj.un_pager.swp.swp_blocks + nblocks) {
		struct swblock swblock;
		int i;

		kread(kd, (void *)sw, &swblock, sizeof(swblock));
		for (i = 0; i < SWB_NPAGES; ++i) {
		    if ((int)swblock.swb_block[i] >= 0)
			++ublocks;
		}
		++sw;
	    }
	    swp_ublocks += ublocks;
	    if (VerboseOpt) {
		printf(" nblocks %d(%dK used), allocsize %d(%dK)\n",
		    nblocks,
		    ublocks * PAGE_SIZE / 1024,
		    vmobj.un_pager.swp.swp_allocsize,
			vmobj.un_pager.swp.swp_allocsize * BlkSize / 1024
		);
	    }
	}
	if (VerboseOpt)
	    printf("\n");

	if (vmobj.type < 16)
	    ++ObjSizeAry[vmobj.type];
	if (vmobj.type == OBJT_SWAP) {
	    swp_nblocks   += vmobj.un_pager.swp.swp_nblocks;
	    swp_allocsize += vmobj.un_pager.swp.swp_allocsize;
	}
	obj = vmobj.object_list.tqe_next;
    }
    {
	int i;

	for (i = 0; i < 16; ++i) {
	    if (ObjSizeAry[i])
		printf("    summary %s %d\n", vmobjtype(i), ObjSizeAry[i]);
	}
	printf("    swap-numblocks %d (%dK used)\n", swp_nblocks, swp_ublocks * PAGE_SIZE / 1024);
	printf("    swap-allocsize %d (%dK)\n", swp_allocsize, swp_allocsize * BlkSize / 1024);
    }
    printf("\n");
}

/*
 * Read exactly `bytes' bytes of kernel memory at address `addr' into
 * `buf' using kvm_read().
 *
 * Anything other than a complete read is fatal: a diagnostic that
 * includes libkvm's own error text is written to stderr and the
 * program exits with status 1.
 */
void
kread(kvm_t *kd, void *addr, void *buf, int bytes)
{
    if (kvm_read(kd, (u_long)addr, buf, bytes) != bytes) {
	/* %lx needs an unsigned long argument, not a void pointer. */
	fprintf(stderr, "kvm_read(%08lx,%d) failed: %s\n",
	    (u_long)addr, bytes, kvm_geterr(kd));
	exit(1);
    }
}

/*
 * Map a VM object type code to a short printable name: "def", "swp",
 * "vno" or "dev" for the four known types, otherwise the number in
 * decimal.
 *
 * The string is built in a static buffer, so the returned pointer is
 * only valid until the next call.
 */
const char *
vmobjtype(int type)
{
    static char buf[256];
    const char *name;

    if (type == OBJT_DEFAULT)
	name = "def";
    else if (type == OBJT_SWAP)
	name = "swp";
    else if (type == OBJT_VNODE)
	name = "vno";
    else if (type == OBJT_DEVICE)
	name = "dev";
    else
	name = NULL;

    /* Known types are copied verbatim; unknown ones are formatted. */
    if (name != NULL)
	strcpy(buf, name);
    else
	sprintf(buf, "%d", type);
    return(buf);
}

>Fix:
	
	Don't know

>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199801060222.SAA03741>