Date:      Thu, 22 Aug 2013 03:43:12 +0000 (UTC)
From:      Neel Natu <neel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r254635 - in projects/bhyve_npt_pmap: cddl/contrib/opensolaris/cmd/ztest lib/libc/iconv sys/amd64/amd64 sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/kern sys/net80211 sys/ofed/inc...
Message-ID:  <201308220343.r7M3hCKU005530@svn.freebsd.org>

Author: neel
Date: Thu Aug 22 03:43:12 2013
New Revision: 254635
URL: http://svnweb.freebsd.org/changeset/base/254635

Log:
  IFC @254081

Modified:
  projects/bhyve_npt_pmap/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  projects/bhyve_npt_pmap/lib/libc/iconv/citrus_iconv.c
  projects/bhyve_npt_pmap/sys/amd64/amd64/minidump_machdep.c
  projects/bhyve_npt_pmap/sys/amd64/amd64/pmap.c
  projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
  projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  projects/bhyve_npt_pmap/sys/kern/kern_event.c
  projects/bhyve_npt_pmap/sys/net80211/ieee80211.c
  projects/bhyve_npt_pmap/sys/net80211/ieee80211_output.c
  projects/bhyve_npt_pmap/sys/net80211/ieee80211_proto.h
  projects/bhyve_npt_pmap/sys/net80211/ieee80211_var.h
  projects/bhyve_npt_pmap/sys/ofed/include/linux/page.h
  projects/bhyve_npt_pmap/sys/sparc64/sparc64/genassym.c
  projects/bhyve_npt_pmap/sys/sys/event.h
  projects/bhyve_npt_pmap/sys/vm/vm_page.c
  projects/bhyve_npt_pmap/sys/vm/vm_page.h
  projects/bhyve_npt_pmap/sys/vm/vm_pageout.c
  projects/bhyve_npt_pmap/sys/vm/vm_phys.c
  projects/bhyve_npt_pmap/sys/vm/vm_phys.h
  projects/bhyve_npt_pmap/sys/vm/vm_zeroidle.c
  projects/bhyve_npt_pmap/sys/x86/acpica/srat.c
  projects/bhyve_npt_pmap/tools/tools/sysdoc/sysdoc.sh
Directory Properties:
  projects/bhyve_npt_pmap/   (props changed)
  projects/bhyve_npt_pmap/cddl/   (props changed)
  projects/bhyve_npt_pmap/cddl/contrib/opensolaris/   (props changed)
  projects/bhyve_npt_pmap/lib/libc/   (props changed)
  projects/bhyve_npt_pmap/sys/   (props changed)
  projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/   (props changed)

Modified: projects/bhyve_npt_pmap/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- projects/bhyve_npt_pmap/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -186,6 +186,7 @@ static const ztest_shared_opts_t ztest_o
 
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
+extern uint64_t zfs_deadman_synctime;
 
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
@@ -365,7 +366,7 @@ ztest_info_t ztest_info[] = {
 	{ ztest_fault_inject,			1,	&zopt_sometimes	},
 	{ ztest_ddt_repair,			1,	&zopt_sometimes	},
 	{ ztest_dmu_snapshot_hold,		1,	&zopt_sometimes	},
-	{ ztest_reguid,				1,	&zopt_sometimes },
+	{ ztest_reguid,				1,	&zopt_rarely	},
 	{ ztest_spa_rename,			1,	&zopt_rarely	},
 	{ ztest_scrub,				1,	&zopt_rarely	},
 	{ ztest_spa_upgrade,			1,	&zopt_rarely	},
@@ -3606,6 +3607,9 @@ ztest_dmu_read_write(ztest_ds_t *zd, uin
 	else
 		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
 
+	/* This accounts for setting the checksum/compression. */
+	dmu_tx_hold_bonus(tx, bigobj);
+
 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 	if (txg == 0) {
 		umem_free(packbuf, packsize);
@@ -4756,6 +4760,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 	ASSERT(leaves >= 1);
 
 	/*
+	 * Grab the name lock as reader. There are some operations
+	 * which don't like to have their vdevs changed while
+	 * they are in progress (i.e. spa_change_guid). Those
+	 * operations will have grabbed the name lock as writer.
+	 */
+	(void) rw_rdlock(&ztest_name_lock);
+
+	/*
 	 * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
 	 */
 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
@@ -4784,7 +4796,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 		if (vd0 != NULL && vd0->vdev_top->vdev_islog)
 			islog = B_TRUE;
 
-		if (vd0 != NULL && maxfaults != 1) {
+		/*
+		 * If the top-level vdev needs to be resilvered
+		 * then we only allow faults on the device that is
+		 * resilvering.
+		 */
+		if (vd0 != NULL && maxfaults != 1 &&
+		    (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
+		    vd0->vdev_resilvering)) {
 			/*
 			 * Make vd0 explicitly claim to be unreadable,
 			 * or unwriteable, or reach behind its back
@@ -4815,6 +4834,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 
 		if (sav->sav_count == 0) {
 			spa_config_exit(spa, SCL_STATE, FTAG);
+			(void) rw_unlock(&ztest_name_lock);
 			return;
 		}
 		vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
@@ -4828,6 +4848,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 	}
 
 	spa_config_exit(spa, SCL_STATE, FTAG);
+	(void) rw_unlock(&ztest_name_lock);
 
 	/*
 	 * If we can tolerate two or more faults, or we're dealing
@@ -5293,16 +5314,33 @@ static void *
 ztest_deadman_thread(void *arg)
 {
 	ztest_shared_t *zs = arg;
-	int grace = 300;
-	hrtime_t delta;
-
-	delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
+	spa_t *spa = ztest_spa;
+	hrtime_t delta, total = 0;
 
-	(void) poll(NULL, 0, (int)(1000 * delta));
+	for (;;) {
+		delta = (zs->zs_thread_stop - zs->zs_thread_start) /
+		    NANOSEC + zfs_deadman_synctime;
 
-	fatal(0, "failed to complete within %d seconds of deadline", grace);
+		(void) poll(NULL, 0, (int)(1000 * delta));
 
-	return (NULL);
+		/*
+		 * If the pool is suspended then fail immediately. Otherwise,
+		 * check to see if the pool is making any progress. If
+		 * vdev_deadman() discovers that there hasn't been any recent
+		 * I/Os then it will end up aborting the tests.
+		 */
+		if (spa_suspended(spa)) {
+			fatal(0, "aborting test after %llu seconds because "
+			    "pool has transitioned to a suspended state.",
+			    zfs_deadman_synctime);
+			return (NULL);
+		}
+		vdev_deadman(spa->spa_root_vdev);
+
+		total += zfs_deadman_synctime;
+		(void) printf("ztest has been running for %lld seconds\n",
+		    total);
+	}
 }
 
 static void
@@ -6031,6 +6069,7 @@ main(int argc, char **argv)
 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
 
 	dprintf_setup(&argc, argv);
+	zfs_deadman_synctime = 300;
 
 	ztest_fd_rand = open("/dev/urandom", O_RDONLY);
 	ASSERT3S(ztest_fd_rand, >=, 0);

Modified: projects/bhyve_npt_pmap/lib/libc/iconv/citrus_iconv.c
==============================================================================
--- projects/bhyve_npt_pmap/lib/libc/iconv/citrus_iconv.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/lib/libc/iconv/citrus_iconv.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -116,7 +116,20 @@ open_shared(struct _citrus_iconv_shared 
 	size_t len_convname;
 	int ret;
 
+#ifdef INCOMPATIBLE_WITH_GNU_ICONV
+	/*
+	 * Sadly, the gnu tools expect iconv to actually parse the
+	 * byte stream and don't allow for a pass-through when
+	 * the (src,dest) encodings are the same.
+	 * See gettext-0.18.3+ NEWS:
+	 *   msgfmt now checks PO file headers more strictly with less
+	 *   false-positives.
+	 * NetBSD don't do this either.
+	 */
 	module = (strcmp(src, dst) != 0) ? "iconv_std" : "iconv_none";
+#else
+	module = "iconv_std";
+#endif
 
 	/* initialize iconv handle */
 	len_convname = strlen(convname);

Modified: projects/bhyve_npt_pmap/sys/amd64/amd64/minidump_machdep.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/amd64/amd64/minidump_machdep.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/amd64/amd64/minidump_machdep.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/msgbuf.h>
 #include <sys/watchdog.h>
 #include <vm/vm.h>
+#include <vm/vm_param.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 #include <vm/pmap.h>

Modified: projects/bhyve_npt_pmap/sys/amd64/amd64/pmap.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/amd64/amd64/pmap.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/amd64/amd64/pmap.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -632,6 +632,10 @@ static void
 create_pagetables(vm_paddr_t *firstaddr)
 {
 	int i, j, ndm1g, nkpdpe;
+	pt_entry_t *pt_p;
+	pd_entry_t *pd_p;
+	pdp_entry_t *pdp_p;
+	pml4_entry_t *p4_p;
 	pt_entry_t PG_G, PG_A, PG_M;
 
 	PG_G = pmap_global_bit(kernel_pmap);
@@ -667,32 +671,26 @@ create_pagetables(vm_paddr_t *firstaddr)
 	KPDphys = allocpages(firstaddr, nkpdpe);
 
 	/* Fill in the underlying page table pages */
-	/* Read-only from zero to physfree */
+	/* Nominally read-only (but really R/W) from zero to physfree */
 	/* XXX not fully used, underneath 2M pages */
-	for (i = 0; (i << PAGE_SHIFT) < *firstaddr; i++) {
-		((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
-		((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G;
-	}
+	pt_p = (pt_entry_t *)KPTphys;
+	for (i = 0; ptoa(i) < *firstaddr; i++)
+		pt_p[i] = ptoa(i) | PG_RW | PG_V | PG_G;
 
 	/* Now map the page tables at their location within PTmap */
-	for (i = 0; i < nkpt; i++) {
-		((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
-		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
-	}
+	pd_p = (pd_entry_t *)KPDphys;
+	for (i = 0; i < nkpt; i++)
+		pd_p[i] = (KPTphys + ptoa(i)) | PG_RW | PG_V;
 
 	/* Map from zero to end of allocations under 2M pages */
 	/* This replaces some of the KPTphys entries above */
-	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) {
-		((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
-		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
-	}
+	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
+		pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G;
 
 	/* And connect up the PD to the PDP */
-	for (i = 0; i < nkpdpe; i++) {
-		((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys +
-		    (i << PAGE_SHIFT);
-		((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
-	}
+	pdp_p = (pdp_entry_t *)KPDPphys;
+	for (i = 0; i < nkpdpe; i++)
+		pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U;
 
 	/*
 	 * Now, set up the direct map region using 2MB and/or 1GB pages.  If
@@ -702,37 +700,39 @@ create_pagetables(vm_paddr_t *firstaddr)
 	 * memory, pmap_change_attr() will demote any 2MB or 1GB page mappings
 	 * that are partially used. 
 	 */
+	pd_p = (pd_entry_t *)DMPDphys;
 	for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) {
-		((pd_entry_t *)DMPDphys)[j] = (vm_paddr_t)i << PDRSHIFT;
+		pd_p[j] = (vm_paddr_t)i << PDRSHIFT;
 		/* Preset PG_M and PG_A because demotion expects it. */
-		((pd_entry_t *)DMPDphys)[j] |= PG_RW | PG_V | PG_PS | PG_G |
+		pd_p[j] |= PG_RW | PG_V | PG_PS | PG_G |
 		    PG_M | PG_A;
 	}
+	pdp_p = (pdp_entry_t *)DMPDPphys;
 	for (i = 0; i < ndm1g; i++) {
-		((pdp_entry_t *)DMPDPphys)[i] = (vm_paddr_t)i << PDPSHIFT;
+		pdp_p[i] = (vm_paddr_t)i << PDPSHIFT;
 		/* Preset PG_M and PG_A because demotion expects it. */
-		((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_PS | PG_G |
+		pdp_p[i] |= PG_RW | PG_V | PG_PS | PG_G |
 		    PG_M | PG_A;
 	}
 	for (j = 0; i < ndmpdp; i++, j++) {
-		((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (j << PAGE_SHIFT);
-		((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
+		pdp_p[i] = DMPDphys + ptoa(j);
+		pdp_p[i] |= PG_RW | PG_V | PG_U;
 	}
 
 	/* And recursively map PML4 to itself in order to get PTmap */
-	((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
-	((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;
+	p4_p = (pml4_entry_t *)KPML4phys;
+	p4_p[PML4PML4I] = KPML4phys;
+	p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U;
 
 	/* Connect the Direct Map slot(s) up to the PML4. */
 	for (i = 0; i < NDMPML4E; i++) {
-		((pdp_entry_t *)KPML4phys)[DMPML4I + i] = DMPDPphys +
-		    (i << PAGE_SHIFT);
-		((pdp_entry_t *)KPML4phys)[DMPML4I + i] |= PG_RW | PG_V | PG_U;
+		p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
+		p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U;
 	}
 
 	/* Connect the KVA slot up to the PML4 */
-	((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
-	((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
+	p4_p[KPML4I] = KPDPphys;
+	p4_p[KPML4I] |= PG_RW | PG_V | PG_U;
 }
 
 /*

Modified: projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -448,12 +448,12 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, ui
 		blkid = off >> dn->dn_datablkshift;
 		nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;
 
-		if (blkid >= dn->dn_maxblkid) {
+		if (blkid > dn->dn_maxblkid) {
 			rw_exit(&dn->dn_struct_rwlock);
 			return;
 		}
 		if (blkid + nblks > dn->dn_maxblkid)
-			nblks = dn->dn_maxblkid - blkid;
+			nblks = dn->dn_maxblkid - blkid + 1;
 
 	}
 	l0span = nblks;    /* save for later use to calc level > 1 overhead */

Modified: projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -759,6 +759,7 @@ spa_change_guid(spa_t *spa)
 	int error;
 	uint64_t guid;
 
+	mutex_enter(&spa->spa_vdev_top_lock);
 	mutex_enter(&spa_namespace_lock);
 	guid = spa_generate_guid(NULL);
 
@@ -771,6 +772,7 @@ spa_change_guid(spa_t *spa)
 	}
 
 	mutex_exit(&spa_namespace_lock);
+	mutex_exit(&spa->spa_vdev_top_lock);
 
 	return (error);
 }
@@ -4861,7 +4863,6 @@ spa_vdev_detach(spa_t *spa, uint64_t gui
 		if (pvd->vdev_ops == &vdev_spare_ops)
 			cvd->vdev_unspare = B_FALSE;
 		vdev_remove_parent(cvd);
-		cvd->vdev_resilvering = B_FALSE;
 	}
 
 
@@ -5496,6 +5497,13 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
 			return (oldvd);
 	}
 
+	if (vd->vdev_resilvering && vdev_dtl_empty(vd, DTL_MISSING) &&
+	    vdev_dtl_empty(vd, DTL_OUTAGE)) {
+		ASSERT(vd->vdev_ops->vdev_op_leaf);
+		vd->vdev_resilvering = B_FALSE;
+		vdev_config_dirty(vd->vdev_top);
+	}
+
 	/*
 	 * Check for a completed replacement.  We always consider the first
 	 * vdev in the list to be the oldest vdev, and the last one to be

Modified: projects/bhyve_npt_pmap/sys/kern/kern_event.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/kern/kern_event.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/kern/kern_event.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -431,8 +431,11 @@ filt_proc(struct knote *kn, long hint)
 		if (!(kn->kn_status & KN_DETACHED))
 			knlist_remove_inevent(&p->p_klist, kn);
 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
-		kn->kn_data = p->p_xstat;
 		kn->kn_ptr.p_proc = NULL;
+		if (kn->kn_fflags & NOTE_EXIT)
+			kn->kn_data = p->p_xstat;
+		if (kn->kn_fflags == 0)
+			kn->kn_flags |= EV_DROP;
 		return (1);
 	}
 
@@ -1410,7 +1413,21 @@ retry:
 		KASSERT((kn->kn_status & KN_INFLUX) == 0,
 		    ("KN_INFLUX set when not suppose to be"));
 
-		if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
+		if ((kn->kn_flags & EV_DROP) == EV_DROP) {
+			kn->kn_status &= ~KN_QUEUED;
+			kn->kn_status |= KN_INFLUX;
+			kq->kq_count--;
+			KQ_UNLOCK(kq);
+			/*
+			 * We don't need to lock the list since we've marked
+			 * it _INFLUX.
+			 */
+			if (!(kn->kn_status & KN_DETACHED))
+				kn->kn_fop->f_detach(kn);
+			knote_drop(kn, td);
+			KQ_LOCK(kq);
+			continue;
+		} else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
 			kn->kn_status &= ~KN_QUEUED;
 			kn->kn_status |= KN_INFLUX;
 			kq->kq_count--;

Modified: projects/bhyve_npt_pmap/sys/net80211/ieee80211.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/net80211/ieee80211.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/net80211/ieee80211.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -241,9 +241,15 @@ null_transmit(struct ifnet *ifp, struct 
 	return EACCES;		/* XXX EIO/EPERM? */
 }
 
+#if __FreeBSD_version >= 1000031
 static int
 null_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
+#else
+static int
+null_output(struct ifnet *ifp, struct mbuf *m,
+	struct sockaddr *dst, struct route *ro)
+#endif
 {
 	if_printf(ifp, "discard raw packet\n");
 	return null_transmit(ifp, m);

Modified: projects/bhyve_npt_pmap/sys/net80211/ieee80211_output.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/net80211/ieee80211_output.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/net80211/ieee80211_output.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -482,9 +482,15 @@ ieee80211_raw_output(struct ieee80211vap
  * connect bpf write calls to the 802.11 layer for injecting
  * raw 802.11 frames.
  */
+#if __FreeBSD_version >= 1000031
 int
 ieee80211_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
+#else
+int
+ieee80211_output(struct ifnet *ifp, struct mbuf *m,
+	struct sockaddr *dst, struct route *ro)
+#endif
 {
 #define senderr(e) do { error = (e); goto bad;} while (0)
 	struct ieee80211_node *ni = NULL;

Modified: projects/bhyve_npt_pmap/sys/net80211/ieee80211_proto.h
==============================================================================
--- projects/bhyve_npt_pmap/sys/net80211/ieee80211_proto.h	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/net80211/ieee80211_proto.h	Thu Aug 22 03:43:12 2013	(r254635)
@@ -96,8 +96,13 @@ int	ieee80211_mgmt_output(struct ieee802
 		struct ieee80211_bpf_params *);
 int	ieee80211_raw_xmit(struct ieee80211_node *, struct mbuf *,
 		const struct ieee80211_bpf_params *);
+#if __FreeBSD_version >= 1000031
 int	ieee80211_output(struct ifnet *, struct mbuf *,
                const struct sockaddr *, struct route *ro);
+#else
+int	ieee80211_output(struct ifnet *, struct mbuf *,
+               struct sockaddr *, struct route *ro);
+#endif
 int	ieee80211_vap_pkt_send_dest(struct ieee80211vap *, struct mbuf *,
 		struct ieee80211_node *);
 int	ieee80211_raw_output(struct ieee80211vap *, struct ieee80211_node *,

Modified: projects/bhyve_npt_pmap/sys/net80211/ieee80211_var.h
==============================================================================
--- projects/bhyve_npt_pmap/sys/net80211/ieee80211_var.h	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/net80211/ieee80211_var.h	Thu Aug 22 03:43:12 2013	(r254635)
@@ -496,8 +496,13 @@ struct ieee80211vap {
 	int			(*iv_newstate)(struct ieee80211vap *,
 				    enum ieee80211_state, int);
 	/* 802.3 output method for raw frame xmit */
+#if __FreeBSD_version >= 1000031
 	int			(*iv_output)(struct ifnet *, struct mbuf *,
 				    const struct sockaddr *, struct route *);
+#else
+	int			(*iv_output)(struct ifnet *, struct mbuf *,
+				    struct sockaddr *, struct route *);
+#endif
 	uint64_t		iv_spare[6];
 };
 MALLOC_DECLARE(M_80211_VAP);

Modified: projects/bhyve_npt_pmap/sys/ofed/include/linux/page.h
==============================================================================
--- projects/bhyve_npt_pmap/sys/ofed/include/linux/page.h	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/ofed/include/linux/page.h	Thu Aug 22 03:43:12 2013	(r254635)
@@ -32,6 +32,7 @@
 
 #include <sys/param.h>
 
+#include <machine/atomic.h>
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 

Modified: projects/bhyve_npt_pmap/sys/sparc64/sparc64/genassym.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/sparc64/sparc64/genassym.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/sparc64/sparc64/genassym.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/vmmeter.h>
 #include <sys/_cpuset.h>
 
+#include <machine/atomic.h>
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>

Modified: projects/bhyve_npt_pmap/sys/sys/event.h
==============================================================================
--- projects/bhyve_npt_pmap/sys/sys/event.h	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/sys/event.h	Thu Aug 22 03:43:12 2013	(r254635)
@@ -76,6 +76,7 @@ struct kevent {
 #define EV_DISPATCH	0x0080		/* disable event after reporting */
 
 #define EV_SYSFLAGS	0xF000		/* reserved by system */
+#define	EV_DROP		0x1000		/* note should be dropped */
 #define EV_FLAG1	0x2000		/* filter-specific flag */
 
 /* returned values */

Modified: projects/bhyve_npt_pmap/sys/vm/vm_page.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/vm/vm_page.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/vm/vm_page.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -64,8 +64,7 @@
  *			GENERAL RULES ON VM_PAGE MANIPULATION
  *
  *	- A page queue lock is required when adding or removing a page from a
- *	  page queue (vm_pagequeues[]), regardless of other locks or the
- *	  busy state of a page.
+ *	  page queue regardless of other locks or the busy state of a page.
  *
  *		* In general, no thread besides the page daemon can acquire or
  *		  hold more than one page queue lock at a time.
@@ -124,20 +123,7 @@ __FBSDID("$FreeBSD$");
  *	page structure.
  */
 
-struct vm_pagequeue vm_pagequeues[PQ_COUNT] = {
-	[PQ_INACTIVE] = {
-		.pq_pl = TAILQ_HEAD_INITIALIZER(
-		    vm_pagequeues[PQ_INACTIVE].pq_pl),
-		.pq_cnt = &cnt.v_inactive_count,
-		.pq_name = "vm inactive pagequeue"
-	},
-	[PQ_ACTIVE] = {
-		.pq_pl = TAILQ_HEAD_INITIALIZER(
-		    vm_pagequeues[PQ_ACTIVE].pq_pl),
-		.pq_cnt = &cnt.v_active_count,
-		.pq_name = "vm active pagequeue"
-	}
-};
+struct vm_domain vm_dom[MAXMEMDOM];
 struct mtx_padalign vm_page_queue_free_mtx;
 
 struct mtx_padalign pa_lock[PA_LOCK_COUNT];
@@ -256,6 +242,34 @@ vm_page_blacklist_lookup(char *list, vm_
 	return (0);
 }
 
+static void
+vm_page_domain_init(struct vm_domain *vmd)
+{
+	struct vm_pagequeue *pq;
+	int i;
+
+	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) =
+	    "vm inactive pagequeue";
+	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) =
+	    &cnt.v_inactive_count;
+	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) =
+	    "vm active pagequeue";
+	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
+	    &cnt.v_active_count;
+	vmd->vmd_fullintervalcount = 0;
+	vmd->vmd_page_count = 0;
+	vmd->vmd_free_count = 0;
+	vmd->vmd_segs = 0;
+	vmd->vmd_oom = FALSE;
+	vmd->vmd_pass = 0;
+	for (i = 0; i < PQ_COUNT; i++) {
+		pq = &vmd->vmd_pagequeues[i];
+		TAILQ_INIT(&pq->pq_pl);
+		mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
+		    MTX_DEF | MTX_DUPOK);
+	}
+}
+
 /*
  *	vm_page_startup:
  *
@@ -319,8 +333,8 @@ vm_page_startup(vm_offset_t vaddr)
 	mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
 	for (i = 0; i < PA_LOCK_COUNT; i++)
 		mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
-	for (i = 0; i < PQ_COUNT; i++)
-		vm_pagequeue_init_lock(&vm_pagequeues[i]);
+	for (i = 0; i < vm_ndomains; i++)
+		vm_page_domain_init(&vm_dom[i]);
 
 	/*
 	 * Allocate memory for use when boot strapping the kernel memory
@@ -1055,7 +1069,7 @@ vm_page_cache_free(vm_object_t object, v
 		KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
 		    ("vm_page_cache_free: page %p has inconsistent flags", m));
 		cnt.v_cache_count--;
-		cnt.v_free_count++;
+		vm_phys_freecnt_adj(m, 1);
 	}
 	empty = vm_radix_is_empty(&object->cache);
 	mtx_unlock(&vm_page_queue_free_mtx);
@@ -1311,7 +1325,7 @@ vm_page_alloc(vm_object_t object, vm_pin
 		    ("vm_page_alloc: page %p is not free", m));
 		KASSERT(m->valid == 0,
 		    ("vm_page_alloc: free page %p is valid", m));
-		cnt.v_free_count--;
+		vm_phys_freecnt_adj(m, -1);
 	}
 
 	/*
@@ -1569,7 +1583,7 @@ vm_page_alloc_init(vm_page_t m)
 		    ("vm_page_alloc_init: page %p is not free", m));
 		KASSERT(m->valid == 0,
 		    ("vm_page_alloc_init: free page %p is valid", m));
-		cnt.v_free_count--;
+		vm_phys_freecnt_adj(m, -1);
 		if ((m->flags & PG_ZERO) != 0)
 			vm_page_zero_count--;
 	}
@@ -1711,6 +1725,13 @@ vm_waitpfault(void)
 	    "pfault", 0);
 }
 
+struct vm_pagequeue *
+vm_page_pagequeue(vm_page_t m)
+{
+
+	return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+}
+
 /*
  *	vm_page_dequeue:
  *
@@ -1726,11 +1747,11 @@ vm_page_dequeue(vm_page_t m)
 	vm_page_lock_assert(m, MA_OWNED);
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_dequeue: page %p is not queued", m));
-	pq = &vm_pagequeues[m->queue];
+	pq = vm_page_pagequeue(m);
 	vm_pagequeue_lock(pq);
 	m->queue = PQ_NONE;
 	TAILQ_REMOVE(&pq->pq_pl, m, pageq);
-	(*pq->pq_cnt)--;
+	vm_pagequeue_cnt_dec(pq);
 	vm_pagequeue_unlock(pq);
 }
 
@@ -1747,11 +1768,11 @@ vm_page_dequeue_locked(vm_page_t m)
 	struct vm_pagequeue *pq;
 
 	vm_page_lock_assert(m, MA_OWNED);
-	pq = &vm_pagequeues[m->queue];
+	pq = vm_page_pagequeue(m);
 	vm_pagequeue_assert_locked(pq);
 	m->queue = PQ_NONE;
 	TAILQ_REMOVE(&pq->pq_pl, m, pageq);
-	(*pq->pq_cnt)--;
+	vm_pagequeue_cnt_dec(pq);
 }
 
 /*
@@ -1767,11 +1788,11 @@ vm_page_enqueue(int queue, vm_page_t m)
 	struct vm_pagequeue *pq;
 
 	vm_page_lock_assert(m, MA_OWNED);
-	pq = &vm_pagequeues[queue];
+	pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
 	vm_pagequeue_lock(pq);
 	m->queue = queue;
 	TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
-	++*pq->pq_cnt;
+	vm_pagequeue_cnt_inc(pq);
 	vm_pagequeue_unlock(pq);
 }
 
@@ -1790,7 +1811,7 @@ vm_page_requeue(vm_page_t m)
 	vm_page_lock_assert(m, MA_OWNED);
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue: page %p is not queued", m));
-	pq = &vm_pagequeues[m->queue];
+	pq = vm_page_pagequeue(m);
 	vm_pagequeue_lock(pq);
 	TAILQ_REMOVE(&pq->pq_pl, m, pageq);
 	TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
@@ -1811,7 +1832,7 @@ vm_page_requeue_locked(vm_page_t m)
 
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue_locked: page %p is not queued", m));
-	pq = &vm_pagequeues[m->queue];
+	pq = vm_page_pagequeue(m);
 	vm_pagequeue_assert_locked(pq);
 	TAILQ_REMOVE(&pq->pq_pl, m, pageq);
 	TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
@@ -1948,7 +1969,7 @@ vm_page_free_toq(vm_page_t m)
 		 */
 		mtx_lock(&vm_page_queue_free_mtx);
 		m->flags |= PG_FREE;
-		cnt.v_free_count++;
+		vm_phys_freecnt_adj(m, 1);
 #if VM_NRESERVLEVEL > 0
 		if (!vm_reserv_free_page(m))
 #else
@@ -2081,14 +2102,14 @@ _vm_page_deactivate(vm_page_t m, int ath
 		if (queue != PQ_NONE)
 			vm_page_dequeue(m);
 		m->flags &= ~PG_WINATCFLS;
-		pq = &vm_pagequeues[PQ_INACTIVE];
+		pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
 		vm_pagequeue_lock(pq);
 		m->queue = PQ_INACTIVE;
 		if (athead)
 			TAILQ_INSERT_HEAD(&pq->pq_pl, m, pageq);
 		else
 			TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
-		cnt.v_inactive_count++;
+		vm_pagequeue_cnt_inc(pq);
 		vm_pagequeue_unlock(pq);
 	}
 }
@@ -2888,18 +2909,20 @@ DB_SHOW_COMMAND(page, vm_page_print_page
 
 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
 {
-		
-	db_printf("PQ_FREE:");
-	db_printf(" %d", cnt.v_free_count);
-	db_printf("\n");
-		
-	db_printf("PQ_CACHE:");
-	db_printf(" %d", cnt.v_cache_count);
-	db_printf("\n");
-
-	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
-		*vm_pagequeues[PQ_ACTIVE].pq_cnt,
-		*vm_pagequeues[PQ_INACTIVE].pq_cnt);
+	int dom;
+
+	db_printf("pq_free %d pq_cache %d\n",
+	    cnt.v_free_count, cnt.v_cache_count);
+	for (dom = 0; dom < vm_ndomains; dom++) {
+		db_printf(
+	"dom %d page_cnt %d free %d pq_act %d pq_inact %d pass %d\n",
+		    dom,
+		    vm_dom[dom].vmd_page_count,
+		    vm_dom[dom].vmd_free_count,
+		    vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
+		    vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
+		    vm_dom[dom].vmd_pass);
+	}
 }
 
 DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)

Modified: projects/bhyve_npt_pmap/sys/vm/vm_page.h
==============================================================================
--- projects/bhyve_npt_pmap/sys/vm/vm_page.h	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/vm/vm_page.h	Thu Aug 22 03:43:12 2013	(r254635)
@@ -181,18 +181,44 @@ TAILQ_HEAD(pglist, vm_page);
 struct vm_pagequeue {
 	struct mtx	pq_mutex;
 	struct pglist	pq_pl;
-	int *const	pq_cnt;
-	const char *const pq_name;
+	int		pq_cnt;
+	int		* const pq_vcnt;
+	const char	* const pq_name;
 } __aligned(CACHE_LINE_SIZE);
 
-extern struct vm_pagequeue vm_pagequeues[PQ_COUNT];
+
+struct vm_domain {
+	struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+	int vmd_fullintervalcount;
+	u_int vmd_page_count;
+	u_int vmd_free_count;
+	long vmd_segs;	/* bitmask of the segments */
+	boolean_t vmd_oom;
+	int vmd_pass;	/* local pagedaemon pass */
+	struct vm_page vmd_marker; /* marker for pagedaemon private use */
+};
+
+extern struct vm_domain vm_dom[MAXMEMDOM];
 
 #define	vm_pagequeue_assert_locked(pq)	mtx_assert(&(pq)->pq_mutex, MA_OWNED)
-#define	vm_pagequeue_init_lock(pq)	mtx_init(&(pq)->pq_mutex,	\
-	    (pq)->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK);
 #define	vm_pagequeue_lock(pq)		mtx_lock(&(pq)->pq_mutex)
 #define	vm_pagequeue_unlock(pq)		mtx_unlock(&(pq)->pq_mutex)
 
+#ifdef _KERNEL
+static __inline void
+vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
+{
+
+#ifdef notyet
+	vm_pagequeue_assert_locked(pq);
+#endif
+	pq->pq_cnt += addend;
+	atomic_add_int(pq->pq_vcnt, addend);
+}
+#define	vm_pagequeue_cnt_inc(pq)	vm_pagequeue_cnt_add((pq), 1)
+#define	vm_pagequeue_cnt_dec(pq)	vm_pagequeue_cnt_add((pq), -1)
+#endif	/* _KERNEL */
+
 extern struct mtx_padalign vm_page_queue_free_mtx;
 extern struct mtx_padalign pa_lock[];
 
@@ -393,6 +419,7 @@ boolean_t vm_page_is_cached(vm_object_t 
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 vm_page_t vm_page_next(vm_page_t m);
 int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
+struct vm_pagequeue *vm_page_pagequeue(vm_page_t m);
 vm_page_t vm_page_prev(vm_page_t m);
 void vm_page_putfake(vm_page_t m);
 void vm_page_readahead_finish(vm_page_t m);

Modified: projects/bhyve_npt_pmap/sys/vm/vm_pageout.c
==============================================================================
--- projects/bhyve_npt_pmap/sys/vm/vm_pageout.c	Thu Aug 22 02:54:20 2013	(r254634)
+++ projects/bhyve_npt_pmap/sys/vm/vm_pageout.c	Thu Aug 22 03:43:12 2013	(r254635)
@@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
+#include <sys/smp.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/rwlock.h>
@@ -103,6 +104,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_map.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
@@ -114,7 +116,8 @@ __FBSDID("$FreeBSD$");
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
 static int vm_pageout_clean(vm_page_t);
-static void vm_pageout_scan(int pass);
+static void vm_pageout_scan(struct vm_domain *vmd, int pass);
+static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
 
 struct proc *pageproc;
 
@@ -216,14 +219,15 @@ SYSCTL_INT(_vm, OID_AUTO, max_wired,
 	CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
 
 static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
-static boolean_t vm_pageout_launder(int, int, vm_paddr_t, vm_paddr_t);
+static boolean_t vm_pageout_launder(struct vm_pagequeue *pq, int, vm_paddr_t,
+    vm_paddr_t);
 #if !defined(NO_SWAPPING)
 static void vm_pageout_map_deactivate_pages(vm_map_t, long);
 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
 static void vm_req_vmdaemon(int req);
 #endif
 static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
-static void vm_pageout_page_stats(void);
+static void vm_pageout_page_stats(struct vm_domain *vmd);
 
 /*
  * Initialize a dummy page for marking the caller's place in the specified
@@ -267,7 +271,7 @@ vm_pageout_fallback_object_lock(vm_page_
 
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
-	pq = &vm_pagequeues[queue];
+	pq = vm_page_pagequeue(m);
 	object = m->object;
 	
 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
@@ -309,7 +313,7 @@ vm_pageout_page_lock(vm_page_t m, vm_pag
 
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
-	pq = &vm_pagequeues[queue];
+	pq = vm_page_pagequeue(m);
 
 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
 	vm_pagequeue_unlock(pq);
@@ -567,21 +571,17 @@ vm_pageout_flush(vm_page_t *mc, int coun
 }
 
 static boolean_t
-vm_pageout_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high)
+vm_pageout_launder(struct vm_pagequeue *pq, int tries, vm_paddr_t low,
+    vm_paddr_t high)
 {
 	struct mount *mp;
-	struct vm_pagequeue *pq;
 	struct vnode *vp;
 	vm_object_t object;
 	vm_paddr_t pa;
 	vm_page_t m, m_tmp, next;
 
-	pq = &vm_pagequeues[queue];
 	vm_pagequeue_lock(pq);
 	TAILQ_FOREACH_SAFE(m, &pq->pq_pl, pageq, next) {
-		KASSERT(m->queue == queue,
-		    ("vm_pageout_launder: page %p's queue is not %d", m,
-		    queue));
 		if ((m->flags & PG_MARKER) != 0)
 			continue;
 		pa = VM_PAGE_TO_PHYS(m);
@@ -661,7 +661,8 @@ vm_pageout_launder(int queue, int tries,
 void
 vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
 {
-	int actl, actmax, inactl, inactmax;
+	int actl, actmax, inactl, inactmax, dom, initial_dom;
+	static int start_dom = 0;
 
 	if (tries > 0) {
 		/*
@@ -677,19 +678,55 @@ vm_pageout_grow_cache(int tries, vm_padd
 		 */
 		uma_reclaim();
 	}
+
+	/*
+	 * Make the next scan start on the next domain.
+	 */
+	initial_dom = atomic_fetchadd_int(&start_dom, 1) % vm_ndomains;
+
 	inactl = 0;
 	inactmax = cnt.v_inactive_count;
 	actl = 0;
 	actmax = tries < 2 ? 0 : cnt.v_active_count;
+	dom = initial_dom;
+
+	/*
+	 * Scan domains in round-robin order, first inactive queues,
+	 * then active.  Since domain usually owns large physically
+	 * contiguous chunk of memory, it makes sense to completely
+	 * exhaust one domain before switching to next, while growing
+	 * the pool of contiguous physical pages.
+	 *
+	 * Do not even start launder a domain which cannot contain
+	 * the specified address range, as indicated by segments
+	 * constituting the domain.
+	 */
 again:
-	if (inactl < inactmax && vm_pageout_launder(PQ_INACTIVE, tries, low,
-	    high)) {
-		inactl++;
-		goto again;
-	}
-	if (actl < actmax && vm_pageout_launder(PQ_ACTIVE, tries, low, high)) {
-		actl++;
-		goto again;
+	if (inactl < inactmax) {
+		if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs,
+		    low, high) &&
+		    vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_INACTIVE],
+		    tries, low, high)) {
+			inactl++;
+			goto again;
+		}
+		if (++dom == vm_ndomains)
+			dom = 0;
+		if (dom != initial_dom)
+			goto again;
+	}
+	if (actl < actmax) {
+		if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs,
+		    low, high) &&
+		    vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_ACTIVE],
+		      tries, low, high)) {
+			actl++;
+			goto again;
+		}
+		if (++dom == vm_ndomains)
+			dom = 0;
+		if (dom != initial_dom)
+			goto again;
 	}
 }
 
@@ -861,10 +898,9 @@ vm_pageout_map_deactivate_pages(map, des
  *	vm_pageout_scan does the dirty work for the pageout daemon.
  */
 static void
-vm_pageout_scan(int pass)
+vm_pageout_scan(struct vm_domain *vmd, int pass)
 {
 	vm_page_t m, next;
-	struct vm_page marker;
 	struct vm_pagequeue *pq;
 	int page_shortage, maxscan, pcount;
 	int addl_page_shortage;
@@ -874,8 +910,6 @@ vm_pageout_scan(int pass)
 	int maxlaunder;
 	boolean_t queues_locked;
 
-	vm_pageout_init_marker(&marker, PQ_INACTIVE);
-
 	/*
 	 * Decrease registered cache sizes.
 	 */
@@ -888,7 +922,7 @@ vm_pageout_scan(int pass)
 	/*
 	 * The addl_page_shortage is the number of temporarily
 	 * stuck pages in the inactive queue.  In other words, the
-	 * number of pages from cnt.v_inactive_count that should be
+	 * number of pages from the inactive count that should be
 	 * discounted in setting the target for the active queue scan.
 	 */
 	addl_page_shortage = atomic_readandclear_int(&vm_pageout_deficit);
@@ -914,8 +948,6 @@ vm_pageout_scan(int pass)
 	if (pass)
 		maxlaunder = 10000;
 
-	maxscan = cnt.v_inactive_count;
-
 	/*
 	 * Start scanning the inactive queue for pages we can move to the
 	 * cache or free.  The scan will stop when the target is reached or
@@ -923,7 +955,8 @@ vm_pageout_scan(int pass)
 	 * is not used to form decisions for the inactive queue, only for the
 	 * active queue.
 	 */
-	pq = &vm_pagequeues[PQ_INACTIVE];
+	pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
+	maxscan = pq->pq_cnt;
 	vm_pagequeue_lock(pq);
 	queues_locked = TRUE;
 	for (m = TAILQ_FIRST(&pq->pq_pl);
@@ -984,7 +1017,7 @@ vm_pageout_scan(int pass)
 		 * 'next' pointer.  Use our marker to remember our
 		 * place.
 		 */
-		TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
+		TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, pageq);
 		vm_pagequeue_unlock(pq);
 		queues_locked = FALSE;
 
@@ -1034,7 +1067,7 @@ vm_pageout_scan(int pass)
 			/*
 			 * Held pages are essentially stuck in the
 			 * queue.  So, they ought to be discounted
-			 * from cnt.v_inactive_count.  See the
+			 * from the inactive count.  See the
 			 * calculation of the page_shortage for the
 			 * loop over the active queue below.
 			 */
@@ -1178,7 +1211,7 @@ vm_pageout_scan(int pass)
 				 */
 				if (m->queue != PQ_INACTIVE ||
 				    m->object != object ||
-				    TAILQ_NEXT(m, pageq) != &marker) {
+				    TAILQ_NEXT(m, pageq) != &vmd->vmd_marker) {
 					vm_page_unlock(m);
 					if (object->flags & OBJ_MIGHTBEDIRTY)
 						vnodes_skipped++;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
