Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 10 Feb 2011 18:52:38 +0200
From:      Gleb Kurtsou <gleb.kurtsou@gmail.com>
To:        Ivan Voras <ivoras@freebsd.org>
Cc:        freebsd-fs@freebsd.org, freebsd-stable@freebsd.org
Subject:   Re: tmpfs is zero bytes (no free space), maybe a zfs bug?
Message-ID:  <20110210165237.GA15601@tops>
In-Reply-To: <AANLkTikY1GHVmoTxTdwbnOERHV1zgvs5k4WXYY6irWeZ@mail.gmail.com>
References:  <4D36A2CF.1080508@fsn.hu> <20110119084648.GA28278@icarus.home.lan> <4D36B85B.8070201@fsn.hu> <ih6f1d$u16$1@dough.gmane.org> <20110119150200.GY2518@deviant.kiev.zoral.com.ua> <AANLkTinPZ8jP5yX2se5LLaBYP1dpbEAhX-u7Wr0NAGz4@mail.gmail.com> <20110207133748.GA16327@tops.skynet.lt> <AANLkTikY1GHVmoTxTdwbnOERHV1zgvs5k4WXYY6irWeZ@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--IS0zKkzwUGydFO0o
Content-Type: text/plain; charset=utf-8
Content-Disposition: inline

On (07/02/2011 15:35), Ivan Voras wrote:
> On 7 February 2011 14:37, Gleb Kurtsou <gleb.kurtsou@gmail.com> wrote:
> 
> > It's up to user to mount tmpfs filesystems of reasonable size to prevent
> > resource exhaustion. Anyway, enormously large tmpfs killing all your
> > process is not the way to go.
> 
> Of course not, but as I see it (from admin perspective), tmpfs should
> behave as close to regular processes in consuming memory as possible
> (where possible; obviously it cannot be subject to the OOM killer :)
> ).
Could you test the attached patch? It sets the file system size to half of
RAM by default and makes tmpfs behave much like a regular process from the
VM subsystem's point of view. It no longer depends on inactive/wired memory
stats, but checks whether swap is nearly full. I've added the
vfs.tmpfs.swap_reserved sysctl to limit tmpfs growth.

In my tests the system neither panicked nor invoked the OOM killer while
consuming all available RAM and swap. Unfortunately I wasn't able to test it
with ZFS; I'd appreciate it if you could run several tests to see how ZFS and
tmpfs behave in a low-memory situation.

If it works as expected, I'm going to implement a resize feature, update the
man page, and change the mount option parsing to allow specifying the size in
human-readable form, e.g. size=1g.

Thanks,
Gleb.

--IS0zKkzwUGydFO0o
Content-Type: text/plain; charset=utf-8
Content-Disposition: attachment; filename="tmpfs-memfix.patch.txt"

commit 185bc042f0647a38b86aa78c5dda25a4bf0ea3dd
Author: Gleb Kurtsou <gleb.kurtsou@gmail.com>
Date:   Thu Feb 10 18:38:44 2011 +0200

    tmpfs: Change the way available memory is calculated
    
    Try to allocate pages until filesystem size limit hit,
    fail in low memory situation.
    
    By default set filesystem size to half of available memory
    
    Add vfs.tmpfs.swap_reserved sysctl; set default to 2048 pages (8m or 16m)
    
    Check if free pages available before allocating new node
    
    Reorganize limits and mount option parsing

diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
index b1c4249..07f521c 100644
--- a/sys/fs/tmpfs/tmpfs.h
+++ b/sys/fs/tmpfs/tmpfs.h
@@ -487,61 +487,30 @@ int	tmpfs_truncate(struct vnode *, off_t);
  * Memory management stuff.
  */
 
-/* Amount of memory pages to reserve for the system (e.g., to not use by
- * tmpfs).
- * XXX: Should this be tunable through sysctl, for instance? */
-#define TMPFS_PAGES_RESERVED (4 * 1024 * 1024 / PAGE_SIZE)
-
 /*
- * Returns information about the number of available memory pages,
- * including physical and virtual ones.
- *
- * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
- * excessive memory usage.
- *
+ * Number of reserved swap pages should not be lower than
+ * swap_pager_almost_full high water mark.
  */
+#define TMPFS_SWAP_MINRESERVED		1024
+
 static __inline size_t
-tmpfs_mem_info(void)
+tmpfs_pages_max(struct tmpfs_mount *tmp)
 {
-	size_t size;
-
-	size = swap_pager_avail + cnt.v_free_count + cnt.v_inactive_count;
-	size -= size > cnt.v_wire_count ? cnt.v_wire_count : size;
-	return size;
+	return (tmp->tm_pages_max);
 }
 
-/* Returns the maximum size allowed for a tmpfs file system.  This macro
- * must be used instead of directly retrieving the value from tm_pages_max.
- * The reason is that the size of a tmpfs file system is dynamic: it lets
- * the user store files as long as there is enough free memory (including
- * physical memory and swap space).  Therefore, the amount of memory to be
- * used is either the limit imposed by the user during mount time or the
- * amount of available memory, whichever is lower.  To avoid consuming all
- * the memory for a given mount point, the system will always reserve a
- * minimum of TMPFS_PAGES_RESERVED pages, which is also taken into account
- * by this macro (see above). */
 static __inline size_t
-TMPFS_PAGES_MAX(struct tmpfs_mount *tmp)
+tmpfs_pages_used(struct tmpfs_mount *tmp)
 {
-	size_t freepages;
-
-	freepages = tmpfs_mem_info();
-	freepages -= freepages < TMPFS_PAGES_RESERVED ?
-	    freepages : TMPFS_PAGES_RESERVED;
+	const size_t node_size = sizeof(struct tmpfs_node) +
+	    sizeof(struct tmpfs_dirent);
+	size_t meta_pages;
 
-	return MIN(tmp->tm_pages_max, freepages + tmp->tm_pages_used);
+	meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
+	    PAGE_SIZE);
+	return (meta_pages + tmp->tm_pages_used);
 }
 
-/* Returns the available space for the given file system. */
-#define TMPFS_META_PAGES(tmp) (howmany((tmp)->tm_nodes_inuse * (sizeof(struct tmpfs_node) \
-				+ sizeof(struct tmpfs_dirent)), PAGE_SIZE))
-#define TMPFS_FILE_PAGES(tmp) ((tmp)->tm_pages_used)
-
-#define TMPFS_PAGES_AVAIL(tmp) (TMPFS_PAGES_MAX(tmp) > \
-			TMPFS_META_PAGES(tmp)+TMPFS_FILE_PAGES(tmp)? \
-			TMPFS_PAGES_MAX(tmp) - TMPFS_META_PAGES(tmp) \
-			- TMPFS_FILE_PAGES(tmp):0)
-
 #endif
 
 /* --------------------------------------------------------------------- */
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 84a2038..259e205 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
+#include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
@@ -55,6 +56,60 @@ __FBSDID("$FreeBSD$");
 #include <fs/tmpfs/tmpfs_fifoops.h>
 #include <fs/tmpfs/tmpfs_vnops.h>
 
+static long tmpfs_swap_reserved = TMPFS_SWAP_MINRESERVED * 2;
+
+SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs memory file system");
+
+static int
+sysctl_swap_reserved(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long pages, bytes;
+
+	pages = *(long *)arg1;
+	bytes = pages * PAGE_SIZE;
+
+	error = sysctl_handle_long(oidp, &bytes, 0, req);
+	if (error || !req->newptr)
+		return (error);
+
+	pages = bytes / PAGE_SIZE;
+	if (pages < TMPFS_SWAP_MINRESERVED)
+		return (EINVAL);
+
+	*(long *)arg1 = pages;
+	return (0);
+}
+
+SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, swap_reserved, CTLTYPE_LONG|CTLFLAG_RW,
+    &tmpfs_swap_reserved, 0, sysctl_swap_reserved, "L", "reserved swap space");
+
+static __inline size_t
+tmpfs_pages_avail(struct tmpfs_mount *tmp, size_t req_pages)
+{
+	vm_ooffset_t avail;
+
+	if (tmpfs_pages_max(tmp) < tmpfs_pages_used(tmp) + req_pages)
+		return (0);
+
+	if (!vm_page_count_target())
+		return (1);
+
+	/*
+	 * Fail if pagedaemon wasn't able to free desired number of pages and
+	 * we are running out of swap.
+	 */
+	avail = swap_pager_avail - vm_paging_target() - req_pages;
+	if (avail < tmpfs_swap_reserved) {	/* avail is signed */
+		printf("tmpfs: low memory: available %jd, "
+		    "paging target %d, requested %zd\n",
+		    (intmax_t)swap_pager_avail, vm_paging_target(), req_pages);
+		return (0);
+	}
+
+	return (1);
+}
+
 /* --------------------------------------------------------------------- */
 
 /*
@@ -95,6 +150,8 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
 
 	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
 		return (ENOSPC);
+	if (tmpfs_pages_avail(tmp, 1) == 0)
+		return (ENOSPC);
 
 	nnode = (struct tmpfs_node *)uma_zalloc_arg(
 				tmp->tm_node_pool, tmp, M_WAITOK);
@@ -905,7 +962,7 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize)
 	newpages = round_page(newsize) / PAGE_SIZE;
 
 	if (newpages > oldpages &&
-	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
+	    tmpfs_pages_avail(tmp, newpages - oldpages) == 0) {
 		error = ENOSPC;
 		goto out;
 	}
diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
index 356be5e..128200f 100644
--- a/sys/fs/tmpfs/tmpfs_vfsops.c
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c
@@ -129,14 +129,14 @@ tmpfs_node_fini(void *mem, int size)
 static int
 tmpfs_mount(struct mount *mp)
 {
+	const size_t nodes_per_page = howmany(PAGE_SIZE,
+	    sizeof(struct tmpfs_dirent) + sizeof(struct tmpfs_node));
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *root;
-	size_t pages;
-	uint32_t nodes;
 	int error;
 	/* Size counters. */
-	u_int nodes_max;
-	u_quad_t size_max, maxfilesize;
+	u_quad_t pages;
+	u_quad_t nodes_max, size_max, maxfilesize;
 
 	/* Root node attributes. */
 	uid_t root_uid;
@@ -173,7 +173,7 @@ tmpfs_mount(struct mount *mp)
 	if (mp->mnt_cred->cr_ruid != 0 ||
 	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
 		root_mode = va.va_mode;
-	if (vfs_scanopt(mp->mnt_optnew, "inodes", "%u", &nodes_max) != 1)
+	if (vfs_scanopt(mp->mnt_optnew, "inodes", "%qu", &nodes_max) != 1)
 		nodes_max = 0;
 	if (vfs_scanopt(mp->mnt_optnew, "size", "%qu", &size_max) != 1)
 		size_max = 0;
@@ -181,38 +181,49 @@ tmpfs_mount(struct mount *mp)
 	    &maxfilesize) != 1)
 		maxfilesize = 0;
 
-	/* Do not allow mounts if we do not have enough memory to preserve
-	 * the minimum reserved pages. */
-	if (tmpfs_mem_info() < TMPFS_PAGES_RESERVED)
+	/*
+	 * XXX Deny mounts if pagedaemon wasn't able to recovery desired
+	 * number of pages.
+	 */
+	if (vm_page_count_target())
 		return ENOSPC;
 
 	/* Get the maximum number of memory pages this file system is
 	 * allowed to use, based on the maximum size the user passed in
-	 * the mount structure.  A value of zero is treated as if the
-	 * maximum available space was requested. */
-	if (size_max < PAGE_SIZE || size_max > SIZE_MAX - PAGE_SIZE)
-		pages = SIZE_MAX;
+	 * the mount structure. Use half of RAM by default. */
+	if (size_max < PAGE_SIZE*4 || size_max > SIZE_MAX - PAGE_SIZE)
+		pages = cnt.v_page_count / 2;
 	else
 		pages = howmany(size_max, PAGE_SIZE);
 	MPASS(pages > 0);
+	MPASS(pages < SIZE_MAX);
+
+	if (pages < SIZE_MAX / PAGE_SIZE)
+		size_max = pages * PAGE_SIZE;
+	else
+		size_max = SIZE_MAX;
 
 	if (nodes_max <= 3) {
-		if (pages > UINT32_MAX - 3)
-			nodes = UINT32_MAX;
+		if (pages < UINT32_MAX / nodes_per_page)
+			nodes_max = pages * nodes_per_page;
 		else
-			nodes = pages + 3;
-	} else
-		nodes = nodes_max;
-	MPASS(nodes >= 3);
+			nodes_max = UINT32_MAX;
+	}
+	if (nodes_max > UINT32_MAX)
+		nodes_max = UINT32_MAX;
+	MPASS(nodes_max >= 3);
+
+	if (maxfilesize < PAGE_SIZE || maxfilesize > size_max)
+		maxfilesize = size_max;
 
 	/* Allocate the tmpfs mount structure and fill it. */
 	tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount),
 	    M_TMPFSMNT, M_WAITOK | M_ZERO);
 
 	mtx_init(&tmp->allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF);
-	tmp->tm_nodes_max = nodes;
+	tmp->tm_nodes_max = nodes_max;
 	tmp->tm_nodes_inuse = 0;
-	tmp->tm_maxfilesize = maxfilesize > 0 ? maxfilesize : UINT64_MAX;
+	tmp->tm_maxfilesize = maxfilesize;
 	LIST_INIT(&tmp->tm_nodes_used);
 
 	tmp->tm_pages_max = pages;
@@ -381,22 +392,23 @@ tmpfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
 static int
 tmpfs_statfs(struct mount *mp, struct statfs *sbp)
 {
-	fsfilcnt_t freenodes;
 	struct tmpfs_mount *tmp;
+	size_t used;
 
 	tmp = VFS_TO_TMPFS(mp);
 
 	sbp->f_iosize = PAGE_SIZE;
 	sbp->f_bsize = PAGE_SIZE;
 
-	sbp->f_blocks = TMPFS_PAGES_MAX(tmp);
-	sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp);
-
-	freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_inuse,
-	    TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node));
-
-	sbp->f_files = freenodes + tmp->tm_nodes_inuse;
-	sbp->f_ffree = freenodes;
+	sbp->f_blocks = tmpfs_pages_max(tmp);
+	used = tmpfs_pages_used(tmp);
+	if (tmpfs_pages_max(tmp) <= used)
+		sbp->f_bavail = 0;
+	else
+		sbp->f_bavail = tmpfs_pages_max(tmp) - used;
+	sbp->f_bfree = sbp->f_bavail;
+	sbp->f_files = tmp->tm_nodes_max;
+	sbp->f_ffree = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
 	/* sbp->f_owner = tmp->tn_uid; */
 
 	return 0;

--IS0zKkzwUGydFO0o--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20110210165237.GA15601>