Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 Apr 2014 14:12:22 +0200
From:      Mateusz Guzik <mjguzik@gmail.com>
To:        Eduardo Morras <emorrasg@yahoo.es>
Cc:        freebsd-hackers@freebsd.org
Subject:   Re: pipe() resource exhaustion
Message-ID:  <20140408121222.GB30326@dft-labs.eu>
In-Reply-To: <20140408130206.e75f3bf6c6df28b6e4839e70@yahoo.es>
References:  <lhu0jv$r6n$1@ger.gmane.org> <ab57e60fcc1c1438fcca500e3c594d35@mail.feld.me> <20140408130206.e75f3bf6c6df28b6e4839e70@yahoo.es>

next in thread | previous in thread | raw e-mail | index | archive | help
On Tue, Apr 08, 2014 at 01:02:06PM +0200, Eduardo Morras wrote:
> On Mon, 7 Apr 2014 07:25:22 -0500
> Mark Felder <feld@freebsd.org> wrote:
> 
> > On 2014-04-07 06:02, Ivan Voras wrote:
> > > Hello,
> > > 
> > > Last time I mentioned this it didn't get any attention, so I'll try
> > > again. By accident (via a buggy synergy server process) I found
> > > that a simple userland process can exhaust kernel pipe memory 
> > > (kern.ipc.pipekva
> > > sysctl) which as a consequence has that new processes which use pipe
> > > cannot be started, which includes "su", by which an administrator
> > > could kill such a process.
> > > 
> > 
> > That's a pretty painful local denial of service :(
> 
> Yes it is. Perhaps there should be 8% fd reserved for root, su and setuid family syscalls like in filesystem space or postgresql reserved connections for db admin.
> 

There is an fd reserve already. Report talks about problems with
creating a new *pipe*, not any fd in general.

That said, supporting a reserve for this one sounds like a good idea and
not that hard to implement - one can either play with atomics and double
check or just place a mutex-protected check in pipespace_new (before
vm_map_find).

I implemented the second one, which turned out to be surprisingly ugly.
For now it abuses PRIV_MAXPROC and has a reserve taken out of the blue.

Thoughts?

diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 6ba52e3..04c4559 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -102,6 +102,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/priv.h>
 #include <sys/ttycom.h>
 #include <sys/stat.h>
 #include <sys/malloc.h>
@@ -200,6 +201,7 @@ static struct filterops pipe_wfiltops = {
 #define MAXPIPESIZE (2*PIPE_SIZE/3)
 
 static long amountpipekva;
+static struct mtx amountpipekva_mtx;
 static int pipefragretry;
 static int pipeallocfail;
 static int piperesizefail;
@@ -256,6 +258,8 @@ pipeinit(void *dummy __unused)
 	KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized"));
 	pipedev_ino = devfs_alloc_cdp_inode();
 	KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized"));
+
+	mtx_init(&amountpipekva_mtx, "pipe kva mutex", NULL, MTX_DEF);
 }
 
 static int
@@ -515,24 +519,29 @@ pipespace_new(cpipe, size)
 	KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));
 	KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW),
 		("pipespace: resize of direct writes not allowed"));
+
+	mtx_lock(&amountpipekva_mtx);
 retry:
 	cnt = cpipe->pipe_buffer.cnt;
 	if (cnt > size)
 		size = cnt;
 
 	size = round_page(size);
-	buffer = (caddr_t) vm_map_min(pipe_map);
 
-	error = vm_map_find(pipe_map, NULL, 0,
-		(vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE,
-		VM_PROT_ALL, VM_PROT_ALL, 0);
-	if (error != KERN_SUCCESS) {
+	if (amountpipekva + size + 10 * PAGE_SIZE >= maxpipekva) {
+		if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC, 0) == 0 &&
+		    amountpipekva + size <= maxpipekva)
+			goto alloc;
+recalc:
 		if ((cpipe->pipe_buffer.buffer == NULL) &&
 			(size > SMALL_PIPE_SIZE)) {
 			size = SMALL_PIPE_SIZE;
 			pipefragretry++;
 			goto retry;
 		}
+
+		mtx_unlock(&amountpipekva_mtx);
+
 		if (cpipe->pipe_buffer.buffer == NULL) {
 			pipeallocfail++;
 			if (ppsratecheck(&lastfail, &curfail, 1))
@@ -542,6 +551,20 @@ retry:
 		}
 		return (ENOMEM);
 	}
+alloc:
+	atomic_add_long(&amountpipekva, size);
+	mtx_unlock(&amountpipekva_mtx);
+
+	buffer = (caddr_t) vm_map_min(pipe_map);
+
+	error = vm_map_find(pipe_map, NULL, 0,
+		(vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE,
+		VM_PROT_ALL, VM_PROT_ALL, 0);
+	if (error != KERN_SUCCESS) {
+		mtx_lock(&amountpipekva_mtx);
+		atomic_subtract_long(&amountpipekva, size);
+		goto recalc;
+	}
 
 	/* copy data, then free old resources if we're resizing */
 	if (cnt > 0) {
@@ -563,7 +586,6 @@ retry:
 	cpipe->pipe_buffer.in = cnt;
 	cpipe->pipe_buffer.out = 0;
 	cpipe->pipe_buffer.cnt = cnt;
-	atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size);
 	return (0);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140408121222.GB30326>