Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 19 Sep 2013 18:53:42 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r255708 - in head: lib/libc/sys sys/compat/freebsd32 sys/kern sys/sys sys/vm usr.bin usr.bin/kdump usr.bin/protect usr.bin/truss
Message-ID:  <201309191853.r8JIrg00056100@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Thu Sep 19 18:53:42 2013
New Revision: 255708
URL: http://svnweb.freebsd.org/changeset/base/255708

Log:
  Extend the support for exempting processes from being killed when swap is
  exhausted.
  - Add a new protect(1) command that can be used to set or revoke protection
    from arbitrary processes.  Similar to ktrace it can apply a change to all
    existing descendants of a process as well as future descendants.
  - Add a new procctl(2) system call that provides a generic interface for
    control operations on processes (as opposed to the debugger-specific
    operations provided by ptrace(2)).  procctl(2) uses a combination of
    idtype_t and an id to identify the set of processes on which to operate
    similar to wait6().
  - Add a PROC_SPROTECT control operation to manage the protection status
    of a set of processes.  MADV_PROTECT still works for backwards
    compatibility.
  - Add a p_flag2 to struct proc (and a corresponding ki_flag2 to kinfo_proc)
    the first bit of which is used to track if P_PROTECT should be inherited
    by new child processes.
  
  Reviewed by:	kib, jilles (earlier version)
  Approved by:	re (delphij)
  MFC after:	1 month

Added:
  head/lib/libc/sys/procctl.2   (contents, props changed)
  head/sys/sys/procctl.h   (contents, props changed)
  head/usr.bin/protect/
  head/usr.bin/protect/Makefile   (contents, props changed)
  head/usr.bin/protect/protect.1   (contents, props changed)
  head/usr.bin/protect/protect.c   (contents, props changed)
Modified:
  head/lib/libc/sys/Makefile.inc
  head/lib/libc/sys/Symbol.map
  head/sys/compat/freebsd32/freebsd32.h
  head/sys/compat/freebsd32/freebsd32_misc.c
  head/sys/compat/freebsd32/syscalls.master
  head/sys/kern/init_main.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_proc.c
  head/sys/kern/sys_process.c
  head/sys/kern/syscalls.master
  head/sys/sys/proc.h
  head/sys/sys/syscallsubr.h
  head/sys/sys/user.h
  head/sys/vm/vm_mmap.c
  head/usr.bin/Makefile
  head/usr.bin/kdump/kdump.c
  head/usr.bin/kdump/mksubr
  head/usr.bin/truss/syscall.h
  head/usr.bin/truss/syscalls.c

Modified: head/lib/libc/sys/Makefile.inc
==============================================================================
--- head/lib/libc/sys/Makefile.inc	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/lib/libc/sys/Makefile.inc	Thu Sep 19 18:53:42 2013	(r255708)
@@ -197,6 +197,7 @@ MAN+=	abort2.2 \
 	posix_fadvise.2 \
 	posix_fallocate.2 \
 	posix_openpt.2 \
+	procctl.2 \
 	profil.2 \
 	pselect.2 \
 	ptrace.2 \

Modified: head/lib/libc/sys/Symbol.map
==============================================================================
--- head/lib/libc/sys/Symbol.map	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/lib/libc/sys/Symbol.map	Thu Sep 19 18:53:42 2013	(r255708)
@@ -395,6 +395,7 @@ FBSD_1.3 {
 	ffclock_setestimate;
 	pipe2;
 	posix_fadvise;
+	procctl;
 	wait6;
 };
 
@@ -822,6 +823,8 @@ FBSDprivate_1.0 {
 	__sys_poll;
 	_preadv;
 	__sys_preadv;
+	_procctl;
+	__sys_procctl;
 	_profil;
 	__sys_profil;
 	_pselect;

Added: head/lib/libc/sys/procctl.2
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/lib/libc/sys/procctl.2	Thu Sep 19 18:53:42 2013	(r255708)
@@ -0,0 +1,142 @@
+.\" Copyright (c) 2013 Advanced Computing Technologies LLC
+.\" Written by: John H. Baldwin <jhb@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 19, 2013
+.Dt PROCCTL 2
+.Os
+.Sh NAME
+.Nm procctl
+.Nd control processes
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/procctl.h
+.Ft int
+.Fn procctl "idtype_t idtype" "id_t id" "int cmd" "void *arg"
+.Sh DESCRIPTION
+The
+.Fn procctl
+system call provides for control over processes.
+The
+.Fa idtype
+and
+.Fa id
+arguments specify the set of processes to control.
+If multiple processes match the identifier,
+.Nm
+will make a
+.Dq best effort
+to control as many of the selected processes as possible.
+An error is only returned if no selected processes successfully complete
+the request.
+The following identifier types are supported:
+.Bl -tag -width Dv P_PGID
+.It Dv P_PID
+Control the process with the process ID
+.Fa id .
+.It Dv P_PGID
+Control processes belonging to the process group with the ID
+.Fa id .
+.El
+.Pp
+The control request to perform is specified by the
+.Fa cmd
+argument.
+The following commands are supported:
+.Bl -tag -width Dv PROC_SPROTECT
+.It Dv PROC_SPROTECT
+Set process protection state.
+This is used to mark a process as protected from being killed if the system
+exhausts available memory and swap.
+The
+.Fa arg
+parameter must point to an integer containing an operation and zero or more
+optional flags.
+The following operations are supported:
+.Bl -tag -width Dv PPROT_CLEAR
+.It Dv PPROT_SET
+Mark the selected processes as protected.
+.It Dv PPROT_CLEAR
+Clear the protected state of selected processes.
+.El
+.Pp
+The following optional flags are supported:
+.Bl -tag -width Dv PPROT_DESCEND
+.It Dv PPROT_DESCEND
+Apply the requested operation to all child processes of each selected process
+in addition to each selected process.
+.It Dv PPROT_INHERIT
+When used with
+.Dv PPROT_SET ,
+mark all future child processes of each selected process as protected.
+Future child processes will also mark all of their future child processes.
+.El
+.El
+.Sh RETURN VALUES
+If an error occurs, a value of -1 is returned and
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+The
+.Fn procctl
+system call
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EFAULT
+The
+.Fa arg
+points outside the process's allocated address space.
+.It Bq Er EINVAL
+The
+.Fa cmd
+argument specifies an unsupported command.
+.Pp
+The
+.Fa idtype
+argument specifies an unsupported identifier type.
+.It Bq Er EPERM
+The calling process does not have permission to perform the requested
+operation on any of the selected processes.
+.It Bq Er ESRCH
+No processes matched the requested
+.Fa idtype
+and
+.Fa id .
+.It Bq Er EINVAL
+An invalid operation or flag was passed in
+.Fa arg
+for a
+.Dv PROC_SPROTECT
+command.
+.El
+.Sh SEE ALSO
+.Xr ptrace 2
+.Sh HISTORY
+The
+.Fn procctl
+function appeared in
+.Fx 10 .

Modified: head/sys/compat/freebsd32/freebsd32.h
==============================================================================
--- head/sys/compat/freebsd32/freebsd32.h	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/compat/freebsd32/freebsd32.h	Thu Sep 19 18:53:42 2013	(r255708)
@@ -342,6 +342,7 @@ struct kinfo_proc32 {
 	char	ki_loginclass[LOGINCLASSLEN+1];
 	char	ki_sparestrings[50];
 	int	ki_spareints[KI_NSPARE_INT];
+	int	ki_flag2;
 	int	ki_fibnum;
 	u_int	ki_cr_flags;
 	int	ki_jid;

Modified: head/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- head/sys/compat/freebsd32/freebsd32_misc.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/compat/freebsd32/freebsd32_misc.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/reboot.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
@@ -3000,3 +3001,23 @@ convert_sigevent32(struct sigevent32 *si
 	}
 	return (0);
 }
+
+int
+freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
+{
+	void *data;
+	int error, flags;
+
+	switch (uap->com) {
+	case PROC_SPROTECT:
+		error = copyin(PTRIN(uap->data), &flags, sizeof(flags));
+		if (error)
+			return (error);
+		data = &flags;
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
+	    uap->com, data));
+}

Modified: head/sys/compat/freebsd32/syscalls.master
==============================================================================
--- head/sys/compat/freebsd32/syscalls.master	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/compat/freebsd32/syscalls.master	Thu Sep 19 18:53:42 2013	(r255708)
@@ -1056,3 +1056,12 @@
 542	AUE_PIPE	NOPROTO	{ int pipe2(int *fildes, int flags); }
 543	AUE_NULL	NOSTD	{ int freebsd32_aio_mlock( \
 				    struct aiocb32 *aiocbp); }
+#ifdef PAD64_REQUIRED
+544	AUE_NULL	STD	{ int freebsd32_procctl(int idtype, int pad, \
+				    uint32_t id1, uint32_t id2, int com, \
+				    void *data); }
+#else
+544	AUE_NULL	STD	{ int freebsd32_procctl(int idtype, \
+				    uint32_t id1, uint32_t id2, int com, \
+				    void *data); }
+#endif

Modified: head/sys/kern/init_main.c
==============================================================================
--- head/sys/kern/init_main.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/kern/init_main.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -474,6 +474,7 @@ proc0_init(void *dummy __unused)
 
 	p->p_sysent = &null_sysvec;
 	p->p_flag = P_SYSTEM | P_INMEM;
+	p->p_flag2 = 0;
 	p->p_state = PRS_NORMAL;
 	knlist_init_mtx(&p->p_klist, &p->p_mtx);
 	STAILQ_INIT(&p->p_ktr);

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/kern/kern_fork.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -489,6 +489,7 @@ do_fork(struct thread *td, int flags, st
 	 * Increase reference counts on shared objects.
 	 */
 	p2->p_flag = P_INMEM;
+	p2->p_flag2 = 0;
 	p2->p_swtick = ticks;
 	if (p1->p_flag & P_PROFIL)
 		startprofclock(p2);
@@ -512,6 +513,11 @@ do_fork(struct thread *td, int flags, st
 	p2->p_fd = fd;
 	p2->p_fdtol = fdtol;
 
+	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
+		p2->p_flag |= P_PROTECTED;
+		p2->p_flag2 |= P2_INHERIT_PROTECTED;
+	}
+
 	/*
 	 * p_limit is copy-on-write.  Bump its refcount.
 	 */

Modified: head/sys/kern/kern_proc.c
==============================================================================
--- head/sys/kern/kern_proc.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/kern/kern_proc.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -802,6 +802,7 @@ fill_kinfo_proc_only(struct proc *p, str
 	kp->ki_fd = p->p_fd;
 	kp->ki_vmspace = p->p_vmspace;
 	kp->ki_flag = p->p_flag;
+	kp->ki_flag2 = p->p_flag2;
 	cred = p->p_ucred;
 	if (cred) {
 		kp->ki_uid = cred->cr_uid;
@@ -1161,6 +1162,7 @@ freebsd32_kinfo_proc_out(const struct ki
 	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
 	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
 	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
+	CP(*ki, *ki32, ki_flag2);
 	CP(*ki, *ki32, ki_fibnum);
 	CP(*ki, *ki32, ki_cr_flags);
 	CP(*ki, *ki32, ki_jid);

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/kern/sys_process.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -41,7 +41,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/vnode.h>
 #include <sys/ptrace.h>
 #include <sys/rwlock.h>
@@ -1240,3 +1242,196 @@ stopevent(struct proc *p, unsigned int e
 		msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
 	} while (p->p_step);
 }
+
+static int
+protect_setchild(struct thread *td, struct proc *p, int flags)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	if (p->p_flag & P_SYSTEM || p_cansee(td, p) != 0)
+		return (0);
+	if (flags & PPROT_SET) {
+		p->p_flag |= P_PROTECTED;
+		if (flags & PPROT_INHERIT)
+			p->p_flag2 |= P2_INHERIT_PROTECTED;
+	} else {
+		p->p_flag &= ~P_PROTECTED;
+		p->p_flag2 &= ~P2_INHERIT_PROTECTED;
+	}
+	return (1);
+}
+
+static int
+protect_setchildren(struct thread *td, struct proc *top, int flags)
+{
+	struct proc *p;
+	int ret;
+
+	p = top;
+	ret = 0;
+	sx_assert(&proctree_lock, SX_LOCKED);
+	for (;;) {
+		ret |= protect_setchild(td, p, flags);
+		PROC_UNLOCK(p);
+		/*
+		 * If this process has children, descend to them next,
+		 * otherwise do any siblings, and if done with this level,
+		 * follow back up the tree (but not past top).
+		 */
+		if (!LIST_EMPTY(&p->p_children))
+			p = LIST_FIRST(&p->p_children);
+		else for (;;) {
+			if (p == top) {
+				PROC_LOCK(p);
+				return (ret);
+			}
+			if (LIST_NEXT(p, p_sibling)) {
+				p = LIST_NEXT(p, p_sibling);
+				break;
+			}
+			p = p->p_pptr;
+		}
+		PROC_LOCK(p);
+	}
+}
+
+static int
+protect_set(struct thread *td, struct proc *p, int flags)
+{
+	int error, ret;
+
+	switch (PPROT_OP(flags)) {
+	case PPROT_SET:
+	case PPROT_CLEAR:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
+		return (EINVAL);
+
+	error = priv_check(td, PRIV_VM_MADV_PROTECT);
+	if (error)
+		return (error);
+
+	if (flags & PPROT_DESCEND)
+		ret = protect_setchildren(td, p, flags);
+	else
+		ret = protect_setchild(td, p, flags);
+	if (ret == 0)
+		return (EPERM);
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct procctl_args {
+	idtype_t idtype;
+	id_t	id;
+	int	com;
+	void	*data;
+};
+#endif
+/* ARGSUSED */
+int
+sys_procctl(struct thread *td, struct procctl_args *uap)
+{
+	int error, flags;
+	void *data;
+
+	switch (uap->com) {
+	case PROC_SPROTECT:
+		error = copyin(uap->data, &flags, sizeof(flags));
+		if (error)
+			return (error);
+		data = &flags;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (kern_procctl(td, uap->idtype, uap->id, uap->com, data));
+}
+
+static int
+kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	switch (com) {
+	case PROC_SPROTECT:
+		return (protect_set(td, p, *(int *)data));
+	default:
+		return (EINVAL);
+	}
+}
+
+int
+kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
+{
+	struct pgrp *pg;
+	struct proc *p;
+	int error, first_error, ok;
+
+	sx_slock(&proctree_lock);
+	switch (idtype) {
+	case P_PID:
+		p = pfind(id);
+		if (p == NULL) {
+			error = ESRCH;
+			break;
+		}
+		if (p->p_state == PRS_NEW)
+			error = ESRCH;
+		else
+			error = p_cansee(td, p);
+		if (error == 0)
+			error = kern_procctl_single(td, p, com, data);
+		PROC_UNLOCK(p);
+		break;
+	case P_PGID:
+		/*
+		 * Attempt to apply the operation to all members of the
+		 * group.  Ignore processes in the group that can't be
+		 * seen.  Ignore errors so long as at least one process is
+		 * able to complete the request successfully.
+		 */
+		pg = pgfind(id);
+		if (pg == NULL) {
+			error = ESRCH;
+			break;
+		}
+		PGRP_UNLOCK(pg);
+		ok = 0;
+		first_error = 0;
+		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+			PROC_LOCK(p);
+			if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) {
+				PROC_UNLOCK(p);
+				continue;
+			}
+			error = kern_procctl_single(td, p, com, data);
+			PROC_UNLOCK(p);
+			if (error == 0)
+				ok = 1;
+			else if (first_error == 0)
+				first_error = error;
+		}
+		if (ok)
+			error = 0;
+		else if (first_error != 0)
+			error = first_error;
+		else
+			/*
+			 * Was not able to see any processes in the
+			 * process group.
+			 */
+			error = ESRCH;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	sx_sunlock(&proctree_lock);
+	return (error);
+}

Modified: head/sys/kern/syscalls.master
==============================================================================
--- head/sys/kern/syscalls.master	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/kern/syscalls.master	Thu Sep 19 18:53:42 2013	(r255708)
@@ -978,5 +978,7 @@
 				    int flags); }
 542	AUE_PIPE	STD	{ int pipe2(int *fildes, int flags); }
 543	AUE_NULL	NOSTD	{ int aio_mlock(struct aiocb *aiocbp); }
+544	AUE_NULL	STD	{ int procctl(idtype_t idtype, id_t id, \
+				    int com, void *data); }
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/sys/proc.h	Thu Sep 19 18:53:42 2013	(r255708)
@@ -492,11 +492,8 @@ struct proc {
 	struct callout	p_limco;	/* (c) Limit callout handle */
 	struct sigacts	*p_sigacts;	/* (x) Signal actions, state (CPU). */
 
-	/*
-	 * The following don't make too much sense.
-	 * See the td_ or ke_ versions of the same flags.
-	 */
 	int		p_flag;		/* (c) P_* flags. */
+	int		p_flag2;	/* (c) P2_* flags. */
 	enum {
 		PRS_NEW = 0,		/* In creation */
 		PRS_NORMAL,		/* threads can be run. */
@@ -641,6 +638,9 @@ struct proc {
 #define	P_SHOULDSTOP(p)	((p)->p_flag & P_STOPPED)
 #define	P_KILLED(p)	((p)->p_flag & P_WKILLED)
 
+/* These flags are kept in p_flag2. */
+#define	P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */
+
 /*
  * These were process status values (p_stat), now they are only used in
  * legacy conversion code.

Added: head/sys/sys/procctl.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/sys/procctl.h	Thu Sep 19 18:53:42 2013	(r255708)
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2013 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SYS_PROCCTL_H_
+#define	_SYS_PROCCTL_H_
+
+#define	PROC_SPROTECT		1	/* set protected state */
+
+/* Operations for PROC_SPROTECT (passed in integer arg). */
+#define	PPROT_OP(x)	((x) & 0xf)
+#define	PPROT_SET	1
+#define	PPROT_CLEAR	2
+
+/* Flags for PROC_SPROTECT (ORed in with operation). */
+#define	PPROT_FLAGS(x)	((x) & ~0xf)
+#define	PPROT_DESCEND	0x10
+#define	PPROT_INHERIT	0x20
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#include <sys/wait.h>
+
+__BEGIN_DECLS
+int	procctl(idtype_t, id_t, int, void *);
+__END_DECLS
+
+#endif
+
+#endif /* !_SYS_PROCCTL_H_ */

Modified: head/sys/sys/syscallsubr.h
==============================================================================
--- head/sys/sys/syscallsubr.h	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/sys/syscallsubr.h	Thu Sep 19 18:53:42 2013	(r255708)
@@ -167,6 +167,8 @@ int	kern_posix_fadvise(struct thread *td
 	    int advice);
 int	kern_posix_fallocate(struct thread *td, int fd, off_t offset,
 	    off_t len);
+int	kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com,
+	    void *data);
 int	kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
 	    fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);

Modified: head/sys/sys/user.h
==============================================================================
--- head/sys/sys/user.h	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/sys/user.h	Thu Sep 19 18:53:42 2013	(r255708)
@@ -84,7 +84,7 @@
  * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and
  * function kvm_proclist in lib/libkvm/kvm_proc.c .
  */
-#define	KI_NSPARE_INT	8
+#define	KI_NSPARE_INT	7
 #define	KI_NSPARE_LONG	12
 #define	KI_NSPARE_PTR	6
 
@@ -187,6 +187,7 @@ struct kinfo_proc {
 	 */
 	char	ki_sparestrings[50];	/* spare string space */
 	int	ki_spareints[KI_NSPARE_INT];	/* spare room for growth */
+	int	ki_flag2;		/* P2_* flags */
 	int	ki_fibnum;		/* Default FIB number */
 	u_int	ki_cr_flags;		/* Credential flags */
 	int	ki_jid;			/* Process jail ID */

Modified: head/sys/vm/vm_mmap.c
==============================================================================
--- head/sys/vm/vm_mmap.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/sys/vm/vm_mmap.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/filedesc.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/racct.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
@@ -68,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mount.h>
 #include <sys/conf.h>
 #include <sys/stat.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/vmmeter.h>
 
@@ -739,23 +741,18 @@ sys_madvise(td, uap)
 {
 	vm_offset_t start, end;
 	vm_map_t map;
-	struct proc *p;
-	int error;
+	int flags;
 
 	/*
 	 * Check for our special case, advising the swap pager we are
 	 * "immortal."
 	 */
 	if (uap->behav == MADV_PROTECT) {
-		error = priv_check(td, PRIV_VM_MADV_PROTECT);
-		if (error == 0) {
-			p = td->td_proc;
-			PROC_LOCK(p);
-			p->p_flag |= P_PROTECTED;
-			PROC_UNLOCK(p);
-		}
-		return (error);
+		flags = PPROT_SET;
+		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
+		    PROC_SPROTECT, &flags));
 	}
+
 	/*
 	 * Check for illegal behavior
 	 */

Modified: head/usr.bin/Makefile
==============================================================================
--- head/usr.bin/Makefile	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/usr.bin/Makefile	Thu Sep 19 18:53:42 2013	(r255708)
@@ -132,6 +132,7 @@ SUBDIR=	alias \
 	printenv \
 	printf \
 	procstat \
+	protect \
 	rctl \
 	renice \
 	rev \

Modified: head/usr.bin/kdump/kdump.c
==============================================================================
--- head/usr.bin/kdump/kdump.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/usr.bin/kdump/kdump.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -1161,6 +1161,18 @@ ktrsyscall(struct ktr_syscall *ktr, u_in
 				ip++;
 				narg--;
 				break;
+			case SYS_procctl:
+				putchar('(');
+				idtypename(*ip, decimal);
+				c = ',';
+				ip++;
+				narg--;
+				print_number(ip, narg, c);
+				putchar(',');
+				procctlcmdname(*ip);
+				ip++;
+				narg--;
+				break;
 			}
 		}
 		while (narg > 0) {

Modified: head/usr.bin/kdump/mksubr
==============================================================================
--- head/usr.bin/kdump/mksubr	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/usr.bin/kdump/mksubr	Thu Sep 19 18:53:42 2013	(r255708)
@@ -169,6 +169,7 @@ cat <<_EOF_
 #include <netinet/in.h>
 #include <sys/param.h>
 #include <sys/mount.h>
+#include <sys/procctl.h>
 #include <sys/ptrace.h>
 #include <sys/resource.h>
 #include <sys/reboot.h>
@@ -465,6 +466,7 @@ auto_or_type     "mountflagsname"      "
 auto_switch_type "msyncflagsname"      "MS_[A-Z]+[[:space:]]+0x[0-9]+"                "sys/mman.h"
 auto_or_type     "nfssvcname"          "NFSSVC_[A-Z0-9]+[[:space:]]+0x[0-9]+"            "nfs/nfssvc.h"
 auto_switch_type "prioname"            "PRIO_[A-Z]+[[:space:]]+[0-9]"                 "sys/resource.h"
+auto_switch_type "procctlcmdname"      "PROC_[A-Z]+[[:space:]]+[0-9]"                 "sys/procctl.h"
 auto_switch_type "ptraceopname"        "PT_[[:alnum:]_]+[[:space:]]+[0-9]+"           "sys/ptrace.h"
 auto_switch_type "quotactlname"        "Q_[A-Z]+[[:space:]]+0x[0-9]+"                 "ufs/ufs/quota.h"
 auto_or_type     "rebootoptname"       "RB_[A-Z]+[[:space:]]+0x[0-9]+"                "sys/reboot.h"

Added: head/usr.bin/protect/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/usr.bin/protect/Makefile	Thu Sep 19 18:53:42 2013	(r255708)
@@ -0,0 +1,6 @@
+# $FreeBSD$
+
+PROG=   protect
+WARNS?=	6
+
+.include <bsd.prog.mk>

Added: head/usr.bin/protect/protect.1
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/usr.bin/protect/protect.1	Thu Sep 19 18:53:42 2013	(r255708)
@@ -0,0 +1,89 @@
+.\" Copyright (c) 2013 Advanced Computing Technologies LLC
+.\" Written by: John H. Baldwin <jhb@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 19, 2013
+.Dt PROTECT 1
+.Os
+.Sh NAME
+.Nm protect
+.Nd "protect processes from being killed when swap space is exhausted"
+.Sh SYNOPSIS
+.Nm
+.Op Fl i
+.Ar command
+.Nm
+.Op Fl cdi
+.Fl g Ar pgrp | Fl p Ar pid
+.Sh DESCRIPTION
+The
+.Nm
+command is used to mark processes as protected.
+The kernel does not kill protected processes when swap space is exhausted.
+Note that this protected state is not inherited by child processes by default.
+.Pp
+The options are:
+.Bl -tag -width indent
+.It Fl c
+Remove protection from the specified processes.
+.It Fl d
+Apply the operation to all current children of the specified processes.
+.It Fl i
+Apply the operation to all future children of the specified processes.
+.It Fl g Ar pgrp
+Apply the operation to all processes in the specified process group.
+.It Fl p Ar pid
+Apply the operation to the specified process.
+.It Ar command
+Execute
+.Ar command
+as a protected process.
+.El
+.Pp
+Note that only one of the
+.Fl p
+or
+.Fl g
+flags may be specified when adjusting the state of existing processes.
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+Mark the Xorg server as protected:
+.Pp
+.Dl "pgrep Xorg | xargs protect -p"
+Protect all ssh sessions and their child processes:
+.Pp
+.Dl "pgrep sshd | xargs protect -dip"
+Remove protection from all current and future processes:
+.Pp
+.Dl "protect -cdi -p 1"
+.Sh SEE ALSO
+.Xr procctl 2
+.Sh BUGS
+If you protect a runaway process that allocates all memory the system will
+deadlock.
+.Pp
+Inheritance of the protected state is not yet implemented.

Added: head/usr.bin/protect/protect.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/usr.bin/protect/protect.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -0,0 +1,122 @@
+/*-
+ * Copyright (c) 2013 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/procctl.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static void
+usage(void)
+{
+
+	fprintf(stderr, "usage: protect [-i] command\n");
+	fprintf(stderr, "       protect [-cdi] -g pgrp | -p pid\n");
+	exit(1);
+}
+
+static id_t
+parse_id(char *id)
+{
+	static bool first = true;
+	long value;
+	char *ch;
+
+	if (!first) {
+		warnx("only one -g or -p flag is permitted");
+		usage();
+	}
+	value = strtol(id, &ch, 0);
+	if (*ch != '\0') {
+		warnx("invalid process id");
+		usage();
+	}
+	return (value);
+}
+
+int
+main(int argc, char *argv[])
+{
+	idtype_t idtype;
+	id_t id;
+	int ch, flags;
+	bool descend, inherit, idset;
+
+	idtype = P_PID;
+	id = getpid();
+	flags = PPROT_SET;
+	descend = inherit = idset = false;
+	while ((ch = getopt(argc, argv, "cdig:p:")) != -1)
+		switch (ch) {
+		case 'c':
+			flags = PPROT_CLEAR;
+			break;
+		case 'd':
+			descend = true;
+			break;
+		case 'i':
+			inherit = true;
+			break;
+		case 'g':
+			idtype = P_PGID;
+			id = parse_id(optarg);
+			idset = true;
+			break;
+		case 'p':
+			idtype = P_PID;
+			id = parse_id(optarg);
+			idset = true;
+			break;
+		}
+	argc -= optind;
+	argv += optind;
+
+	if ((idset && argc != 0) || (!idset && (argc == 0 || descend)))
+		usage();
+
+	if (descend)
+		flags |= PPROT_DESCEND;
+	if (inherit)
+		flags |= PPROT_INHERIT;
+	if (procctl(idtype, id, PROC_SPROTECT, &flags) == -1)
+		err(1, "procctl");
+
+	if (argc != 0) {
+		errno = 0;
+		execvp(*argv, argv);
+		err(errno == ENOENT ? 127 : 126, "%s", *argv);
+	}
+	return (0);
+}

Modified: head/usr.bin/truss/syscall.h
==============================================================================
--- head/usr.bin/truss/syscall.h	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/usr.bin/truss/syscall.h	Thu Sep 19 18:53:42 2013	(r255708)
@@ -40,7 +40,7 @@ enum Argtype { None = 1, Hex, Octal, Int
 	Fd_set, Sigaction, Fcntl, Mprot, Mmapflags, Whence, Readlinkres,
 	Umtx, Sigset, Sigprocmask, Kevent, Sockdomain, Socktype, Open,
 	Fcntlflag, Rusage, BinString, Shutdown, Resource, Rlimit, Timeval2,
-	Pathconf, Rforkflags, ExitStatus, Waitoptions, Idtype };
+	Pathconf, Rforkflags, ExitStatus, Waitoptions, Idtype, Procctl };
 
 #define	ARG_MASK	0xff
 #define	OUT	0x100

Modified: head/usr.bin/truss/syscalls.c
==============================================================================
--- head/usr.bin/truss/syscalls.c	Thu Sep 19 18:00:05 2013	(r255707)
+++ head/usr.bin/truss/syscalls.c	Thu Sep 19 18:53:42 2013	(r255708)
@@ -41,6 +41,7 @@ static const char rcsid[] =
 
 #include <sys/types.h>
 #include <sys/mman.h>
+#include <sys/procctl.h>
 #include <sys/ptrace.h>
 #include <sys/socket.h>
 #include <sys/time.h>
@@ -270,6 +271,8 @@ static struct syscall syscalls[] = {
 	{ .name = "wait6", .ret_type = 1, .nargs = 6,
 	  .args = { { Idtype, 0 }, { Int, 1 }, { ExitStatus | OUT, 2 },
 		    { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Ptr, 5 } } },
+	{ .name = "procctl", .ret_type = 1, .nargs = 4,
+	  .args = { { Idtype, 0 }, { Int, 1 }, { Procctl, 2 }, { Ptr, 3 } } },
 	{ .name = 0 },
 };
 
@@ -399,6 +402,10 @@ static struct xlat idtype_arg[] = {
 	X(P_CTID) X(P_CPUID) X(P_PSETID) XEND
 };
 
+static struct xlat procctl_arg[] = {
+	X(PROC_SPROTECT) XEND
+};
+
 #undef X
 #undef XEND
 
@@ -1198,6 +1205,9 @@ print_arg(struct syscall_args *sc, unsig
 	case Idtype:
 		tmp = strdup(xlookup(idtype_arg, args[sc->offset]));
 		break;
+	case Procctl:
+		tmp = strdup(xlookup(procctl_arg, args[sc->offset]));
+		break;
 	default:
 		errx(1, "Invalid argument type %d\n", sc->type & ARG_MASK);
 	}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201309191853.r8JIrg00056100>