Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 7 Jan 2013 20:22:35 +0200
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        arch@freebsd.org
Cc:        toolchain@freebsd.org
Subject:   Fast sigblock (AKA rtld speedup)
Message-ID:  <20130107182235.GA65279@kib.kiev.ua>

next in thread | raw e-mail | index | archive | help

--sdtB3X0nJg68CQEu
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

Below is the forward of the patch for which I failed to obtain a private
review. Might be, the list generates more responses.

Our rtld has a performance bootleneck, typically exposed by the images
with the lot of the run-time relocation processing, and by the C++
exception handling. We block the signals delivery during the rtld
performing the relocations, as well as for the dl_iterate_phdr(3) (the
later is used for finding the dwarf unwinding tables).

The signal blocking is needed to allow the rtld to resolve the symbols
for the signal handlers in the safe way, but also causes 2 syscalls
overhead per each rtld entry.

The proposed approach allows to shave off those two syscalls, doubling
the FreeBSD performance for the (silly) throw/catch C++ microbenchmark.

Date: Mon, 13 Aug 2012 15:26:00 +0300
=46rom: Konstantin Belousov <kostikbel@gmail.com>

=2E..

The basic idea is to implement sigprocmask() as single write into usermode
address. If kernel needs to calculate the signal mask for current thread,
it takes into the consideration non-zero value of the word at the agreed
address. Also, usermode is informed about signals which were put on hold
due to fast sigblock active.

As I said, on my measurements in microbenchmark that did throw/catch in
a loop, I see equal user and system time spent for unpatched system, and
same user time with zero system time on patched system.

Patch can be improved further, e.g. it would be nice to allow rtld to fall
back to sigprocmask(2) if kernel does not support fast sigblock, to prevent
flag day. Also, the mask enforced by fast sigblock can be made configurable.

Note that libthr already blocks signals by catching them, and not using rtld
service in the first line handler. I tried to make the change in the spirit
of libthr interceptors, but handoff to libthr appears too complicated to
work. In fact, libthr can be changed to start using fast sigblock instead
of wrapping sigaction, but this is out of scope of the proposal right now.

Please comment.

diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
index 6888ea0..3b75539 100644
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -546,6 +546,7 @@ FBSDprivate_1.0 {
 	__sys_extattr_set_link;
 	_extattrctl;
 	__sys_extattrctl;
+	__sys_fast_sigblock;
 	_fchdir;
 	__sys_fchdir;
 	_fchflags;
diff --git a/libexec/rtld-elf/rtld_lock.c b/libexec/rtld-elf/rtld_lock.c
index d1563e5..50c52c6 100644
--- a/libexec/rtld-elf/rtld_lock.c
+++ b/libexec/rtld-elf/rtld_lock.c
@@ -43,6 +43,7 @@
  */
=20
 #include <sys/param.h>
+#include <sys/signalvar.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <time.h>
@@ -59,8 +60,8 @@ typedef struct Struct_Lock {
 	void *base;
 } Lock;
=20
-static sigset_t fullsigmask, oldsigmask;
 static int thread_flag;
+static uint32_t fsigblock;
=20
 static void *
 def_lock_create()
@@ -111,18 +112,26 @@ def_rlock_acquire(void *lock)
 }
=20
 static void
+sig_fastunblock(void)
+{
+	uint32_t oldval;
+
+	oldval =3D atomic_fetchadd_32(&fsigblock, -FAST_SIGBLOCK_INC);
+	if (oldval =3D=3D (FAST_SIGBLOCK_PEND | FAST_SIGBLOCK_INC))
+		__sys_fast_sigblock(FAST_SIGBLOCK_UNBLOCK, NULL);
+}
+
+static void
 def_wlock_acquire(void *lock)
 {
     Lock *l =3D (Lock *)lock;
-    sigset_t tmp_oldsigmask;
=20
     for ( ; ; ) {
-	sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
+	atomic_add_rel_32(&fsigblock, FAST_SIGBLOCK_INC);
 	if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
 	    break;
-	sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
+	sig_fastunblock();
     }
-    oldsigmask =3D tmp_oldsigmask;
 }
=20
 static void
@@ -134,7 +143,7 @@ def_lock_release(void *lock)
     	atomic_add_rel_int(&l->lock, -RC_INCR);
     else {
     	atomic_add_rel_int(&l->lock, -WAFLAG);
-    	sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
+	sig_fastunblock();
     }
 }
=20
@@ -286,19 +295,7 @@ lockdflt_init()
=20
     memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo));
     _rtld_thread_init(NULL);
-    /*
-     * Construct a mask to block all signals except traps which might
-     * conceivably be generated within the dynamic linker itself.
-     */
-    sigfillset(&fullsigmask);
-    sigdelset(&fullsigmask, SIGILL);
-    sigdelset(&fullsigmask, SIGTRAP);
-    sigdelset(&fullsigmask, SIGABRT);
-    sigdelset(&fullsigmask, SIGEMT);
-    sigdelset(&fullsigmask, SIGFPE);
-    sigdelset(&fullsigmask, SIGBUS);
-    sigdelset(&fullsigmask, SIGSEGV);
-    sigdelset(&fullsigmask, SIGSYS);
+    __sys_fast_sigblock(FAST_SIGBLOCK_SETPTR, &fsigblock);
 }
=20
 /*
@@ -319,7 +316,10 @@ _rtld_thread_init(struct RtldLockInfo *pli)
=20
 	if (pli =3D=3D NULL)
 		pli =3D &deflockinfo;
-
+	else {
+		fsigblock =3D 0;
+		__sys_fast_sigblock(FAST_SIGBLOCK_UNSETPTR, NULL);
+	}
=20
 	for (i =3D 0; i < RTLD_LOCK_CNT; i++)
 		if ((locks[i] =3D pli->lock_create()) =3D=3D NULL)
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/sy=
scalls.master
index 0891e41..f9e8b9e 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -997,3 +997,4 @@
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2, \
 				    int advice); }
+532	AUE_NULL	NOPROTO	{ int fast_sigblock(int cmd, uint32_t *ptr); }
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 90f7311..8a3cd15 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1031,6 +1031,7 @@ exec_new_vmspace(imgp, sv)
 	int error;
 	struct proc *p =3D imgp->proc;
 	struct vmspace *vmspace =3D p->p_vmspace;
+	struct thread *td =3D curthread;
 	vm_object_t obj;
 	vm_offset_t sv_minuser, stack_addr;
 	vm_map_t map;
@@ -1039,6 +1040,10 @@ exec_new_vmspace(imgp, sv)
 	imgp->vmspace_destroyed =3D 1;
 	imgp->sysent =3D sv;
=20
+	td->td_pflags &=3D ~TDP_FAST_SIGBLOCK;
+	td->td_sigblock_ptr =3D NULL;
+	td->td_sigblock_val =3D 0;
+
 	/* May be called with Giant held */
 	EVENTHANDLER_INVOKE(process_exec, p, imgp);
=20
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 2685a8b..3c8a2f7 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -230,6 +230,7 @@ static int sigproptbl[NSIG] =3D {
 };
=20
 static void reschedule_signals(struct proc *p, sigset_t block, int flags);
+static sigset_t fastblock_mask;
=20
 static void
 sigqueue_start(void)
@@ -240,6 +241,16 @@ sigqueue_start(void)
 	p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS);
 	p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1);
 	p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc);
+	SIGFILLSET(fastblock_mask);
+	SIG_CANTMASK(fastblock_mask);
+	SIGDELSET(fastblock_mask, SIGILL);
+	SIGDELSET(fastblock_mask, SIGTRAP);
+	SIGDELSET(fastblock_mask, SIGABRT);
+	SIGDELSET(fastblock_mask, SIGEMT);
+	SIGDELSET(fastblock_mask, SIGFPE);
+	SIGDELSET(fastblock_mask, SIGBUS);
+	SIGDELSET(fastblock_mask, SIGSEGV);
+	SIGDELSET(fastblock_mask, SIGSYS);
 }
=20
 ksiginfo_t *
@@ -2525,6 +2536,7 @@ issignal(struct thread *td, int stop_allowed)
 	struct sigqueue *queue;
 	sigset_t sigpending;
 	int sig, prop, newsig;
+	uint32_t oldval;
=20
 	p =3D td->td_proc;
 	ps =3D p->p_sigacts;
@@ -2541,6 +2553,32 @@ issignal(struct thread *td, int stop_allowed)
 			SIG_STOPSIGMASK(sigpending);
 		if (SIGISEMPTY(sigpending))	/* no signal to send */
 			return (0);
+
+		/*
+		 * Do fast sigblock if requested by usermode.  Since
+		 * we do know that there was a signal pending at this
+		 * point, set the FAST_SIGBLOCK_PEND as indicator for
+		 * usermode to perform a dummy call to
+		 * FAST_SIGBLOCK_UNBLOCK, which causes immediate
+		 * delivery of postponed pending signal.
+		 */
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) !=3D 0) {
+			if (td->td_sigblock_val !=3D 0)
+				SIGSETNAND(sigpending, fastblock_mask);
+			if (SIGISEMPTY(sigpending)) {
+				oldval =3D fuword32(td->td_sigblock_ptr);
+				if (oldval =3D=3D -1) {
+					fetch_fast_sigblock_failed(td, 0);
+					return (0);
+				}
+				oldval |=3D FAST_SIGBLOCK_PEND;
+				if (suword32(td->td_sigblock_ptr, oldval) =3D=3D -1)
+					fetch_fast_sigblock_failed(td, 1);
+				td->td_sigblock_val =3D oldval;
+				return (0);
+			}
+		}
+
 		sig =3D sig_ffs(&sigpending);
=20
 		if (p->p_stops & S_SIG) {
@@ -3456,3 +3494,92 @@ sigacts_shared(struct sigacts *ps)
 	mtx_unlock(&ps->ps_mtx);
 	return (shared);
 }
+
+int
+sys_fast_sigblock(struct thread *td, struct fast_sigblock_args *uap)
+{
+	struct proc *p;
+	int error;
+	uint32_t oldval;
+
+	error =3D 0;
+	switch (uap->cmd) {
+	case FAST_SIGBLOCK_SETPTR:
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) !=3D 0)
+			error =3D EBUSY;
+		else if (((uintptr_t)(uap->ptr) & (sizeof(uint32_t) - 1)) !=3D 0)
+			error =3D EINVAL;
+		else {
+			td->td_pflags |=3D TDP_FAST_SIGBLOCK;
+			td->td_sigblock_ptr =3D uap->ptr;
+		}
+		break;
+
+	case FAST_SIGBLOCK_UNBLOCK:
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) !=3D 0) {
+			oldval =3D casuword32(td->td_sigblock_ptr,
+			    FAST_SIGBLOCK_PEND, 0);
+			if (oldval =3D=3D (uint32_t)-1)
+				error =3D EFAULT;
+			else if (oldval !=3D FAST_SIGBLOCK_PEND)
+				error =3D EBUSY;
+			else
+				td->td_sigblock_val =3D 0;
+		} else
+			error =3D EDOOFUS;
+
+		/*
+		 * Rely on normal ast mechanism to deliver pending
+		 * signals to current thread.  But notify others about
+		 * fake unblock.
+		 */
+		p =3D td->td_proc;
+		if (error =3D=3D 0 && p->p_numthreads !=3D 1) {
+			PROC_LOCK(p);
+			reschedule_signals(p, td->td_sigmask, 0);
+			PROC_UNLOCK(p);
+		}
+		break;
+
+	case FAST_SIGBLOCK_UNSETPTR:
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) !=3D 0) {
+			error =3D copyin(td->td_sigblock_ptr, &oldval,
+			    sizeof(uint32_t));
+			if (error !=3D 0)
+				break;
+			if (oldval !=3D 0 && oldval !=3D FAST_SIGBLOCK_PEND)
+				error =3D EBUSY;
+			else {
+				td->td_pflags &=3D ~TDP_FAST_SIGBLOCK;
+				td->td_sigblock_val =3D 0;
+			}
+		} else
+			error =3D EDOOFUS;
+		break;
+	}
+	return (error);
+}
+
+void
+fetch_fast_sigblock(struct thread *td)
+{
+
+	if ((td->td_pflags & TDP_FAST_SIGBLOCK) =3D=3D 0)
+		return;
+	td->td_sigblock_val =3D fuword32(td->td_sigblock_ptr);
+	if (td->td_sigblock_val =3D=3D -1)
+		fetch_fast_sigblock_failed(td, 0);
+}
+
+void
+fetch_fast_sigblock_failed(struct thread *td, int write)
+{
+	ksiginfo_t ksi;
+
+	td->td_sigblock_val =3D 0;
+	ksiginfo_init_trap(&ksi);
+	ksi.ksi_signo =3D SIGSEGV;
+	ksi.ksi_code =3D write ? SEGV_ACCERR :  SEGV_MAPERR;
+	ksi.ksi_addr =3D td->td_sigblock_ptr;
+	trapsignal(td, &ksi);
+}
diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c
index 5aee684..77b250d 100644
--- a/sys/kern/subr_syscall.c
+++ b/sys/kern/subr_syscall.c
@@ -131,6 +131,12 @@ syscallenter(struct thread *td, struct syscall_args *s=
a)
 			    sa->callp, sa->args, 0);
 #endif
=20
+		/*
+		 * Fetch fast sigblock value at the time of syscall
+		 * entry because sleepqueue primitives might call
+		 * cursig().
+		 */
+		fetch_fast_sigblock(td);
 		AUDIT_SYSCALL_ENTER(sa->code, td);
 		error =3D (sa->callp->sy_call)(td, sa->args);
 		AUDIT_SYSCALL_EXIT(error, td);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 095bbdf..66e485b 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -237,6 +237,7 @@ ast(struct trapframe *framep)
 	 */
 	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
 	    !SIGISEMPTY(p->p_siglist)) {
+		fetch_fast_sigblock(td);
 		PROC_LOCK(p);
 		mtx_lock(&p->p_sigacts->ps_mtx);
 		while ((sig =3D cursig(td, SIG_STOP_ALLOWED)) !=3D 0)
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index f62dad7..28a9393 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -951,5 +951,6 @@
 				    off_t offset, off_t len); }
 531	AUE_NULL	STD	{ int posix_fadvise(int fd, off_t offset, \
 				    off_t len, int advice); }
+532	AUE_NULL	STD	{ int fast_sigblock(int cmd, uint32_t *ptr); }
 ; Please copy any additions and changes to the following compatability tab=
les:
 ; sys/compat/freebsd32/syscalls.master
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 06df632..4899ca2 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -272,6 +272,9 @@ struct thread {
 	struct osd	td_osd;		/* (k) Object specific data. */
 	struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */
 	pid_t		td_dbg_forked;	/* (c) Child pid for debugger. */
+	void		*td_sigblock_ptr; /* (k) uptr for fast sigblock. */
+	uint32_t	td_sigblock_val;  /* (k) fast sigblock value at
+					     kernel entry. */
 #define	td_endzero td_sigmask
=20
 /* Copied during fork1() or create_thread(). */
@@ -424,6 +427,7 @@ do {									\
 #define	TDP_RESETSPUR	0x04000000 /* Reset spurious page fault history. */
 #define	TDP_NERRNO	0x08000000 /* Last errno is already in td_errno */
 #define	TDP_UIOHELD	0x10000000 /* Current uio has pages held in td_ma */
+#define	TDP_FAST_SIGBLOCK 0x20000000 /* Fast sigblock active */
=20
 /*
  * Reasons that the current thread can not be run yet.
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 71685e7..68cca58 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -250,6 +250,20 @@ typedef struct sigqueue {
 /* Flags for ksi_flags */
 #define	SQ_INIT	0x01
=20
+/*
+ * Fast_sigblock
+ */
+#define	FAST_SIGBLOCK_SETPTR	1
+#define	FAST_SIGBLOCK_UNBLOCK	2
+#define	FAST_SIGBLOCK_UNSETPTR	3
+
+#define	FAST_SIGBLOCK_PEND	0x1
+#define	FAST_SIGBLOCK_INC	0x10
+
+#ifndef _KERNEL
+int __sys_fast_sigblock(int cmd, void *ptr);
+#endif
+
 #ifdef _KERNEL
=20
 /* Return nonzero if process p has an unmasked pending signal. */
@@ -329,6 +343,8 @@ extern struct mtx	sigio_lock;
=20
 int	cursig(struct thread *td, int stop_allowed);
 void	execsigs(struct proc *p);
+void	fetch_fast_sigblock(struct thread *td);
+void	fetch_fast_sigblock_failed(struct thread *td, int write);
 void	gsignal(int pgid, int sig, ksiginfo_t *ksi);
 void	killproc(struct proc *p, char *why);
 ksiginfo_t * ksiginfo_alloc(int wait);



----- End forwarded message -----

--sdtB3X0nJg68CQEu
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (FreeBSD)

iQIcBAEBAgAGBQJQ6xJqAAoJEJDCuSvBvK1BiSMP/RrPt9uEu5jsbL+GbZ4EPALD
veW06QvySP6TQ65jBFHoimZGXn3uSdduXEPBTqXsd5ECgbuptcy+luWORkc3b8Yz
NuoTczRd6WJCC0J9WsxC3FgBVynq1iBsARfe4sg+SvpmlwnIkKON22E3FSy6k42z
eYN/IDtYrC0nRzDSbBtkuIJ7QSXs2btY6vJHQhBMjhM/O7UtoZMF8tx/2T0khbvR
Dt/HAM6HJBUYDOuYMvUJCx7NgogXcbHYDHD4FHpGriBmFNXxtAy/RkP0Xuv+9MO9
gQ5S22Uj/hkxwiz/nHMm+QNRNtqokywATLPm7LIwykMvPiwc0AwhXkKXFu87myjI
Lk7aKFk/MuXERwZ6X8FU9ieodO4eJECgXF3Xsmkw3QxMk1KyhrslXbmyl7wLnuel
dFHlOMAMjKx7Xka7sm3hZNIpztDJKS6cdys+VYYxHj2j0T45hEOO4Qk643dFb2sa
a+DQdRi1p/dBY4fu1nmdRgGtbzl3u7ClqyEf9j+lWrLawgQ4QkWtUNlBBX8hohLz
FuGNzMDRQs7UupUVHJfaHpoBKzRg1hhKczGux1pdd4wLTZBtvb47At4tVGquGyMT
sDrN8vK/jPofhqTPg2IkRI1LnRQ6BZ5anwRiDmewO7QTyBkngO7Cdd58hirwQC2o
0PU6sx0kaeTntQH7hMVL
=G0Up
-----END PGP SIGNATURE-----

--sdtB3X0nJg68CQEu--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130107182235.GA65279>