Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 24 Mar 2018 12:57:58 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r331486 - head/sys/amd64/ia32
Message-ID:  <201803241257.w2OCvwwT018532@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Sat Mar 24 12:57:58 2018
New Revision: 331486
URL: https://svnweb.freebsd.org/changeset/base/331486

Log:
  Improve the lcall $7,$0 syscall emulation on amd64.
  
  The current code, which copies the potential syscall arguments into
  the current frame, puts an arbitrary limit on the number of syscall
  arguments.  Apparently, mmap(2) and lseek(2) (?) require a larger
  number.  But there is an issue: the stack only needs to be mapped far
  enough to contain the number of arguments required by the syscall, so
  copying an arbitrarily large number of words from the stack is not
  completely safe.
  
  Use a different approach: convert the lcall frame into an int $0x80
  frame in place, by doing the retl in the kernel.  This also allows us
  to stop handling the vfork case specially, and to stop making
  assumptions about %cs at syscall time.
  
  Also, improve comments with the formulations provided by bde.
  
  Reviewed and tested by:	bde
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week

Modified:
  head/sys/amd64/ia32/ia32_sigtramp.S
  head/sys/amd64/ia32/ia32_syscall.c

Modified: head/sys/amd64/ia32/ia32_sigtramp.S
==============================================================================
--- head/sys/amd64/ia32/ia32_sigtramp.S	Sat Mar 24 12:48:10 2018	(r331485)
+++ head/sys/amd64/ia32/ia32_sigtramp.S	Sat Mar 24 12:57:58 2018	(r331486)
@@ -78,44 +78,23 @@ ia32_osigcode:
 1:
 	jmp	1b
 
-
 /*
- * The lcall $7,$0 emulator cannot use the call gate that does an
- * inter-privilege transition. The reason is that the call gate
- * does not disable interrupts, and, before the swapgs is
- * executed, we would have a window where the ring 0 code is
- * executed with the wrong gsbase.
+ * Our lcall $7,$0 handler remains in user mode (ring 3), since lcalls
+ * don't change the interrupt mask, so if this one went directly to the
+ * kernel then there would be a window with interrupts enabled in kernel
+ * mode, and all interrupt handlers would have to be almost as complicated
+ * as the NMI handler to support this.
  *
- * Instead, set LDT descriptor 0 as code segment, which reflects
- * the lcall $7,$0 back to ring 3 trampoline.  The trampoline sets up
- * the frame for int $0x80.
+ * Instead, convert the lcall to an int0x80 call.  The kernel does most
+ * of the conversion by popping the lcall return values off the user
+ * stack and returning to them instead of to here, except when the
+ * conversion itself fails.  Adjusting the stack here is impossible for
+ * vfork() and harder for other syscalls.
  */
 	ALIGN_TEXT
 lcall_tramp:
-	cmpl	$SYS_vfork,%eax
-	je	1f
-	pushl	%ebp
-	movl	%esp,%ebp
-	pushl	0x24(%ebp) /* arg 6 */
-	pushl	0x20(%ebp)
-	pushl	0x1c(%ebp)
-	pushl	0x18(%ebp)
-	pushl	0x14(%ebp)
-	pushl	0x10(%ebp) /* arg 1 */
-	subl	$4,%esp   /* gap */
 	int	$0x80
-	leavel
-	lretl
-1:
-	/*
-	 * vfork handling is special and relies on the libc stub saving
-	 * the return ip in %ecx.  Also, we assume that the call was done
-	 * with ucode32 selector in %cs.
-	 */
-	int	$0x80
-	movl	$0x33,4(%esp)	/* GUCODE32_SEL | SEL_UPL */
-	movl	%ecx,(%esp)
-	lretl
+1:	jmp	1b
 #endif
 
 	ALIGN_TEXT

Modified: head/sys/amd64/ia32/ia32_syscall.c
==============================================================================
--- head/sys/amd64/ia32/ia32_syscall.c	Sat Mar 24 12:48:10 2018	(r331485)
+++ head/sys/amd64/ia32/ia32_syscall.c	Sat Mar 24 12:57:58 2018	(r331486)
@@ -116,10 +116,38 @@ ia32_fetch_syscall_args(struct thread *td)
 	caddr_t params;
 	u_int32_t args[8], tmp;
 	int error, i;
+#ifdef COMPAT_43
+	u_int32_t eip;
+	int cs;
+#endif
 
 	p = td->td_proc;
 	frame = td->td_frame;
 	sa = &td->td_sa;
+
+#ifdef COMPAT_43
+	if (__predict_false(frame->tf_cs == 7 && frame->tf_rip == 2)) {
+		/*
+		 * In lcall $7,$0 after int $0x80.  Convert the user
+		 * frame to what it would be for a direct int 0x80 instead
+		 * of lcall $7,$0, by popping the lcall return address.
+		 */
+		error = fueword32((void *)frame->tf_rsp, &eip);
+		if (error == -1)
+			return (EFAULT);
+		cs = fuword16((void *)(frame->tf_rsp + sizeof(u_int32_t)));
+		if (cs == -1)
+			return (EFAULT);
+
+		/*
+		 * Unwind in-kernel frame after all stack frame pieces
+		 * were successfully read.
+		 */
+		frame->tf_rip = eip;
+		frame->tf_cs = cs;
+		frame->tf_rsp += 2 * sizeof(u_int32_t);
+	}
+#endif
 
 	params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
 	sa->code = frame->tf_rax;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803241257.w2OCvwwT018532>