Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 26 Apr 1999 21:10:28 +0200 (MET DST)
From:      Juergen Lock <nox@jelal.kn-bremen.de>
To:        freebsd-hackers@FreeBSD.org
Cc:        freebsd-emulation@FreeBSD.org, FreeBSD-gnats-submit@FreeBSD.org
Subject:   Re: kern/11287: rfork(RFMEM...) doesn't share LDTs set by i386_set_ldt, breaking wine
Message-ID:  <199904261910.VAA03311@saturn.kn-bremen.de>
In-Reply-To: <199904222239.AAA43095@saturn.kn-bremen.de>

next in thread | previous in thread | raw e-mail | index | archive | help
In article <199904222239.AAA43095@saturn.kn-bremen.de> you write:

>>Description:
>
>	wine now uses kernel threads (rfork()) and expects i386_set_ldt()
>	to work across threads, i.e. the new LDT be global to all threads.
>	rfork() copies the ldt regardless of the RFMEM flag so each thread
>	ends up with its own ldt (sys/i386/i386/vm_machdep.c, cpu_fork()).

>>Fix:

Here's a patch that makes it share the user LDT for rfork(RFTHREAD...),
tested on 3.1-stable.  It works by copying only the pcb_ldt pointer
(not the LDT itself) on fork and propagating it to all peers in
i386_set_ldt(2).  The status `copied pointer' is indicated by setting
pcb_ldt_len = -1; only p_leader's pcb_ldt_len holds the real size.

 This appears to fix the wine crashes (more in the newsgroups...)
if you add RFTHREAD to its rfork args.  Everything else works as
before; there is only one `problem': if you rfork(RFTHREAD...) and
then do an exec() in the parent, the exec'd program will still share
the LDT, as it will still be the p_leader...  But as there is nothing
else besides wine that uses i386_set_ldt(2), and wine doesn't do this,
it shouldn't really matter.  (Btw., if a child exec()s, shouldn't it
unlink itself from the p_peers list?  Looks like it currently
doesn't.  Hmm.)

 One other change:  I added a handler for trap 12's at cpu_switch_load_{f,g}s
as I was getting these while testing.  The finished patch doesn't seem
to generate them anymore (only trap 9's, for which there already is a
handler), but handling them anyway doesn't hurt, right? :)

 As for style etc., any comments are welcome.  This is only my second
patch to FreeBSD's kernel...

cvs diff: Diffing sys
Index: sys/proc.h
===================================================================
RCS file: /home/cvs/cvs/src/sys/sys/proc.h,v
retrieving revision 1.66.2.2
diff -u -r1.66.2.2 proc.h
--- proc.h	1999/02/23 13:44:36	1.66.2.2
+++ proc.h	1999/04/25 17:35:14
@@ -373,6 +373,7 @@
 void	unsleep __P((struct proc *));
 void	wakeup_one __P((void *chan));
 
+void	cpu_kill9 __P((struct proc *));
 void	cpu_exit __P((struct proc *)) __dead2;
 void	exit1 __P((struct proc *, int)) __dead2;
 void	cpu_fork __P((struct proc *, struct proc *));
cvs diff: Diffing kern
Index: kern/kern_exit.c
===================================================================
RCS file: /home/cvs/cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.71.2.2
diff -u -r1.71.2.2 kern_exit.c
--- kern_exit.c	1999/03/02 00:42:08	1.71.2.2
+++ kern_exit.c	1999/04/26 14:48:47
@@ -41,6 +41,9 @@
 
 #include "opt_compat.h"
 #include "opt_ktrace.h"
+#ifdef __i386__
+#include "opt_user_ldt.h"
+#endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -139,6 +142,12 @@
 			 * than the internal signal
 			 */
 			kill(p, &killArgs);
+#ifdef __i386__
+#ifdef USER_LDT
+			/* hook to undo LDT sharing */
+			cpu_kill9(q);
+#endif
+#endif
 			nq = q;
 			q = q->p_peers;
 			/*
cvs diff: Diffing i386/i386
Index: i386/i386/machdep.c
===================================================================
RCS file: /home/cvs/cvs/src/sys/i386/i386/machdep.c,v
retrieving revision 1.322.2.4
diff -u -r1.322.2.4 machdep.c
--- machdep.c	1999/02/17 13:08:41	1.322.2.4
+++ machdep.c	1999/04/26 16:34:31
@@ -815,13 +815,39 @@
 #ifdef USER_LDT
 	/* was i386_user_cleanup() in NetBSD */
 	if (pcb->pcb_ldt) {
-		if (pcb == curpcb) {
-			lldt(_default_ldt);
-			currentldt = _default_ldt;
+		if (pcb->pcb_ldt_len != -1) {
+#ifdef DIAGNOSTIC
+			if (p->p_leader != p)
+				panic("setregs: pcb_ldt_len != -1 in peer");
+#endif
+			if (!p->p_peers) {
+				if (pcb == curpcb) {
+					lldt(_default_ldt);
+					currentldt = _default_ldt;
+				}
+				/*
+				 * free first: kmem_free() needs the old
+				 * pointer and length, so clear them afterwards
+				 */
+				kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
+					pcb->pcb_ldt_len * sizeof(union descriptor));
+				pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
+			} else {
+				/* XXX what to do here? */
+				printf("setregs: leader exec()ing, keeping shared user ldt\n");
+			}
+#ifdef DIAGNOSTIC
+		} else if (!p->p_leader || p->p_leader == p) {
+			panic("setregs: pcb_ldt_len == -1 in leader");
+#endif
+		} else {
+			/* peer holds a copied pointer: drop it, don't free */
+			if (pcb == curpcb) {
+				lldt(_default_ldt);
+				currentldt = _default_ldt;
+			}
+			pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
 		}
-		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
-			pcb->pcb_ldt_len * sizeof(union descriptor));
-		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
  	}
 #endif
   
Index: i386/i386/sys_machdep.c
===================================================================
RCS file: /home/cvs/cvs/src/sys/i386/i386/sys_machdep.c,v
retrieving revision 1.38
diff -u -r1.38 sys_machdep.c
--- sys_machdep.c	1998/12/07 21:58:19	1.38
+++ sys_machdep.c	1999/04/26 15:05:02
@@ -259,8 +259,16 @@
 void
 set_user_ldt(struct pcb *pcb)
 {
+	int nldt = pcb->pcb_ldt_len;
+	if (nldt == -1) {
+#ifdef DIAGNOSTIC
+		if (pcb != (struct pcb *)&curproc->p_addr->u_pcb)
+			panic("set_user_ldt: pcb->pcb_ldt_len == -1 and pcb != curproc's");
+#endif
+		nldt = ((struct pcb *)&curproc->p_leader->p_addr->u_pcb)->pcb_ldt_len;
+	}
 	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)pcb->pcb_ldt;
-	gdt_segs[GUSERLDT_SEL].ssd_limit = (pcb->pcb_ldt_len * sizeof(union descriptor)) - 1;
+	gdt_segs[GUSERLDT_SEL].ssd_limit = (nldt * sizeof(union descriptor)) - 1;
 	ssdtosd(&gdt_segs[GUSERLDT_SEL], &gdt[GUSERLDT_SEL].sd);
 	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
 	currentldt = GSEL(GUSERLDT_SEL, SEL_KPL);
@@ -301,6 +309,13 @@
 
 	if (pcb->pcb_ldt) {
 		nldt = pcb->pcb_ldt_len;
+		if (nldt == -1) {
+#ifdef DIAGNOSTIC
+			if (!p->p_leader || p->p_leader == p)
+				panic("i386_get_ldt: pcb_ldt_len == -1 in leader");
+#endif
+			nldt = ((struct pcb *)&p->p_leader->p_addr->u_pcb)->pcb_ldt_len;
+		}
 		num = min(uap->num, nldt);
 		lp = &((union descriptor *)(pcb->pcb_ldt))[uap->start];
 	} else {
@@ -335,7 +350,8 @@
 	int error = 0, i, n;
  	int largest_ld;
 	struct pcb *pcb = &p->p_addr->u_pcb;
-	int s;
+	struct proc *q;
+	int nldt, s;
 	struct i386_set_ldt_args ua, *uap;
 
 	if ((error = copyin(args, &ua, sizeof(struct i386_set_ldt_args))) < 0)
@@ -359,24 +375,54 @@
   		return(EINVAL);
   
   	/* allocate user ldt */
- 	if (!pcb->pcb_ldt || (largest_ld >= pcb->pcb_ldt_len)) {
+	nldt = pcb->pcb_ldt_len;
+	if (nldt == -1) {
+#ifdef DIAGNOSTIC
+		if (!p->p_leader || p->p_leader == p)
+			panic("i386_set_ldt: pcb_ldt_len == -1 in leader");
+#endif
+		nldt = ((struct pcb *)&p->p_leader->p_addr->u_pcb)->pcb_ldt_len;
+	}
+ 	if (!pcb->pcb_ldt || (largest_ld >= nldt)) {
  		union descriptor *new_ldt = (union descriptor *)kmem_alloc(
  			kernel_map, SIZE_FROM_LARGEST_LD(largest_ld));
  		if (new_ldt == NULL) {
  			return ENOMEM;
  		}
  		if (pcb->pcb_ldt) {
- 			bcopy(pcb->pcb_ldt, new_ldt, pcb->pcb_ldt_len
+ 			bcopy(pcb->pcb_ldt, new_ldt, nldt
  				* sizeof(union descriptor));
  			kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
- 				pcb->pcb_ldt_len * sizeof(union descriptor));
+ 				nldt * sizeof(union descriptor));
  		} else {
  			bcopy(ldt, new_ldt, sizeof(ldt));
  		}
-  		pcb->pcb_ldt = (caddr_t)new_ldt;
- 		pcb->pcb_ldt_len = NEW_MAX_LD(largest_ld);
+		/*
+		 * copy pcb_ldt for peers, set their pcb_ldt_len = -1
+		 * to indicate this is a copy
+		 */
+		for (q = p->p_leader; q; q = q->p_peers) {
+			struct pcb *pcb2 = &q->p_addr->u_pcb;
+
+			pcb2->pcb_ldt = (caddr_t)new_ldt;
+			/* the leader gets the real pcb_ldt_len */
+			if (q == p->p_leader)
+				pcb2->pcb_ldt_len = NEW_MAX_LD(largest_ld);
+			else
+				pcb2->pcb_ldt_len = -1;
+			if (pcb2 == curpcb)
+			    set_user_ldt((struct pcb *)&p->p_leader->p_addr->u_pcb);
+		}
+#ifdef DIAGNOSTIC
+		if (!p->p_leader)
+			panic("i386_set_ldt: p_leader == 0");
+  		if (pcb->pcb_ldt != (caddr_t)new_ldt)
+			panic("i386_set_ldt: pcb->pcb_ldt != new_ldt");
+#endif
+#if 0
  		if (pcb == curpcb)
  		    set_user_ldt(pcb);
+#endif
   	}
 
 	/* Check descriptors for access violations */
Index: i386/i386/trap.c
===================================================================
RCS file: /home/cvs/cvs/src/sys/i386/i386/trap.c,v
retrieving revision 1.133
diff -u -r1.133 trap.c
--- trap.c	1999/01/06 23:05:36	1.133
+++ trap.c	1999/04/26 13:44:35
@@ -434,6 +434,29 @@
 
 		switch (type) {
 		case T_PAGEFLT:			/* page fault */
+			if (intr_nesting_level == 0) {
+				/*
+				 * Invalid %fs's and %gs's can be created using
+				 * procfs or PT_SETREGS or by invalidating the
+				 * underlying LDT entry.  This causes a fault
+				 * in kernel mode when the kernel attempts to
+				 * switch contexts.  Lose the bad context
+				 * (XXX) so that we can continue, and generate
+				 * a signal.
+				 */
+				if (frame.tf_eip == (int)cpu_switch_load_fs
+				    && curpcb->pcb_fs) {
+					curpcb->pcb_fs = 0;
+					psignal(p, SIGBUS);
+					return;
+				}
+				if (frame.tf_eip == (int)cpu_switch_load_gs
+				    && curpcb->pcb_gs) {
+					curpcb->pcb_gs = 0;
+					psignal(p, SIGBUS);
+					return;
+				}
+			}
 			(void) trap_pfault(&frame, FALSE, eva);
 			return;
 
Index: i386/i386/vm_machdep.c
===================================================================
RCS file: /home/cvs/cvs/src/sys/i386/i386/vm_machdep.c,v
retrieving revision 1.115
diff -u -r1.115 vm_machdep.c
--- vm_machdep.c	1999/01/06 23:05:37	1.115
+++ vm_machdep.c	1999/04/26 15:31:35
@@ -173,11 +173,32 @@
         /* Copy the LDT, if necessary. */
         if (pcb2->pcb_ldt != 0) {
                 union descriptor *new_ldt;
-                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
+                int nldt = pcb2->pcb_ldt_len;
 
-                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
-                bcopy(pcb2->pcb_ldt, new_ldt, len);
-                pcb2->pcb_ldt = (caddr_t)new_ldt;
+		if (nldt == -1) {
+#ifdef DIAGNOSTIC
+			if (!p2->p_leader || p2->p_leader == p2)
+				panic("cpu_fork: pcb_ldt_len == -1 in leader");
+#endif
+			nldt = ((struct pcb *)&p2->p_leader->p_addr->u_pcb)->pcb_ldt_len;
+		}
+		if (p2->p_leader == p1->p_leader) {
+			/*
+			 * this is a rfork(RFTHREAD|...),
+			 * indicate pcb_ldt is a copy
+			 */
+			pcb2->pcb_ldt_len = -1;
+#ifdef DIAGNOSTIC
+			if (p2->p_leader == p2)
+				panic("cpu_fork: p2->p_leader == p1->p_leader and p2 is leader");
+#endif
+		} else {
+			new_ldt = (union descriptor *)kmem_alloc(kernel_map,
+				nldt * sizeof(union descriptor));
+			bcopy(pcb2->pcb_ldt, new_ldt,
+				nldt * sizeof(union descriptor));
+			pcb2->pcb_ldt = (caddr_t)new_ldt;
+		}
         }
 #endif
 
@@ -240,8 +261,13 @@
 			lldt(_default_ldt);
 			currentldt = _default_ldt;
 		}
-		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
-			pcb->pcb_ldt_len * sizeof(union descriptor));
+		if (pcb->pcb_ldt_len != -1)
+			kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
+				pcb->pcb_ldt_len * sizeof(union descriptor));
+#ifdef DIAGNOSTIC
+		else if (!p->p_leader || p->p_leader == p)
+			panic("cpu_exit: pcb_ldt_len == -1 in leader");
+#endif
 		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
 	}
 #endif
@@ -249,6 +275,25 @@
 	cpu_switch(p);
 	panic("cpu_exit");
 }
+
+#ifdef USER_LDT
+void
+cpu_kill9(p)
+	register struct proc *p;
+{
+	struct pcb *pcb = &p->p_addr->u_pcb; 
+	/*
+	 * hook to undo ldt sharing:
+	 * we are going to be SIGKILL'd so we can just forget our ldt
+	 */
+	if (pcb->pcb_ldt_len == -1)
+		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
+#ifdef DIAGNOSTIC
+	if (pcb == curpcb)
+		panic("cpu_kill9: pcb == curpcb");
+#endif
+}
+#endif
 
 void
 cpu_wait(p)
cvs diff: Diffing pc98/i386
Index: pc98/i386/machdep.c
===================================================================
RCS file: /home/cvs/cvs/src/sys/pc98/i386/machdep.c,v
retrieving revision 1.105.2.3
diff -u -r1.105.2.3 machdep.c
--- machdep.c	1999/02/19 14:39:52	1.105.2.3
+++ machdep.c	1999/04/26 16:34:38
@@ -828,13 +828,39 @@
 #ifdef USER_LDT
 	/* was i386_user_cleanup() in NetBSD */
 	if (pcb->pcb_ldt) {
-		if (pcb == curpcb) {
-			lldt(_default_ldt);
-			currentldt = _default_ldt;
+		if (pcb->pcb_ldt_len != -1) {
+#ifdef DIAGNOSTIC
+			if (p->p_leader != p)
+				panic("setregs: pcb_ldt_len != -1 in peer");
+#endif
+			if (!p->p_peers) {
+				if (pcb == curpcb) {
+					lldt(_default_ldt);
+					currentldt = _default_ldt;
+				}
+				/*
+				 * free first: kmem_free() needs the old
+				 * pointer and length, so clear them afterwards
+				 */
+				kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
+					pcb->pcb_ldt_len * sizeof(union descriptor));
+				pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
+			} else {
+				/* XXX what to do here? */
+				printf("setregs: leader exec()ing, keeping shared user ldt\n");
+			}
+#ifdef DIAGNOSTIC
+		} else if (!p->p_leader || p->p_leader == p) {
+			panic("setregs: pcb_ldt_len == -1 in leader");
+#endif
+		} else {
+			/* peer holds a copied pointer: drop it, don't free */
+			if (pcb == curpcb) {
+				lldt(_default_ldt);
+				currentldt = _default_ldt;
+			}
+			pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
 		}
-		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
-			pcb->pcb_ldt_len * sizeof(union descriptor));
-		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
  	}
 #endif
   
 Happy hacking,
-- 
Juergen Lock <nox.foo@jelal.kn-bremen.de>
(remove dot foo from address to reply)


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199904261910.VAA03311>