Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 26 Apr 1999 12:30:02 -0700 (PDT)
From:      Juergen Lock <nox@jelal.kn-bremen.de>
To:        freebsd-bugs@FreeBSD.org
Subject:   Re: kern/11287: rfork(RFMEM...) doesn't share LDTs set by i386_set_ldt, breaking wine
Message-ID:  <199904261930.MAA91435@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help
The following reply was made to PR kern/11287; it has been noted by GNATS.

From: Juergen Lock <nox@jelal.kn-bremen.de>
To: freebsd-hackers@FreeBSD.org
Cc: freebsd-emulation@FreeBSD.org, FreeBSD-gnats-submit@FreeBSD.org
Subject: Re: kern/11287: rfork(RFMEM...) doesn't share LDTs set by i386_set_ldt, breaking wine
Date: Mon, 26 Apr 1999 21:10:28 +0200 (MET DST)

 In article <199904222239.AAA43095@saturn.kn-bremen.de> you write:
 
 >>Description:
 >
 >	wine now uses kernel threads (rfork()) and expects i386_set_ldt()
 >	to work across threads, i.e. the new LDT be global to all threads.
 >	rfork() copies the ldt regardless of the RFMEM flag so each thread
 >	ends up with its own ldt (sys/i386/i386/vm_machdep.c, cpu_fork()).
 
 >>Fix:
 
 Here's a patch that makes it share the user LDT for rfork(RFTHREAD...),
 tested on 3.1-stable.  It works by sharing only the pcb_ldt pointer
 on fork and propagating it to all peers in i386_set_ldt(2).  The status
 `copied pointer' is indicated by setting pcb_ldt_len = -1; only
 p_leader's pcb_ldt_len holds the real size.
 
  This appears to fix the wine crashes (more in the newsgroups...)
 if you add RFTHREAD to its rfork args.  Everything else works as
 before; there is only one `problem': if you rfork(RFTHREAD...) and
 then do an exec() in the parent, the exec'd program will still share
 the LDT, as it will still be the p_leader...  But as nothing else
 besides wine uses i386_set_ldt(2), and wine doesn't do this, it
 shouldn't really matter.  (Btw., if a child exec()s, shouldn't it
 unlink itself from the p_peers list?  Looks like it currently
 doesn't.  Hmm.)
 
  One other change: I added a handler for trap 12's (page faults) at
 cpu_switch_load_{f,g}s, as I was getting these while testing.  The
 finished patch doesn't seem to generate them anymore (only trap 9's,
 for which there already is a handler), but handling them anyway
 doesn't hurt, right? :)
 
  As for style etc., any comments are welcome.  This is only my second
 patch to FreeBSD's kernel...
 
 cvs diff: Diffing sys
 Index: sys/proc.h
 ===================================================================
 RCS file: /home/cvs/cvs/src/sys/sys/proc.h,v
 retrieving revision 1.66.2.2
 diff -u -r1.66.2.2 proc.h
 --- proc.h	1999/02/23 13:44:36	1.66.2.2
 +++ proc.h	1999/04/25 17:35:14
 @@ -373,6 +373,7 @@
  void	unsleep __P((struct proc *));
  void	wakeup_one __P((void *chan));
  
 +void	cpu_kill9 __P((struct proc *));
  void	cpu_exit __P((struct proc *)) __dead2;
  void	exit1 __P((struct proc *, int)) __dead2;
  void	cpu_fork __P((struct proc *, struct proc *));
 cvs diff: Diffing kern
 Index: kern/kern_exit.c
 ===================================================================
 RCS file: /home/cvs/cvs/src/sys/kern/kern_exit.c,v
 retrieving revision 1.71.2.2
 diff -u -r1.71.2.2 kern_exit.c
 --- kern_exit.c	1999/03/02 00:42:08	1.71.2.2
 +++ kern_exit.c	1999/04/26 14:48:47
 @@ -41,6 +41,9 @@
  
  #include "opt_compat.h"
  #include "opt_ktrace.h"
 +#ifdef __i386__
 +#include "opt_user_ldt.h"
 +#endif
  
  #include <sys/param.h>
  #include <sys/systm.h>
 @@ -139,6 +142,12 @@
  			 * than the internal signal
  			 */
  			kill(p, &killArgs);
 +#ifdef __i386__
 +#ifdef USER_LDT
 +			/* hook to undo LDT sharing */
 +			cpu_kill9(q);
 +#endif
 +#endif
  			nq = q;
  			q = q->p_peers;
  			/*
 cvs diff: Diffing i386/i386
 Index: i386/i386/machdep.c
 ===================================================================
 RCS file: /home/cvs/cvs/src/sys/i386/i386/machdep.c,v
 retrieving revision 1.322.2.4
 diff -u -r1.322.2.4 machdep.c
 --- machdep.c	1999/02/17 13:08:41	1.322.2.4
 +++ machdep.c	1999/04/26 16:34:31
 @@ -815,13 +815,34 @@
  #ifdef USER_LDT
  	/* was i386_user_cleanup() in NetBSD */
  	if (pcb->pcb_ldt) {
 -		if (pcb == curpcb) {
 -			lldt(_default_ldt);
 -			currentldt = _default_ldt;
 +		if (pcb->pcb_ldt_len != -1) {
 +#ifdef DIAGNOSTIC
 +			if (p->p_leader != p)
 +				panic("setregs: pcb_ldt_len != -1 in peer");
 +#endif
 +			if (!p->p_peers) {
 +				if (pcb == curpcb) {
 +					lldt(_default_ldt);
 +					currentldt = _default_ldt;
 +				}
 +				pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
 +				kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
 +					pcb->pcb_ldt_len * sizeof(union descriptor));
 +			} else {
 +				/* XXX what to do here? */
 +				printf("setregs: leader exec()ing, keeping shared user ldt\n");
 +			}
 +#ifdef DIAGNOSTIC
 +		} else if (!p->p_leader || p->p_leader == p) {
 +			panic("setregs: pcb_ldt_len == -1 in leader");
 +#endif
 +		} else {
 +			if (pcb == curpcb) {
 +				lldt(_default_ldt);
 +				currentldt = _default_ldt;
 +			}
 +			pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
  		}
 -		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
 -			pcb->pcb_ldt_len * sizeof(union descriptor));
 -		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
   	}
  #endif
    
 Index: i386/i386/sys_machdep.c
 ===================================================================
 RCS file: /home/cvs/cvs/src/sys/i386/i386/sys_machdep.c,v
 retrieving revision 1.38
 diff -u -r1.38 sys_machdep.c
 --- sys_machdep.c	1998/12/07 21:58:19	1.38
 +++ sys_machdep.c	1999/04/26 15:05:02
 @@ -259,8 +259,16 @@
  void
  set_user_ldt(struct pcb *pcb)
  {
 +	int nldt = pcb->pcb_ldt_len;
 +	if (nldt == -1) {
 +#ifdef DIAGNOSTIC
 +		if (pcb != (struct pcb *)&curproc->p_addr->u_pcb)
 +			panic("set_user_ldt: pcb->pcb_ldt_len == -1 and pcb != curproc's");
 +#endif
 +		nldt = ((struct pcb *)&curproc->p_leader->p_addr->u_pcb)->pcb_ldt_len;
 +	}
  	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)pcb->pcb_ldt;
 -	gdt_segs[GUSERLDT_SEL].ssd_limit = (pcb->pcb_ldt_len * sizeof(union descriptor)) - 1;
 +	gdt_segs[GUSERLDT_SEL].ssd_limit = (nldt * sizeof(union descriptor)) - 1;
  	ssdtosd(&gdt_segs[GUSERLDT_SEL], &gdt[GUSERLDT_SEL].sd);
  	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
  	currentldt = GSEL(GUSERLDT_SEL, SEL_KPL);
 @@ -301,6 +309,13 @@
  
  	if (pcb->pcb_ldt) {
  		nldt = pcb->pcb_ldt_len;
 +		if (nldt == -1) {
 +#ifdef DIAGNOSTIC
 +			if (!p->p_leader || p->p_leader == p)
 +				panic("i386_get_ldt: pcb_ldt_len == -1 in leader");
 +#endif
 +			nldt = ((struct pcb *)&p->p_leader->p_addr->u_pcb)->pcb_ldt_len;
 +		}
  		num = min(uap->num, nldt);
  		lp = &((union descriptor *)(pcb->pcb_ldt))[uap->start];
  	} else {
 @@ -335,7 +350,8 @@
  	int error = 0, i, n;
   	int largest_ld;
  	struct pcb *pcb = &p->p_addr->u_pcb;
 -	int s;
 +	struct proc *q;
 +	int nldt, s;
  	struct i386_set_ldt_args ua, *uap;
  
  	if ((error = copyin(args, &ua, sizeof(struct i386_set_ldt_args))) < 0)
 @@ -359,24 +375,54 @@
    		return(EINVAL);
    
    	/* allocate user ldt */
 - 	if (!pcb->pcb_ldt || (largest_ld >= pcb->pcb_ldt_len)) {
 +	nldt = pcb->pcb_ldt_len;
 +	if (nldt == -1) {
 +#ifdef DIAGNOSTIC
 +		if (!p->p_leader || p->p_leader == p)
 +			panic("i386_set_ldt: pcb_ldt_len == -1 in leader");
 +#endif
 +		nldt = ((struct pcb *)&p->p_leader->p_addr->u_pcb)->pcb_ldt_len;
 +	}
 + 	if (!pcb->pcb_ldt || (largest_ld >= nldt)) {
   		union descriptor *new_ldt = (union descriptor *)kmem_alloc(
   			kernel_map, SIZE_FROM_LARGEST_LD(largest_ld));
   		if (new_ldt == NULL) {
   			return ENOMEM;
   		}
   		if (pcb->pcb_ldt) {
 - 			bcopy(pcb->pcb_ldt, new_ldt, pcb->pcb_ldt_len
 + 			bcopy(pcb->pcb_ldt, new_ldt, nldt
   				* sizeof(union descriptor));
   			kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
 - 				pcb->pcb_ldt_len * sizeof(union descriptor));
 + 				nldt * sizeof(union descriptor));
   		} else {
   			bcopy(ldt, new_ldt, sizeof(ldt));
   		}
 -  		pcb->pcb_ldt = (caddr_t)new_ldt;
 - 		pcb->pcb_ldt_len = NEW_MAX_LD(largest_ld);
 +		/*
 +		 * copy pcb_ldt for peers, set their pcb_ldt_len = -1
 +		 * to indicate this is a copy
 +		 */
 +		for (q = p->p_leader; q; q = q->p_peers) {
 +			struct pcb *pcb2 = &q->p_addr->u_pcb;
 +
 +			pcb2->pcb_ldt = (caddr_t)new_ldt;
 +			/* the leader gets the real pcb_ldt_len */
 +			if (q == p->p_leader)
 +				pcb2->pcb_ldt_len = NEW_MAX_LD(largest_ld);
 +			else
 +				pcb2->pcb_ldt_len = -1;
 +			if (pcb2 == curpcb)
 +			    set_user_ldt((struct pcb *)&p->p_leader->p_addr->u_pcb);
 +		}
 +#ifdef DIAGNOSTIC
 +		if (!p->p_leader)
 +			panic("i386_set_ldt: p_leader == 0");
 +  		if (pcb->pcb_ldt != (caddr_t)new_ldt)
 +			panic("i386_set_ldt: pcb->pcb_ldt != new_ldt");
 +#endif
 +#if 0
   		if (pcb == curpcb)
   		    set_user_ldt(pcb);
 +#endif
    	}
  
  	/* Check descriptors for access violations */
 Index: i386/i386/trap.c
 ===================================================================
 RCS file: /home/cvs/cvs/src/sys/i386/i386/trap.c,v
 retrieving revision 1.133
 diff -u -r1.133 trap.c
 --- trap.c	1999/01/06 23:05:36	1.133
 +++ trap.c	1999/04/26 13:44:35
 @@ -434,6 +434,29 @@
  
  		switch (type) {
  		case T_PAGEFLT:			/* page fault */
 +			if (intr_nesting_level == 0) {
 +				/*
 +				 * Invalid %fs's and %gs's can be created using
 +				 * procfs or PT_SETREGS or by invalidating the
 +				 * underlying LDT entry.  This causes a fault
 +				 * in kernel mode when the kernel attempts to
 +				 * switch contexts.  Lose the bad context
 +				 * (XXX) so that we can continue, and generate
 +				 * a signal.
 +				 */
 +				if (frame.tf_eip == (int)cpu_switch_load_fs
 +				    && curpcb->pcb_fs) {
 +					curpcb->pcb_fs = 0;
 +					psignal(p, SIGBUS);
 +					return;
 +				}
 +				if (frame.tf_eip == (int)cpu_switch_load_gs
 +				    && curpcb->pcb_gs) {
 +					curpcb->pcb_gs = 0;
 +					psignal(p, SIGBUS);
 +					return;
 +				}
 +			}
  			(void) trap_pfault(&frame, FALSE, eva);
  			return;
  
 Index: i386/i386/vm_machdep.c
 ===================================================================
 RCS file: /home/cvs/cvs/src/sys/i386/i386/vm_machdep.c,v
 retrieving revision 1.115
 diff -u -r1.115 vm_machdep.c
 --- vm_machdep.c	1999/01/06 23:05:37	1.115
 +++ vm_machdep.c	1999/04/26 15:31:35
 @@ -173,11 +173,32 @@
          /* Copy the LDT, if necessary. */
          if (pcb2->pcb_ldt != 0) {
                  union descriptor *new_ldt;
 -                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
 +                int nldt = pcb2->pcb_ldt_len;
  
 -                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
 -                bcopy(pcb2->pcb_ldt, new_ldt, len);
 -                pcb2->pcb_ldt = (caddr_t)new_ldt;
 +		if (nldt == -1) {
 +#ifdef DIAGNOSTIC
 +			if (!p2->p_leader || p2->p_leader == p2)
 +				panic("cpu_fork: pcb_ldt_len == -1 in leader");
 +#endif
 +			nldt = ((struct pcb *)&p2->p_leader->p_addr->u_pcb)->pcb_ldt_len;
 +		}
 +		if (p2->p_leader == p1->p_leader) {
 +			/*
 +			 * this is a rfork(RFTHREAD|...),
 +			 * indicate pcb_ldt is a copy
 +			 */
 +			pcb2->pcb_ldt_len = -1;
 +#ifdef DIAGNOSTIC
 +			if (p2->p_leader == p2)
 +				panic("cpu_fork: p2->p_leader == p1->p_leader and p2 is leader");
 +#endif
 +		} else {
 +			new_ldt = (union descriptor *)kmem_alloc(kernel_map,
 +				nldt * sizeof(union descriptor));
 +			bcopy(pcb2->pcb_ldt, new_ldt,
 +				nldt * sizeof(union descriptor));
 +			pcb2->pcb_ldt = (caddr_t)new_ldt;
 +		}
          }
  #endif
  
 @@ -240,8 +261,13 @@
  			lldt(_default_ldt);
  			currentldt = _default_ldt;
  		}
 -		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
 -			pcb->pcb_ldt_len * sizeof(union descriptor));
 +		if (pcb->pcb_ldt_len != -1)
 +			kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
 +				pcb->pcb_ldt_len * sizeof(union descriptor));
 +#ifdef DIAGNOSTIC
 +		else if (!p->p_leader || p->p_leader == p)
 +			panic("cpu_exit: pcb_ldt_len == -1 in leader");
 +#endif
  		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
  	}
  #endif
 @@ -249,6 +275,25 @@
  	cpu_switch(p);
  	panic("cpu_exit");
  }
 +
 +#ifdef USER_LDT
 +void
 +cpu_kill9(p)
 +	register struct proc *p;
 +{
 +	struct pcb *pcb = &p->p_addr->u_pcb; 
 +	/*
 +	 * hook to undo ldt sharing:
 +	 * we are going to be SIGKILL'd so we can just forget our ldt
 +	 */
 +	if (pcb->pcb_ldt_len == -1)
 +		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
 +#ifdef DIAGNOSTIC
 +	if (pcb == curpcb)
 +		panic("cpu_kill9: pcb == curpcb");
 +#endif
 +}
 +#endif
  
  void
  cpu_wait(p)
 cvs diff: Diffing pc98/i386
 Index: pc98/i386/machdep.c
 ===================================================================
 RCS file: /home/cvs/cvs/src/sys/pc98/i386/machdep.c,v
 retrieving revision 1.105.2.3
 diff -u -r1.105.2.3 machdep.c
 --- machdep.c	1999/02/19 14:39:52	1.105.2.3
 +++ machdep.c	1999/04/26 16:34:38
 @@ -828,13 +828,34 @@
  #ifdef USER_LDT
  	/* was i386_user_cleanup() in NetBSD */
  	if (pcb->pcb_ldt) {
 -		if (pcb == curpcb) {
 -			lldt(_default_ldt);
 -			currentldt = _default_ldt;
 +		if (pcb->pcb_ldt_len != -1) {
 +#ifdef DIAGNOSTIC
 +			if (p->p_leader != p)
 +				panic("setregs: pcb_ldt_len != -1 in peer");
 +#endif
 +			if (!p->p_peers) {
 +				if (pcb == curpcb) {
 +					lldt(_default_ldt);
 +					currentldt = _default_ldt;
 +				}
 +				pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
 +				kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
 +					pcb->pcb_ldt_len * sizeof(union descriptor));
 +			} else {
 +				/* XXX what to do here? */
 +				printf("setregs: leader exec()ing, keeping shared user ldt\n");
 +			}
 +#ifdef DIAGNOSTIC
 +		} else if (!p->p_leader || p->p_leader == p) {
 +			panic("setregs: pcb_ldt_len == -1 in leader");
 +#endif
 +		} else {
 +			if (pcb == curpcb) {
 +				lldt(_default_ldt);
 +				currentldt = _default_ldt;
 +			}
 +			pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
  		}
 -		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
 -			pcb->pcb_ldt_len * sizeof(union descriptor));
 -		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
   	}
  #endif
    
  Happy hacking,
 -- 
 Juergen Lock <nox.foo@jelal.kn-bremen.de>
 (remove dot foo from address to reply)
 


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199904261930.MAA91435>