From owner-p4-projects Sun Aug 18 20:29: 9 2002 Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id BBD6B37B401; Sun, 18 Aug 2002 20:28:30 -0700 (PDT) Delivered-To: perforce@freebsd.org Received: from mx1.FreeBSD.org (mx1.FreeBSD.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 6E2D837B400 for ; Sun, 18 Aug 2002 20:28:30 -0700 (PDT) Received: from freefall.freebsd.org (freefall.FreeBSD.org [216.136.204.21]) by mx1.FreeBSD.org (Postfix) with ESMTP id DDC2C43E70 for ; Sun, 18 Aug 2002 20:28:29 -0700 (PDT) (envelope-from mini@freebsd.org) Received: from freefall.freebsd.org (perforce@localhost [127.0.0.1]) by freefall.freebsd.org (8.12.4/8.12.4) with ESMTP id g7J3STJU059187 for ; Sun, 18 Aug 2002 20:28:29 -0700 (PDT) (envelope-from mini@freebsd.org) Received: (from perforce@localhost) by freefall.freebsd.org (8.12.4/8.12.4/Submit) id g7J3STap059184 for perforce@freebsd.org; Sun, 18 Aug 2002 20:28:29 -0700 (PDT) Date: Sun, 18 Aug 2002 20:28:29 -0700 (PDT) Message-Id: <200208190328.g7J3STap059184@freefall.freebsd.org> X-Authentication-Warning: freefall.freebsd.org: perforce set sender to mini@freebsd.org using -f From: Jonathan Mini Subject: PERFORCE change 16241 for review To: Perforce Change Reviews Sender: owner-p4-projects@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG http://people.freebsd.org/~peter/p4db/chv.cgi?CH=16241 Change 16241 by mini@mini_stylus on 2002/08/18 20:28:06 - Add the glue needed to get and save mcontext_t structs from and to a thread (respectively). - Modify the timing of FPU state management so that (among other things) the FPU regs are set from the mcontext_t for a thread just before returning to userland upon delivering a signal. Sadly, this is i386-only. 
This work is from Daniel Eischen, who wrote alpha MD code as well, but I was unable to include that here (lack of testability, lack of understanding of alpha arch on my part). Affected files ... .. //depot/projects/kse/sys/i386/i386/genassym.c#31 edit .. //depot/projects/kse/sys/i386/i386/machdep.c#51 edit .. //depot/projects/kse/sys/i386/i386/support.s#10 edit .. //depot/projects/kse/sys/i386/include/npx.h#6 edit .. //depot/projects/kse/sys/i386/include/pcb.h#7 edit .. //depot/projects/kse/sys/i386/include/signal.h#4 edit .. //depot/projects/kse/sys/i386/include/ucontext.h#3 edit .. //depot/projects/kse/sys/i386/isa/npx.c#19 edit Differences ... ==== //depot/projects/kse/sys/i386/i386/genassym.c#31 (text+ko) ==== @@ -142,7 +142,6 @@ ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); -ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ==== //depot/projects/kse/sys/i386/i386/machdep.c#51 (text+ko) ==== @@ -130,6 +130,9 @@ #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); +static void fpstate_drop(struct thread *td); +static void get_fpcontext(struct thread *td, mcontext_t *mcp); +static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); @@ -428,8 +431,10 @@ ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 
1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); - sf.sf_uc.uc_mcontext.mc_flags = __UC_MC_VALID; /* no FP regs */ bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); + sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ + get_fpcontext(td, &sf.sf_uc.uc_mcontext); + fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && @@ -661,7 +666,7 @@ struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; - int cs, eflags, error; + int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) @@ -735,6 +740,9 @@ return (EINVAL); } + ret = set_fpcontext(td, &ucp->uc_mcontext); + if (ret != 0) + return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } @@ -895,11 +903,6 @@ */ load_cr0(rcr0() | CR0_MP | CR0_TS); -#ifdef DEV_NPX - /* Initialize the npx (if any) for the current process. */ - npxinit(__INITIAL_NPXCW__); -#endif - /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. 
Linux binaries depend @@ -1989,8 +1992,6 @@ /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; - - sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; } static void @@ -2015,8 +2016,6 @@ /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; - - sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; } #endif /* CPU_ENABLE_SSE */ @@ -2048,6 +2047,113 @@ return (0); } +void +get_mcontext(struct thread *td, mcontext_t *mcp) +{ + struct trapframe *tp; + + tp = td->td_frame; + + mcp->mc_onstack = sigonstack(tp->tf_esp); + mcp->mc_gs = td->td_pcb->pcb_gs; + mcp->mc_fs = tp->tf_fs; + mcp->mc_es = tp->tf_es; + mcp->mc_ds = tp->tf_ds; + mcp->mc_edi = tp->tf_edi; + mcp->mc_esi = tp->tf_esi; + mcp->mc_ebp = tp->tf_ebp; + mcp->mc_isp = tp->tf_isp; + mcp->mc_ebx = tp->tf_ebx; + mcp->mc_edx = tp->tf_edx; + mcp->mc_ecx = tp->tf_ecx; + mcp->mc_eax = tp->tf_eax; + mcp->mc_eip = tp->tf_eip; + mcp->mc_cs = tp->tf_cs; + mcp->mc_eflags = tp->tf_eflags; + mcp->mc_esp = tp->tf_esp; + mcp->mc_ss = tp->tf_ss; + mcp->mc_len = sizeof(*mcp); + get_fpcontext(td, mcp); +} + +int +set_mcontext(struct thread *td, const mcontext_t *mcp) +{ + struct trapframe *tp; + int ret; + + tp = td->td_frame; + + if (!EFL_SECURE(mcp->mc_eflags, tp->tf_eflags) || + !CS_SECURE(mcp->mc_cs)) + ret = EINVAL; + else if (mcp->mc_len != sizeof(*mcp)) + ret = EINVAL; + else if ((ret = set_fpcontext(td, mcp)) == 0) { + tp->tf_fs = mcp->mc_fs; + tp->tf_es = mcp->mc_es; + tp->tf_ds = mcp->mc_ds; + tp->tf_edi = mcp->mc_edi; + tp->tf_esi = mcp->mc_esi; + tp->tf_ebp = mcp->mc_ebp; + tp->tf_ebx = mcp->mc_ebx; + tp->tf_edx = mcp->mc_edx; + tp->tf_ecx = mcp->mc_ecx; + tp->tf_eax = mcp->mc_eax; + tp->tf_eip = mcp->mc_eip; + tp->tf_cs = mcp->mc_cs; + tp->tf_eflags = mcp->mc_eflags; + tp->tf_esp = mcp->mc_esp; + tp->tf_ss = mcp->mc_ss; + td->td_pcb->pcb_gs = mcp->mc_gs; + ret = 0; + } + return (ret); +} + +static void +get_fpcontext(struct thread *td, mcontext_t *mcp) +{ +#ifndef DEV_NPX + 
mcp->mc_fpformat = _MC_FPFMT_NODEV; + mcp->mc_ownedfp = _MC_FPOWNED_NONE; +#else + mcp->mc_ownedfp = npxgetregs(td, (union savefpu *)&mcp->mc_fpstate); + mcp->mc_fpformat = npxformat(); +#endif +} + +static int +set_fpcontext(struct thread *td, const mcontext_t *mcp) +{ + + if (mcp->mc_fpformat == _MC_FPFMT_NODEV) + return (0); + else if ((mcp->mc_fpformat != _MC_FPFMT_387) && + ((mcp->mc_fpformat != _MC_FPFMT_XMM))) + return (EINVAL); + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + /* We don't care what state is left in the FPU or PCB. */ + fpstate_drop(td); + else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + mcp->mc_ownedfp == _MC_FPOWNED_PCB) + npxsetregs(td, (union savefpu *)&mcp->mc_fpstate); + else + return (EINVAL); + return (0); +} + +static void +fpstate_drop(struct thread *td) +{ + register_t s; + + s = intr_disable(); + if (PCPU_GET(fpcurthread) == td) + npxdrop(); + intr_restore(s); +} + int fill_dbregs(struct thread *td, struct dbreg *dbregs) { ==== //depot/projects/kse/sys/i386/i386/support.s#10 (text+ko) ==== @@ -976,7 +976,7 @@ ENTRY(fastmove) pushl %ebp movl %esp,%ebp - subl $PCB_SAVE87_SIZE+3*4,%esp + subl $PCB_SAVEFPU_SIZE+3*4,%esp movl 8(%ebp),%ecx cmpl $63,%ecx @@ -1018,7 +1018,7 @@ movl PCPU(CURPCB),%esi addl $PCB_SAVEFPU,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1102,7 +1102,7 @@ addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1147,7 +1147,7 @@ addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl ==== //depot/projects/kse/sys/i386/include/npx.h#6 (text+ko) ==== @@ -73,7 +73,7 @@ struct save87 { struct env87 sv_env; /* floating point control/status */ struct fpacc87 sv_ac[8]; /* accumulator contents, 0-7 */ - u_long sv_ex_sw; /* status word for last exception */ + u_char sv_pad0[4]; /* padding for (now unused) saved status 
word */ /* * Bogus padding for emulators. Emulators should use their own * struct and arrange to store into this struct (ending here) @@ -112,8 +112,7 @@ u_char fp_pad[6]; /* padding */ } sv_fp[8]; struct xmmacc sv_xmm[8]; - u_long sv_ex_sw; /* status word for last exception */ - u_char sv_pad[220]; + u_char sv_pad[224]; } __attribute__((aligned(16))); union savefpu { @@ -142,9 +141,13 @@ #ifdef _KERNEL int npxdna(void); +void npxdrop(void); void npxexit(struct thread *td); -void npxinit(int control); +int npxformat(void); +int npxgetregs(struct thread *td, union savefpu *addr); +void npxinit(u_short control); void npxsave(union savefpu *addr); +void npxsetregs(struct thread *td, union savefpu *addr); int npxtrap(void); #endif ==== //depot/projects/kse/sys/i386/include/pcb.h#7 (text+ko) ==== @@ -66,6 +66,7 @@ #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ #define PCB_DBREGS 0x02 /* process using debug registers */ #define PCB_NPXTRAP 0x04 /* npx trap pending */ +#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */ caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; struct pcb_ext *pcb_ext; /* optional pcb extension */ ==== //depot/projects/kse/sys/i386/include/signal.h#4 (text+ko) ==== @@ -117,13 +117,16 @@ int sc_efl; int sc_esp; int sc_ss; + int sc_len; /* sizeof(struct mcontext_t) */ /* - * XXX FPU state is 27 * 4 bytes h/w, 1 * 4 bytes s/w (probably not - * needed here), or that + 16 * 4 bytes for emulators (probably all - * needed here). The "spare" bytes are mostly not spare. + * XXX - See and for + * the following fields. 
*/ - int sc_fpregs[28]; /* machine state (FPU): */ - int sc_spare[17]; + int sc_fpformat; + int sc_ownedfp; + int sc_spare1[1]; + int sc_fpregs[128]; + int sc_spare2[8]; }; #define sc_sp sc_esp ==== //depot/projects/kse/sys/i386/include/ucontext.h#3 (text+ko) ==== @@ -58,11 +58,25 @@ int mc_esp; /* machine state */ int mc_ss; - int mc_fpregs[28]; /* env87 + fpacc87 + u_long */ -#define __UC_MC_VALID 0x0001 /* mcontext register state is valid */ -#define __UC_FP_VALID 0x0002 /* FP registers have been saved */ - int mc_flags; - int __spare__[16]; + int mc_len; /* sizeof(mcontext_t) */ +#define _MC_FPFMT_NODEV 0 /* device not present or configured */ +#define _MC_FPFMT_387 1 +#define _MC_FPFMT_XMM 2 + int mc_fpformat; +#define _MC_FPOWNED_NONE 0 /* FP state not used */ +#define _MC_FPOWNED_FPU 1 /* FP state came from FPU */ +#define _MC_FPOWNED_PCB 2 /* FP state came from PCB */ + int mc_ownedfp; + int mc_spare1[1]; /* align next field to 16 bytes */ + int mc_fpstate[128]; /* must be multiple of 16 bytes */ + int mc_spare2[8]; } mcontext_t; +#ifdef _KERNEL +struct thread; + +void get_mcontext(struct thread *td, mcontext_t *mcp); +int set_mcontext(struct thread *td, const mcontext_t *mcp); +#endif + #endif /* !_MACHINE_UCONTEXT_H_ */ ==== //depot/projects/kse/sys/i386/isa/npx.c#19 (text+ko) ==== @@ -74,6 +74,7 @@ #include #include #include +#include #ifndef SMP #include @@ -144,17 +145,11 @@ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#define GET_FPU_EXSW_PTR(pcb) \ - (cpu_fxsr ? 
\ - &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \ - &(pcb)->pcb_save.sv_87.sv_ex_sw) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#define GET_FPU_EXSW_PTR(pcb) \ - (&(pcb)->pcb_save.sv_87.sv_ex_sw) #endif /* CPU_ENABLE_SSE */ typedef u_char bool_t; @@ -183,6 +178,8 @@ static volatile u_int npx_traps_while_probing; #endif +static union savefpu npx_cleanstate; +static bool_t npx_cleanstate_ready; static bool_t npx_ex16; static bool_t npx_exists; static bool_t npx_irq13; @@ -454,6 +451,7 @@ device_t dev; { int flags; + register_t s; if (resource_int_value("npx", 0, "flags", &flags) != 0) flags = 0; @@ -490,6 +488,14 @@ } npxinit(__INITIAL_NPXCW__); + if (npx_cleanstate_ready == 0) { + s = intr_disable(); + stop_emulating(); + fpusave(&npx_cleanstate); + start_emulating(); + npx_cleanstate_ready = 1; + intr_restore(s); + } #ifdef I586_CPU_XXX if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists && timezero("i586_bzero()", i586_bzero) < @@ -536,8 +542,6 @@ fninit(); #endif fldcw(&control); - if (PCPU_GET(curpcb) != NULL) - fpusave(&PCPU_GET(curpcb)->pcb_save); start_emulating(); intr_restore(savecrit); } @@ -559,8 +563,7 @@ if (npx_exists) { u_int masked_exceptions; - masked_exceptions = PCPU_GET(curpcb)->pcb_save.sv_87.sv_env.en_cw - & PCPU_GET(curpcb)->pcb_save.sv_87.sv_env.en_sw & 0x7f; + masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). @@ -574,6 +577,19 @@ #endif } +int +npxformat() +{ + + if (!npx_exists) + return (_MC_FPFMT_NODEV); +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) + return (_MC_FPFMT_XMM); +#endif + return (_MC_FPFMT_387); +} + /* * The following mechanism is used to ensure that the FPE_... 
value * that is passed as a trapcode to the signal handler of the user @@ -767,7 +783,6 @@ { register_t savecrit; u_short control, status; - u_long *exstat; if (!npx_exists) { printf("npxtrap: fpcurthread = %p, curthread = %p, npx_exists = %d\n", @@ -789,11 +804,7 @@ fnstsw(&status); } - exstat = GET_FPU_EXSW_PTR(curthread->td_pcb); - *exstat = status; - if (PCPU_GET(fpcurthread) != curthread) - GET_FPU_SW(curthread) &= ~0x80bf; - else + if (PCPU_GET(fpcurthread) == curthread) fnclex(); intr_restore(savecrit); return (fpetable[status & ((~control & 0x3f) | 0x40)]); @@ -809,8 +820,9 @@ int npxdna() { - u_long *exstat; + struct pcb *pcb; register_t s; + u_short control; if (!npx_exists) return (0); @@ -825,22 +837,35 @@ * Record new context early in case frstor causes an IRQ13. */ PCPU_SET(fpcurthread, curthread); + pcb = PCPU_GET(curpcb); - exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb)); - *exstat = 0; - /* - * The following frstor may cause an IRQ13 when the state being - * restored has a pending error. The error will appear to have been - * triggered by the current (npx) user instruction even when that - * instruction is a no-wait instruction that should not trigger an - * error (e.g., fnclex). On at least one 486 system all of the - * no-wait instructions are broken the same as frstor, so our - * treatment does not amplify the breakage. On at least one - * 386/Cyrix 387 system, fnclex works correctly while frstor and - * fnsave are broken, so our treatment breaks fnclex if it is the - * first FPU instruction after a context switch. - */ - fpurstor(&PCPU_GET(curpcb)->pcb_save); + if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { + /* + * This is the first time this thread has used the FPU or + * the PCB doesn't contain a clean FPU state. Explicitly + * initialize the FPU and load the default control word. 
+ */ + fninit(); + control = __INITIAL_NPXCW__; + fldcw(&control); + pcb->pcb_flags |= PCB_NPXINITDONE; + } else { + /* + * The following frstor may cause an IRQ13 when the state + * being restored has a pending error. The error will + * appear to have been triggered by the current (npx) user + * instruction even when that instruction is a no-wait + * instruction that should not trigger an error (e.g., + * fnclex). On at least one 486 system all of the no-wait + * instructions are broken the same as frstor, so our + * treatment does not amplify the breakage. On at least + * one 386/Cyrix 387 system, fnclex works correctly while + * frstor and fnsave are broken, so our treatment breaks + * fnclex if it is the first FPU instruction after a context + * switch. + */ + fpurstor(&pcb->pcb_save); + } intr_restore(s); return (1); @@ -881,6 +906,87 @@ PCPU_SET(fpcurthread, NULL); } +/* + * This should be called with interrupts disabled and only when the owning + * FPU thread is non-null. + */ +void +npxdrop() +{ + struct thread *td; + + td = PCPU_GET(fpcurthread); + PCPU_SET(fpcurthread, NULL); + td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + start_emulating(); +} + +/* + * Get the state of the FPU without dropping ownership (if possible). + * It returns the FPU ownership status. + */ +int +npxgetregs(td, addr) + struct thread *td; + union savefpu *addr; +{ + register_t s; + + if (!npx_exists) + return (_MC_FPOWNED_NONE); + + if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { + if (npx_cleanstate_ready) + bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate)); + else + bzero(addr, sizeof(*addr)); + return (_MC_FPOWNED_NONE); + } + + s = intr_disable(); + if (td == PCPU_GET(fpcurthread)) { + fpusave(addr); +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + /* + * fnsave initializes the FPU and destroys whatever + * context it contains. Make sure the FPU owner + * starts with a clean state next time. 
+ */ + npxdrop(); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +/* + * Set the state of the FPU; this must be called with interrupts disabled. + */ +void +npxsetregs(td, addr) + struct thread *td; + union savefpu *addr; +{ + register_t s; + + if (!npx_exists) + return; + + s = intr_disable(); + if (td == PCPU_GET(fpcurthread)) { + fpurstor(addr); + intr_restore(s); + } else { + intr_restore(s); + bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + } +} + static void fpusave(addr) union savefpu *addr; To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe p4-projects" in the body of the message