Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 15 Jan 2003 01:30:14 +0100
From:      Thomas Moestl <tmoestl@gmx.net>
To:        John Polstra <jdp@polstra.com>
Cc:        sparc@freebsd.org
Subject:   Re: Sparc64 floating point questions
Message-ID:  <20030115003013.GA3536@crow.dom2ip.de>
In-Reply-To: <XFMail.20030114144825.jdp@polstra.com>
References:  <XFMail.20030114144825.jdp@polstra.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--tKW2IUtsqtDRztdT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Tue, 2003/01/14 at 14:48:25 -0800, John Polstra wrote:
> I think that the CVSup failure one person reported was caused by the
> fact that I'm not currently saving/restoring the floating point state
> on thread context switches.

Hmmm, how do you switch contexts? If it's voluntary in some form
(i.e. using a function call like longjmp()), you do not need to save,
as the floating-point registers are not preserved across function
calls (so the compiler should save and reload them across calls as
needed, unless that's different for m3).
When switching by signals in some form, sendsig() should save the
registers in the signal context, so this should be safe when restoring
the context by returning from the signal handler.
However, this saving is not currently done, which is a bug. The
attached patch should fix this. It also implements getcontext() and
setcontext(), although setcontext() will not work for ucontexts from
signals and some optimizing remains to be done.

> I need to find out how to do that.  It
> looks like if I save and restore the FSR and the FPRS and %q0 through
> %q60, that will do it.  Am I missing anything?

Yes, that should do it.
 
> What is the most straightforward way to save all of the %qN registers?
> Like this?
> 
>         stq     %q0, ...
>         stq     %q4, ...
>         stq     %q8, ...
>     [...]
>         stq     %q60, ...

Yes, unless you want to use UltraSPARC specific instructions. Then,
you could do:

	wr	%g0, ASI_BLK_P, %asi
	stda	%f0, [...] %asi
	stda	%f16, [...] %asi
	stda	%f32, [...] %asi
	stda	%f48, [...] %asi

This stores 16 floating-point registers at a time, i.e. 64 bytes,
which is a complete 2nd-level-cache line, so it should be faster.

	- Thomas

-- 
Thomas Moestl <tmoestl@gmx.net>	http://www.tu-bs.de/~y0015675/
              <tmm@FreeBSD.org>	http://people.FreeBSD.org/~tmm/
PGP fingerprint: 1C97 A604 2BD0 E492 51D0  9C0F 1FE6 4F1D 419C 776C

--tKW2IUtsqtDRztdT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="ctx2.diff"

Index: include/cache.h
===================================================================
RCS file: /ncvs/src/sys/sparc64/include/cache.h,v
retrieving revision 1.6
diff -u -r1.6 cache.h
--- include/cache.h	20 May 2002 16:30:46 -0000	1.6
+++ include/cache.h	9 Jan 2003 23:10:53 -0000
@@ -99,8 +99,8 @@
 void	dcache_inval(pmap_t, vm_offset_t, vm_offset_t);
 void	dcache_inval_phys(vm_offset_t, vm_offset_t);
 void	dcache_blast(void);
-void	ecache_flush(vm_offset_t, vm_offset_t);
 #if 0
+void	ecache_flush(vm_offset_t, vm_offset_t);
 void	ecache_inval_phys(vm_offset_t, vm_offset_t);
 #endif
 
Index: include/fp.h
===================================================================
RCS file: /ncvs/src/sys/sparc64/include/fp.h,v
retrieving revision 1.8
diff -u -r1.8 fp.h
--- include/fp.h	8 Jun 2002 07:17:18 -0000	1.8
+++ include/fp.h	9 Jan 2003 23:27:22 -0000
@@ -45,8 +45,8 @@
  * Note: The pointers passed to the next two functions must be aligned on
  * 64 byte boundaries.
  */
-void	savefpctx(struct fpstate *);
-void	restorefpctx(struct fpstate *);
+void	savefpregs(struct fpstate *);
+void	savefpctx(struct thread *);
 
 #endif /* _KERNEL */
 #endif /* !_MACHINE_FP_H_ */
Index: include/pcb.h
===================================================================
RCS file: /ncvs/src/sys/sparc64/include/pcb.h,v
retrieving revision 1.12
diff -u -r1.12 pcb.h
--- include/pcb.h	19 Oct 2002 15:54:34 -0000	1.12
+++ include/pcb.h	9 Jan 2003 23:00:31 -0000
@@ -37,6 +37,7 @@
 /* NOTE: pcb_fpstate must be aligned on a 64 byte boundary. */
 struct pcb {
 	struct	fpstate	pcb_fpstate;
+	u_long	pcb_fpsaved;
 	u_long	pcb_fp;
 	u_long	pcb_pc;
 	u_long	pcb_nsaved;
Index: include/ucontext.h
===================================================================
RCS file: /ncvs/src/sys/sparc64/include/ucontext.h,v
retrieving revision 1.8
diff -u -r1.8 ucontext.h
--- include/ucontext.h	10 Jan 2003 00:04:56 -0000	1.8
+++ include/ucontext.h	14 Jan 2003 23:47:40 -0000
@@ -42,16 +42,24 @@
 
 typedef struct __mcontext mcontext_t;
 
+/* Common. */
 #define	mc_flags	mc_global[0]
-#define	mc_sp           mc_out[6]
-#define	mc_fprs         mc_local[0]
-#define	mc_fsr		mc_local[1]
-#define	mc_gsr		mc_local[2]
-#define	mc_tnpc         mc_in[0]
-#define	mc_tpc          mc_in[1]
-#define	mc_tstate       mc_in[2]
-#define	mc_y            mc_in[4]
-#define	mc_wstate	mc_in[5]
+#define	mc_sp		mc_out[6]
+
+/* Signal contexts only. */
+#define	mcs_fprs	mc_local[0]
+#define	mcs_fsr		mc_local[1]
+#define	mcs_gsr		mc_local[2]
+#define	mcs_tnpc	mc_in[0]
+#define	mcs_tpc		mc_in[1]
+#define	mcs_tstate	mc_in[2]
+#define	mcs_y		mc_in[4]
+#define	mcs_wstate	mc_in[5]
+
+/* Regular user contexts only. */
+#define	mcu_tpc		mc_out[0]
+#define	mcu_tnpc	mc_out[1]
+#define	mcu_pc		mc_out[7]
 
 #define	_MC_VERSION_SHIFT	0
 #define	_MC_VERSION_BITS	32
@@ -60,5 +68,6 @@
 #define	_MC_FLAGS_SHIFT		32
 #define	_MC_FLAGS_BITS		32
 #define	_MC_VOLUNTARY		((1L << 0) << _MC_FLAGS_SHIFT)
+#define	_MC_FP			((1L << 1) << _MC_FLAGS_SHIFT)
 
 #endif /* !_MACHINE_UCONTEXT_H_ */
Index: sparc64/cache.c
===================================================================
RCS file: /ncvs/src/sys/sparc64/sparc64/cache.c,v
retrieving revision 1.14
diff -u -r1.14 cache.c
--- sparc64/cache.c	5 Jan 2003 05:30:40 -0000	1.14
+++ sparc64/cache.c	10 Jan 2003 17:11:24 -0000
@@ -430,6 +430,7 @@
 		stxa_sync(dca, ASI_DCACHE_TAG, 0);
 }
 
+#if 0
 /* Flush an E$ physical range using block commit stores. */
 void
 ecache_flush(vm_offset_t start, vm_offset_t end)
@@ -439,8 +440,8 @@
 	if (!cache.c_enabled)
 		return;
 
-	/* XXX: not needed in all cases, provide a wrapper in fp.c */
-	savefpctx(&curthread->td_pcb->pcb_fpstate);
+	critical_enter();
+	savefpctx(curthread);
 	wr(fprs, 0, FPRS_FEF);
 
 	for (addr = start & ~(cache.ec_linesize - 1); addr <= end;
@@ -451,10 +452,10 @@
 	}
 	membar(Sync);
 
-	restorefpctx(&curthread->td_pcb->pcb_fpstate);
+	wr(fprs, 0, 0);
+	critical_exit();
 }
 
-#if 0
 /*
  * Invalidate an E$ range using diagnostic accesses.
  * This is disabled: it suffers from the same races as dcache_blast() and
Index: sparc64/exception.S
===================================================================
RCS file: /ncvs/src/sys/sparc64/sparc64/exception.S,v
retrieving revision 1.57
diff -u -r1.57 exception.S
--- sparc64/exception.S	29 Dec 2002 00:23:48 -0000	1.57
+++ sparc64/exception.S	10 Jan 2003 18:02:02 -0000
@@ -458,16 +458,21 @@
 	.endm
 
 	.macro	tl0_fp_restore
-	wr	%g0, FPRS_FEF, %fprs
+	ba	%xcc, tl0_fp_restore
+	 wr	%g0, FPRS_FEF, %fprs
+	.align	32
+	.endm
+
+ENTRY(tl0_fp_restore)
 	wr	%g0, ASI_BLK_S, %asi
 	ldda	[PCB_REG + PCB_FPSTATE + FP_FB0] %asi, %f0
 	ldda	[PCB_REG + PCB_FPSTATE + FP_FB1] %asi, %f16
 	ldda	[PCB_REG + PCB_FPSTATE + FP_FB2] %asi, %f32
 	ldda	[PCB_REG + PCB_FPSTATE + FP_FB3] %asi, %f48
 	membar	#Sync
+	stx	%g0, [PCB_REG + PCB_FPSAVED]
 	done
-	.align	32
-	.endm
+END(tl0_fp_restore)
 
 	.macro	tl0_insn_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
Index: sparc64/genassym.c
===================================================================
RCS file: /ncvs/src/sys/sparc64/sparc64/genassym.c,v
retrieving revision 1.45
diff -u -r1.45 genassym.c
--- sparc64/genassym.c	28 Dec 2002 23:58:18 -0000	1.45
+++ sparc64/genassym.c	9 Jan 2003 22:57:15 -0000
@@ -250,6 +250,7 @@
 ASSYM(PCB_NSAVED, offsetof(struct pcb, pcb_nsaved));
 ASSYM(PCB_RWSP, offsetof(struct pcb, pcb_rwsp));
 ASSYM(PCB_RW, offsetof(struct pcb, pcb_rw));
+ASSYM(PCB_FPSAVED, offsetof(struct pcb, pcb_fpsaved));
 
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
Index: sparc64/machdep.c
===================================================================
RCS file: /ncvs/src/sys/sparc64/sparc64/machdep.c,v
retrieving revision 1.75
diff -u -r1.75 machdep.c
--- sparc64/machdep.c	10 Jan 2003 00:04:56 -0000	1.75
+++ sparc64/machdep.c	15 Jan 2003 00:06:37 -0000
@@ -92,6 +92,7 @@
 #include <machine/clock.h>
 #include <machine/cpu.h>
 #include <machine/fp.h>
+#include <machine/fsr.h>
 #include <machine/intr_machdep.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
@@ -365,6 +366,7 @@
 	struct sigframe *sfp;
 	struct sigacts *psp;
 	struct sigframe sf;
+	mcontext_t *mcp;
 	struct thread *td;
 	struct frame *fp;
 	struct proc *p;
@@ -399,7 +401,16 @@
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
-	bcopy(tf, &sf.sf_uc.uc_mcontext, sizeof(*tf));
+
+	savefpctx(td);
+	mcp = &sf.sf_uc.uc_mcontext;
+	bcopy(tf, mcp, sizeof(*tf));
+	mcp->mc_flags = 0;	/* after bcopy(): mc_flags aliases mc_global[0] */
+	if (td->td_pcb->pcb_fpsaved) {
+		bcopy(&td->td_pcb->pcb_fpstate, &mcp->mc_fp,
+		    sizeof(mcp->mc_fp));
+		mcp->mc_flags |= _MC_FP;
+	}
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
@@ -455,6 +466,28 @@
 };
 #endif
 
+static int
+set_sigmcontext(struct thread *td, const mcontext_t *mcp)
+{
+	uint64_t wstate;
+	int error;
+
+	if ((error = rwindow_save(td)) != 0)
+		return (error);
+
+	if (!TSTATE_SECURE(mcp->mcs_tstate))
+		return (EINVAL);
+	wstate = td->td_frame->tf_wstate;
+	bcopy(mcp, td->td_frame, sizeof(*td->td_frame));
+	td->td_frame->tf_wstate = wstate;
+	if ((mcp->mc_flags & _MC_FP) != 0) {
+		td->td_frame->tf_fprs &= ~FPRS_FEF;
+		bcopy(&mcp->mc_fp, &td->td_pcb->pcb_fpstate,
+		    sizeof(td->td_pcb->pcb_fpstate));
+	}
+	return (0);
+}
+
 /*
  * MPSAFE
  */
@@ -463,14 +496,11 @@
 {
 	struct trapframe *tf;
 	struct proc *p;
-	mcontext_t *mc;
 	ucontext_t uc;
+	int error;
 
 	p = td->td_proc;
-	if (rwindow_save(td)) {
-		PROC_LOCK(p);
-		sigexit(td, SIGILL);
-	}
+	tf = td->td_frame;
 
 	CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp);
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) {
@@ -478,12 +508,8 @@
 		return (EFAULT);
 	}
 
-	mc = &uc.uc_mcontext;
-	tf = td->td_frame;
-	if (!TSTATE_SECURE(mc->mc_tstate))
-		return (EINVAL);
-	mc->mc_wstate = tf->tf_wstate;
-	bcopy(mc, tf, sizeof(*tf));
+	if ((error = set_sigmcontext(td, &uc.uc_mcontext)) != 0)
+		return (error);
 
 	PROC_LOCK(p);
 	p->p_sigmask = uc.uc_sigmask;
@@ -508,15 +534,49 @@
 int
 get_mcontext(struct thread *td, mcontext_t *mcp)
 {
+	struct trapframe *tf;
+	struct frame *f;
+	int error;
 
-	return (ENOSYS);
+	/*
+	 * Need to save the ins and locals of the caller; gcc does not know
+	 * that getcontext() is magic and will not discard them after the call.
+	 */
+	tf = td->td_frame;
+	f = (struct frame *)(uintptr_t)(tf->tf_sp + SPOFF);
+	if ((error = rwindow_save(td)) != 0 ||
+	    (error = copyin(&f->fr_local, &mcp->mc_local,
+	     sizeof(f->fr_local) + sizeof(f->fr_in))) != 0)
+		return (error);
+	mcp->mc_sp = tf->tf_sp;
+	mcp->mcu_pc = tf->tf_out[7];
+	mcp->mcu_tpc = tf->tf_tpc;
+	mcp->mcu_tnpc = tf->tf_tnpc;
+	mcp->mc_flags = _MC_VOLUNTARY;
+	return (0);
 }
 
 int
 set_mcontext(struct thread *td, const mcontext_t *mcp)
 {
+	struct trapframe *tf;
+	struct frame *f;
+	int error;
 
-	return (ENOSYS);
+	/* Contexts without _MC_VOLUNTARY are handed to set_sigmcontext(). */
+	if ((mcp->mc_flags & _MC_VOLUNTARY) == 0)
+		return (set_sigmcontext(td, mcp));
+	tf = td->td_frame;
+	f = (struct frame *)(uintptr_t)(mcp->mc_sp + SPOFF);
+	if ((error = rwindow_save(td)) != 0 ||
+	    (error = copyout(&mcp->mc_local, &f->fr_local,
+	     sizeof(f->fr_local) + sizeof(f->fr_in))) != 0)
+		return (error);
+	tf->tf_sp = mcp->mc_sp;
+	tf->tf_out[7] = mcp->mcu_pc;
+	tf->tf_tpc = mcp->mcu_tpc;
+	tf->tf_tnpc = mcp->mcu_tnpc;
+	return (0);
 }
 
 /*
@@ -696,4 +756,18 @@
 	tf->tf_fsr = fpregs->fr_fsr;
 	tf->tf_gsr = fpregs->fr_gsr;
 	return (0);
+}
+
+void
+savefpctx(struct thread *td)
+{
+	struct pcb *pcb = td->td_pcb;
+
+	critical_enter();
+	if ((td->td_frame->tf_fprs & FPRS_FEF) != 0) {
+		savefpregs(&pcb->pcb_fpstate);
+		pcb->pcb_fpsaved = 1;
+		td->td_frame->tf_fprs &= ~FPRS_FEF;
+	}
+	critical_exit();
 }
Index: sparc64/swtch.S
===================================================================
RCS file: /ncvs/src/sys/sparc64/sparc64/swtch.S,v
retrieving revision 1.22
diff -u -r1.22 swtch.S
--- sparc64/swtch.S	22 Oct 2002 18:03:15 -0000	1.22
+++ sparc64/swtch.S	9 Jan 2003 23:27:51 -0000
@@ -74,6 +74,7 @@
 	stda	%f48, [%l1 + PCB_FPSTATE + FP_FB3] %asi
 	membar	#Sync
 	wr	%g0, 0, %fprs
+	stx	%l3, [%l1 + PCB_FPSAVED]
 	andn	%l3, FPRS_FEF, %l3
 	stx	%l3, [%l2 + TF_FPRS]
 
@@ -269,8 +270,8 @@
 ENTRY(savectx)
 	save	%sp, -CCFSZ, %sp
 	flushw
-	call	savefpctx
-	 mov	%i0, %o0
+	call	savefpregs
+	 add	%i0, PCB_FPSTATE, %o0
 	stx	%fp, [%i0 + PCB_FP]
 	stx	%i7, [%i0 + PCB_PC]
 	ret
@@ -280,29 +281,14 @@
 /*
  * void savefpctx(struct fpstate *);
  */
-ENTRY(savefpctx)
+ENTRY(savefpregs)
 	wr	%g0, FPRS_FEF, %fprs
 	wr	%g0, ASI_BLK_S, %asi
-	stda	%f0, [%o0 + PCB_FPSTATE + FP_FB0] %asi
-	stda	%f16, [%o0 + PCB_FPSTATE + FP_FB1] %asi
-	stda	%f32, [%o0 + PCB_FPSTATE + FP_FB2] %asi
-	stda	%f48, [%o0 + PCB_FPSTATE + FP_FB3] %asi
+	stda	%f0, [%o0 + FP_FB0] %asi
+	stda	%f16, [%o0 + FP_FB1] %asi
+	stda	%f32, [%o0 + FP_FB2] %asi
+	stda	%f48, [%o0 + FP_FB3] %asi
 	membar	#Sync
 	retl
 	 wr	%g0, 0, %fprs
-END(savefpctx)
-
-/*
- * void restorefpctx(struct fpstate *);
- */	
-ENTRY(restorefpctx)
-	wr	%g0, FPRS_FEF, %fprs
-	wr	%g0, ASI_BLK_S, %asi
-	ldda	[%o0 + PCB_FPSTATE + FP_FB0] %asi, %f0
-	ldda	[%o0 + PCB_FPSTATE + FP_FB1] %asi, %f16
-	ldda	[%o0 + PCB_FPSTATE + FP_FB2] %asi, %f32
-	ldda	[%o0 + PCB_FPSTATE + FP_FB3] %asi, %f48
-	membar	#Sync
-	retl
-	 wr	%g0, 0, %fprs
-END(restorefpctx)
+END(savefpregs)
Index: sparc64/vm_machdep.c
===================================================================
RCS file: /ncvs/src/sys/sparc64/sparc64/vm_machdep.c,v
retrieving revision 1.32
diff -u -r1.32 vm_machdep.c
--- sparc64/vm_machdep.c	5 Jan 2003 05:30:40 -0000	1.32
+++ sparc64/vm_machdep.c	9 Jan 2003 23:14:32 -0000
@@ -181,11 +181,7 @@
 	/*
 	 * Ensure that p1's pcb is up to date.
 	 */
-	if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0) {
-		mtx_lock_spin(&sched_lock);
-		savefpctx(&pcb1->pcb_fpstate);
-		mtx_unlock_spin(&sched_lock);
-	}
+	savefpctx(td1);
 	/* Make sure the copied windows are spilled. */
 	flushw();
 	/* Copy the pcb (this will copy the windows saved in the pcb, too). */

--tKW2IUtsqtDRztdT--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-sparc" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20030115003013.GA3536>