Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 25 May 1996 03:39:11 -0700 (PDT)
From:      asami@cs.berkeley.edu (Satoshi Asami)
To:        bde@zeta.org.au
Cc:        current@freebsd.org, ccd@stampede.cs.berkeley.edu
Subject:   More on kernel bcopy
Message-ID:  <199605251039.DAA21280@silvia.HIP.Berkeley.EDU>

next in thread | raw e-mail | index | archive | help
Bruce,

I tried calling fastmove from bcopy.  Unfortunately, the kernel crashed
right after it was loaded.  I remember you mentioning that the
fnsave/frstor part is only used when fastmove is called from bcopy and
an interrupt handler calls bcopy.  Maybe there is still a bug in there.

Here's the diff.

===
Index: support.s
===================================================================
RCS file: /usr/cvs/src/sys/i386/i386/support.s,v
retrieving revision 1.35
diff -u -r1.35 support.s
--- support.s	1996/05/03 21:01:00	1.35
+++ support.s	1996/05/25 09:30:20
@@ -291,6 +291,18 @@
 	subl	%esi,%eax
 	cmpl	%ecx,%eax			/* overlapping? */
 	jb	1f
+#ifdef I586_FAST_BCOPY
+	cmpl	$128,%ecx
+	jbe	slow_bcopy
+
+	jmp	slow_bcopy	/* XXX take this out and see it crash */
+
+	call	fastmove
+	jmp	done_bcopy
+
+	ALIGN_TEXT
+slow_bcopy:
+#endif /* I586_FAST_BCOPY */
 	shrl	$2,%ecx				/* copy by 32-bit words */
 	cld					/* nope, copy forwards */
 	rep
@@ -299,6 +311,9 @@
 	andl	$3,%ecx				/* any bytes left? */
 	rep
 	movsb
+#ifdef I586_FAST_BCOPY
+done_bcopy:
+#endif /* I586_FAST_BCOPY */
 	popl	%edi
 	popl	%esi
 	ret
@@ -453,6 +468,16 @@
 	/* bcopy(%esi, %edi, %ebx) */
 3:
 	movl	%ebx,%ecx
+#ifdef I586_FAST_BCOPY
+	cmpl	$128,%ecx
+	jbe	slow_copyout
+
+	call	fastmove
+	jmp	done_copyout
+
+	ALIGN_TEXT
+slow_copyout:
+#endif /* I586_FAST_BCOPY */
 	shrl	$2,%ecx
 	cld
 	rep
@@ -500,6 +525,16 @@
 	cmpl	$VM_MAXUSER_ADDRESS,%edx
 	ja	copyin_fault
 
+#ifdef I586_FAST_BCOPY
+	cmpl	$128,%ecx
+	jbe	slow_copyin
+
+	call	fastmove
+	jmp	done_copyin
+
+	ALIGN_TEXT
+slow_copyin:
+#endif /* I586_FAST_BCOPY */
 	movb	%cl,%al
 	shrl	$2,%ecx				/* copy longword-wise */
 	cld
@@ -510,6 +545,10 @@
 	rep
 	movsb
 
+#ifdef I586_FAST_BCOPY
+	ALIGN_TEXT
+done_copyin:
+#endif /* I586_FAST_BCOPY */
 	popl	%edi
 	popl	%esi
 	xorl	%eax,%eax
@@ -525,6 +564,206 @@
 	movl	$0,PCB_ONFAULT(%edx)
 	movl	$EFAULT,%eax
 	ret
+
+#ifdef I586_FAST_BCOPY
+/* fastmove(src, dst, len): copy len bytes; 64-byte blocks go through the FPU, the rest via rep movs
+	src in %esi (FPU path is taken only if this is a multiple of 8)
+	dst in %edi (FPU path is taken only if this is a multiple of 8)
+	len in %ecx (FPU path is taken only if this is greater than 63)
+	uses %eax and %edx for tmp. storage (%edx keeps the old %cr0 in the interrupt path)
+ */
+/* 
+LC0:
+	.ascii	"npxproc == curproc\0"
+LC1:
+	.ascii	"support.s"
+ */
+	ALIGN_TEXT
+fastmove:	/* NOTE(review): no fault recovery is visible in here for the copyin/copyout callers -- confirm */
+	cmpl	$63,%ecx	/* fewer than 64 bytes: go straight to the rep movs tail */
+	jbe	fastmove_tail
+
+	testl	$7,%esi	/* check if src addr is multiple of 8 */
+	jnz	fastmove_tail
+
+	testl	$7,%edi	/* check if dst addr is multiple of 8 */
+	jnz	fastmove_tail
+
+	pushl	%ebp
+	movl	%esp,%ebp
+	subl	$176,%esp	/* 176-byte scratch area at -176(%ebp) for a copy of the FPU state */
+
+/* if (intr_nesting_level > 0) */
+	cmpb	$0,_intr_nesting_level
+	je	L6
+/* save reentrantly: FPU state goes to the stack scratch area */
+	movl	%cr0,%edx	/* remember %cr0; it is written back after frstor below */
+	clts	/* clear CR0.TS before touching the FPU */
+	fnsave	-176(%ebp)
+	jmp L7
+
+/* else { */
+	ALIGN_TEXT
+L6:
+/* if (npxproc != NULL) { */
+	cmpl	$0,_npxproc
+	je	L8
+/*    assert(npxproc == curproc); */
+/*	movl	_npxproc,%eax
+	cmpl	%eax,_curproc
+	je	L6b
+	pushl	LC0
+	pushl	$599
+	pushl	LC1
+	call	___assert
+	addl	$12,%esp
+L6b: */
+/*    fnsave(&curpcb->pcb_savefpu); */
+	movl	_curpcb,%eax	/* NOTE(review): assumes _curpcb is valid here -- TODO confirm for early-boot callers */
+	fnsave	112(%eax)	/* 112 is used as the offset of pcb_savefpu (see the line above) */
+/*   npxproc = NULL; */
+	movl	$0,_npxproc
+/* } */
+L8:
+/* now we own the FPU. */
+
+/*
+ * The process' FP state is saved in the pcb, but if we get
+ * switched, the cpu_switch() will store our FP state in the
+ * pcb.  It should be possible to avoid all the copying for
+ * this, e.g., by setting a flag to tell cpu_switch() to
+ * save the state somewhere else.
+ */
+/* tmp = curpcb->pcb_savefpu; */
+	pushl	%edi	/* the copy below uses %edi/%esi/%ecx, so keep the caller's values */
+	pushl	%esi
+	pushl	%ecx
+	leal	-176(%ebp),%edi
+	movl	_curpcb,%esi
+	addl	$112,%esi
+	cld
+	movl	$44,%ecx	/* 44 longwords = 176 bytes */
+	rep
+	movsl
+	popl	%ecx
+	popl	%esi
+	popl	%edi
+/* stop_emulating(); */
+	clts
+/* npxproc = curproc; */
+	movl	_curproc,%eax
+	movl	%eax,_npxproc
+/* } */
+L7:
+4:	/* top of one pass; re-entered from "jae 4b" while 64 or more bytes remain */
+	pushl %ecx	/* stack slot = total length still to copy */
+	cmpl $1792,%ecx	/* limit a single pass to 1792 bytes */
+	jbe 2f
+	movl $1792,%ecx
+2:
+	subl %ecx,0(%esp)	/* stack slot = length left for later passes */
+	cmpl $256,%ecx	/* passes shorter than 256 bytes skip the read loop */
+	jb 5f
+	pushl %esi
+	pushl %ecx
+	.align 4,0x90
+3:	/* read one longword from every 32 bytes of source; the value in %eax is discarded */
+	movl 0(%esi),%eax	/* presumably done to pull the source into the cache -- TODO confirm */
+	movl 32(%esi),%eax
+	movl 64(%esi),%eax
+	movl 96(%esi),%eax
+	movl 128(%esi),%eax
+	movl 160(%esi),%eax
+	movl 192(%esi),%eax
+	movl 224(%esi),%eax
+	addl $256,%esi
+	subl $256,%ecx
+	cmpl $256,%ecx
+	jae 3b
+	popl %ecx	/* back to the pass length and source pointer from before the read loop */
+	popl %esi
+5:
+	ALIGN_TEXT
+fastmove_loop:	/* each iteration moves 64 bytes */
+	fildq	0(%esi)	/* push eight 64-bit integers from the source onto the FPU stack */
+	fildq	8(%esi)
+	fildq	16(%esi)
+	fildq	24(%esi)
+	fildq	32(%esi)
+	fildq	40(%esi)
+	fildq	48(%esi)
+	fildq	56(%esi)
+	fistpq 56(%edi)	/* pop them in reverse order, so each lands at its matching offset */
+	fistpq 48(%edi)
+	fistpq 40(%edi)
+	fistpq 32(%edi)
+	fistpq 24(%edi)
+	fistpq 16(%edi)
+	fistpq 8(%edi)
+	fistpq 0(%edi)
+	addl $-64,%ecx
+	addl $64,%esi
+	addl $64,%edi
+	cmpl $63,%ecx	/* keep going while at least 64 bytes of this pass remain */
+	ja fastmove_loop
+	popl %eax	/* length that was left for later passes */
+	addl %eax,%ecx	/* plus whatever this pass did not consume */
+	cmpl $64,%ecx
+	jae 4b
+	
+/* if (intr_nesting_level > 0) */
+
+	cmpb	$0,_intr_nesting_level
+	je	L9
+	
+/* Restore reentrantly. */
+	frstor	-176(%ebp)	/* reload what fnsave stored in the scratch area at entry */
+	movl	%edx,%cr0	/* put back the %cr0 value read at entry */
+	jmp	L10
+
+/* else { */
+	ALIGN_TEXT
+L9:
+/* curpcb->pcb_savefpu = tmp; */
+	pushl	%edi
+	pushl	%esi
+	pushl	%ecx
+	movl	_curpcb,%edi
+	addl	$112,%edi
+	leal	-176(%ebp),%esi
+	cld
+	movl	$44,%ecx	/* 44 longwords = 176 bytes */
+	rep
+	movsl
+	popl	%ecx
+	popl	%esi
+	popl	%edi
+
+/* start_emulating(); */
+	smsw	%ax
+	orb	$8,%al	/* set bit 3 (TS) of the machine status word */
+	lmsw	%ax
+/* npxproc = NULL; */
+	movl	$0,_npxproc
+/* } */
+L10:
+	movl	%ebp,%esp	/* release the scratch area and the frame */
+	popl	%ebp
+	
+	ALIGN_TEXT
+fastmove_tail:	/* reached by fall-through with under 64 bytes left, or directly from the entry checks */
+	movb	%cl,%al	/* keep the low byte of the count for the byte step below */
+	shrl	$2,%ecx				/* copy longword-wise */
+	cld
+	rep
+	movsl
+	movb	%al,%cl
+	andb	$3,%cl				/* copy remaining bytes */
+	rep
+	movsb
+
+	ret
+#endif /* I586_FAST_BCOPY */
 
 /*
  * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
Index: trap.c
===================================================================
RCS file: /usr/cvs/src/sys/i386/i386/trap.c,v
retrieving revision 1.76
diff -u -r1.76 trap.c
--- trap.c	1996/05/18 03:36:19	1.76
+++ trap.c	1996/05/18 11:23:39
@@ -319,6 +319,14 @@
 			(void) trap_pfault(&frame, FALSE);
 			return;
 
+		case T_DNA:
+#if NNPX > 0
+			/* if a transparent fault (due to context switch "late") */
+			if (npxdna())
+				return;
+#endif	/* NNPX > 0 */
+			break;
+
 		case T_PROTFLT:		/* general protection fault */
 		case T_SEGNPFLT:	/* segment not present fault */
 			/*

===

As posted, bcopy will check the copy size but won't call fastmove.  When
you take out this line:

	jmp	slow_bcopy	/* XXX take this out and see it crash */

it should crash with fireworks on the screen right after the kernel
load.

Satoshi



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199605251039.DAA21280>