Date: Mon, 8 Apr 1996 19:25:31 -0700 From: asami@cs.berkeley.edu (Satoshi Asami) To: terry@lambert.org Cc: paul@netcraft.co.uk, davidg@Root.COM, current@FreeBSD.org, nisha@cs.berkeley.edu, tege@matematik.su.se, hasty@star-gate.com Subject: Re: fast memory copy for large data sizes Message-ID: <199604090225.TAA16078@sunrise.cs.berkeley.edu> In-Reply-To: <199604052314.QAA25117@phaeton.artisoft.com> (message from Terry Lambert on Fri, 5 Apr 1996 16:14:00 -0700 (MST))
next in thread | previous in thread | raw e-mail | index | archive | help
* I also don't see the code seriously dealing with misalignment between * source and target, which need to be aligned on the same boundary for * everything but the initial and final sub-increment sized moves. That's a good point. I changed our routine to fall back to the original code if the alignment is not multiple of 8. Use our code all the time: 70MB/s if multiple of 8 42MB/s if not multiple of 8 Use original code if not multiple of 8: 70MB/s if multiple of 8 56MB/s if multiple of 4 but not multiple of 8 34MB/s if not multiple of 4 This is the "rawread" test (read/lseek loop, reading from same part of file/disk all the time -- I used a file and read 64K blocks so it should all be coming from the disk cache). * Often it's better if the alignment isn't there to fall back to the * old code. From the above, it seems like we can still win in some cases but I don't think further complicating the code is going to help us much, as (probably) most of the big moves are going to be 8-byte aligned anyway. Satoshi P.S. 
Here's the code after taking Terry's suggestions into account: Index: support.s =================================================================== RCS file: /usr/cvs/src/sys/i386/i386/support.s,v retrieving revision 1.31 diff -u -r1.31 support.s --- 1.31 1995/12/28 23:14:40 +++ support.s 1996/04/09 01:58:54 @@ -463,6 +463,14 @@ /* bcopy(%esi, %edi, %ebx) */ 3: movl %ebx,%ecx + cmpl $1024,%ecx + jbe slow_copyout + + call fastmove + jmp done_copyout + + ALIGN_TEXT +slow_copyout: shrl $2,%ecx cld rep @@ -510,6 +518,14 @@ cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault + cmpl $1024,%ecx + jbe slow_copyin + + call fastmove + jmp done_copyin + + ALIGN_TEXT +slow_copyin: movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -520,6 +536,8 @@ rep movsb + ALIGN_TEXT +done_copyin: popl %edi popl %esi xorl %eax,%eax @@ -534,6 +552,84 @@ movl _curpcb,%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax + ret + +/* fastmove(src, dst, len) + src in %esi + dst in %edi + len in %ecx + uses %eax and %edx for tmp. 
storage + */ + ALIGN_TEXT +fastmove: + cmpl $63,%ecx + jbe L57 + + movl %esi,%eax + andl $7,%eax /* check if src addr is multiple of 8 */ + jnz L57 + + movl %edi,%eax + andl $7,%eax /* check if dst addr is multiple of 8 */ + jnz L57 + + movl %cr0,%edx + movl $8, %eax /* CR0_TS */ + not %eax + andl %eax,%edx /* clear CR0_TS */ + movl %edx,%cr0 + + subl $108,%esp + fsave (%esp) + + ALIGN_TEXT +L58: + fildq 0(%esi) + fildq 8(%esi) + fildq 16(%esi) + fildq 24(%esi) + fildq 32(%esi) + fildq 40(%esi) + fildq 48(%esi) + fildq 56(%esi) + fxch %st(7) + fistpq 0(%edi) + fxch %st(5) + fistpq 8(%edi) + fxch %st(3) + fistpq 16(%edi) + fxch %st(1) + fistpq 24(%edi) + fistpq 32(%edi) + fistpq 40(%edi) + fistpq 48(%edi) + fistpq 56(%edi) + addl $-64,%ecx + addl $64,%esi + addl $64,%edi + cmpl $63,%ecx + ja L58 + + frstor (%esp) + addl $108,%esp + + andl $8,%edx + movl %cr0,%eax + orl %edx, %eax /* reset CR0_TS to the original value */ + movl %eax,%cr0 + + ALIGN_TEXT +L57: + movb %cl,%al + shrl $2,%ecx /* copy longword-wise */ + cld + rep + movsl + movb %al,%cl + andb $3,%cl /* copy remaining bytes */ + rep + movsb + ret /*
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199604090225.TAA16078>