Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 16 Nov 1999 09:41:40 -0500 (EST)
From:      Mikhail Teterin <mi@kot.ne.mediaone.net>
To:        Poul-Henning Kamp <phk@critter.freebsd.dk>
Cc:        questions@freebsd.org
Subject:   Re: -lmd's MD5File()
Message-ID:  <199911161441.JAA27403@rtfm.newton>
In-Reply-To: <23589.942733382@critter.freebsd.dk> from Poul-Henning Kamp at "Nov 16, 1999 07:23:02 am"

next in thread | previous in thread | raw e-mail | index | archive | help
Poul-Henning Kamp once stated:

=In message <199911152335.SAA31130@misha.cisco.com>, Mikhail Teterin writes:
=>Why does not the function in subject use mmap? In my tests, using mmap
=>instead of  reading the file block  at a time gave  17-26% performance
=>improvement:
=>
=>	The new method: 135735 microseconds per iteration
=>	The old method: 164525 microseconds per iteration
=>
=>	The new method: 387283 microseconds per iteration
=>	The old method: 529743 microseconds per iteration
=>
=>Should I submit patches, or is there a good reason? Thanks!
=
=How would it work for a 5GB file ?

It will fall back to the old method. This will make the code uglier, but
quite a bit more efficient -- most of the time, files are smaller than
2 GB... And, I hope, eventually FreeBSD will allow bigger files to be
mmapped...

This is how my tests were run (I like Tcl). After `make', run

	tclsh8.0 test.tcl

		-mi

# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	/tmp/md5
#	/tmp/md5/test.tcl
#	/tmp/md5/mitest.c
#	/tmp/md5/Makefile
#
echo c - /tmp/md5
mkdir -p /tmp/md5 > /dev/null 2>&1
echo x - /tmp/md5/test.tcl
sed 's/^X//' >/tmp/md5/test.tcl << 'END-of-/tmp/md5/test.tcl'
X# Load the shared library that provides the md5file1 and md5file2 commands.
Xload libmitest.so
X
X# Sanity check: both implementations must produce the same digest.
X# The digests are compared as strings.  expr's != operator compares
X# numerically when both operands look like numbers (for example a digest
X# made only of decimal digits), which could hide a real difference.
Xif {[string compare [md5file1 /kernel] [md5file2 /kernel]] != 0} {
X	puts stderr "Bad -- the results differ!"
X}
X
X# Time 30 iterations of each implementation on the same file.
Xputs "The new method: [time {md5file1 /kernel} 30]"
Xputs "The old method: [time {md5file2 /kernel} 30]"
END-of-/tmp/md5/test.tcl
echo x - /tmp/md5/mitest.c
sed 's/^X//' >/tmp/md5/mitest.c << 'END-of-/tmp/md5/mitest.c'
X#include <sys/types.h>
X#include <sys/mman.h>
X#include <sys/stat.h>
X
X#include <fcntl.h>
X#include <md5.h>
X#include <stdio.h>
X#include <tcl.h>
X#include <unistd.h>
X
X#define MAX_MMAP_SIZE 2147483648	/* 2 Gb */
X
Xstatic char * my_MD5File(const char *filename, char *buf)
X{
X	MD5_CTX ctx;
X	int f;
X	void *p;
X	struct stat stats;
X
X	if(stat(filename, &stats) == -1) err(filename);
X
X	if(stats.st_size > MAX_MMAP_SIZE) {
X		fprintf(stderr, "%d is above %d -- falling back",
X			stats.st_size, MAX_MMAP_SIZE);
X		return MD5File(filename, buf);
X	}
X
X	f = open(filename,O_RDONLY);
X
X	p = mmap(NULL, stats.st_size, PROT_READ, 0, f, 0);
X	if(!p) err("mmap");
X
X	MD5Init(&ctx);
X	MD5Update(&ctx, p, stats.st_size);
X
X	munmap(p, stats.st_size);
X	close(f);
X
X	return MD5End(&ctx, buf);
X}
X
Xstatic int
Xmd5file(ClientData cd, Tcl_Interp *I, int objc, Tcl_Obj * CONST  objv[])
X{
X	char *result;
X
X	result = Tcl_GetStringFromObj(objv[1], NULL);
X	result = cd ? MD5File(result, NULL) : my_MD5File(result, NULL);
X
X	if(!result) {
X		Tcl_SetResult(I, "(null)", TCL_STATIC);
X		return TCL_ERROR;
X	}
X
X	Tcl_SetResult(I, result, TCL_DYNAMIC);
X	return TCL_OK;
X}
X
Xint Mitest_Init(Tcl_Interp *I) {
X	Tcl_CreateObjCommand(I, "md5file1", md5file, NULL, NULL);
X	Tcl_CreateObjCommand(I, "md5file2", md5file, NULL+1, NULL);
X	return Tcl_PkgProvide(I, "mitest", "1.0");
X	
X}
END-of-/tmp/md5/mitest.c
echo x - /tmp/md5/Makefile
sed 's/^X//' >/tmp/md5/Makefile << 'END-of-/tmp/md5/Makefile'
X# Build description for the mitest library, processed by <bsd.lib.mk>.
X
X# Library name and shared-library major version.
XLIB=mitest
XSHLIB_MAJOR=1
X
X# The single source file of the library.
XSRCS=mitest.c
X
X# Location of the Tcl 8.0 headers (tcl.h).
XCFLAGS+=-I/usr/local/include/tcl8.0
X
X# Link against libmd, which provides the MD5 routines used by mitest.c.
XLDADD=-lmd
X
X.include <bsd.lib.mk>
END-of-/tmp/md5/Makefile
exit


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-questions" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199911161441.JAA27403>