Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 3 Apr 1997 16:08:41 +0200 (CEST)
From:      S Sigala <ssigala@globalnet.it>
To:        freebsd-hackers@freebsd.org
Subject:   Making holes in files with lseek()
Message-ID:  <Pine.BSF.3.96.970403160625.645A-100000@lattice.latte.it>

next in thread | raw e-mail | index | archive | help
Hello, I have just written this little program that replaces sequences
of nul bytes with holes (using lseek()).  This seems to work, but I would
like to know the ideal length of a nul byte sequence above which a hole is
better (requires less space on disk) than the sequence itself.
In other words, how much disk space is wasted by a hole?  Does every
lseek() call (a seek past the end of the file) create a hole?

Thanks in advance.

Regards,
		-sandro

-- CUT HERE ------------------------------------------------------------------

/*
 * Default minimum length, in bytes, of a run of NUL bytes that is turned
 * into a hole.  It is the initial value of lflag and can be overridden on
 * the command line with -l.
 */
#define USEFUL_LENGTH	16

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Minimum number of consecutive NUL bytes that process_file() replaces
 * with a hole; shorter runs are written out as literal zeros.  Set from
 * the -l option in main().
 */
int lflag = USEFUL_LENGTH;

/* Print the usage message to stderr and exit with status 1. */
void usage __P((void));
/* Apply the times, ownership, mode and flags recorded in *fs to path. */
void setfile __P((char *path, struct stat *fs));
/* Rewrite filename in place, replacing long runs of NUL bytes with holes. */
void process_file __P((char *filename));

/*
 * main --
 *	Parse the command line and holeify every file named on it.
 *
 *	Options:
 *	  -l length	minimum run of NUL bytes that becomes a hole
 *			(must be a decimal integer in the range 1..INT_MAX).
 *
 *	Exits with status 1 (through errx() or usage()) on a bad option or
 *	when no file is given; otherwise returns 0 after every file has been
 *	handed to process_file().
 */
int
main(int argc, char *argv[])
{
	char *end;
	long val;
	int c;

	while ((c = getopt(argc, argv, "l:")) != -1)
		switch (c) {
		case 'l':
			/*
			 * strtol() rather than atoi(): it lets us reject
			 * trailing garbage ("12abc") and out-of-range values
			 * instead of silently accepting or overflowing.
			 */
			errno = 0;
			val = strtol(optarg, &end, 10);
			if (end == optarg || *end != '\0' || errno == ERANGE ||
			    val <= 0 || val > INT_MAX)
				errx(1, "invalid -l value");
			lflag = (int)val;
			break;
		case '?':
		default:
			usage();
			/* NOTREACHED */
		}
	argc -= optind;
	argv += optind;

	if (argc < 1)
		usage();

	/* argv is NULL-terminated, so walk it until the terminator. */
	while (*argv != NULL)
		process_file(*argv++);

	return 0;
}

/*
 * usage --
 *	Write the command synopsis to standard error and terminate the
 *	program with exit status 1.  Never returns.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: holeify [-l length] filename ...\n";

	fputs(synopsis, stderr);
	exit(1);
}

/*
 * setfile --
 *	Apply the attributes recorded in *fs to the file named by path:
 *	access and modification times, owner and group, permission bits,
 *	and file flags.
 *
 *	path	file whose attributes are updated.
 *	fs	attributes to apply.  NOTE: fs->st_mode is modified in place;
 *		it is masked down to the permission/set-id/sticky bits, and
 *		the set-id bits are cleared if ownership cannot be restored.
 *
 *	The operation is best effort: a failing step is reported on stderr
 *	with warn() and the remaining steps are still attempted.
 */
void
setfile(char *path, struct stat *fs)
{
	struct timeval tv[2];

	/* Set file access and modification times. */
	TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atimespec);
	TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtimespec);
	if (utimes(path, tv) != 0)
		warn("utimes: %s", path);

	/* Keep only the bits chmod() is meant to set. */
	fs->st_mode &= S_ISUID | S_ISGID | S_ISVTX |
		S_IRWXU | S_IRWXG | S_IRWXO;

	/*
	 * Set owner and group.  If that fails (typically because we are not
	 * the superuser), drop the set-id bits so the file does not become
	 * set-uid/set-gid to the wrong owner.
	 */
	if (chown(path, fs->st_uid, fs->st_gid) != 0)
		fs->st_mode &= ~(S_ISUID | S_ISGID);

	/* Set mode. */
	if (chmod(path, fs->st_mode) != 0)
		warn("chmod: %s", path);

	/* Set flags. */
	if (chflags(path, fs->st_flags) != 0)
		warn("chflags: %s", path);
}

/*
 * write_all --
 *	Write exactly len bytes from buf to fd, retrying after short writes.
 *	Returns 0 on success (including len == 0) and -1 on failure.
 */
static int
write_all(int fd, const void *buf, size_t len)
{
	const unsigned char *p = buf;
	ssize_t n;

	while (len > 0) {
		n = write(fd, p, len);
		if (n <= 0)
			return -1;
		p += n;
		len -= (size_t)n;
	}
	return 0;
}

/*
 * copy_sparse --
 *	Copy everything readable from ifd to ofd.  Every run of at least
 *	min_hole consecutive NUL bytes is skipped with lseek() so that it
 *	becomes a hole; shorter runs are written as literal zeros.
 *	Returns 0 on success and -1 on any read, write or seek failure.
 */
static int
copy_sparse(int ifd, int ofd, int min_hole)
{
	unsigned char ibuf[BUFSIZ], obuf[BUFSIZ];
	off_t zeros;		/* length of the pending run of NUL bytes */
	size_t olen;		/* bytes currently buffered in obuf */
	size_t chunk;
	ssize_t nread, i;

	zeros = 0;
	olen = 0;
	while ((nread = read(ifd, ibuf, sizeof(ibuf))) > 0) {
		for (i = 0; i < nread; i++) {
			if (ibuf[i] == '\0') {
				/* Runs may span several read() calls. */
				zeros++;
				continue;
			}

			if (zeros >= min_hole) {
				/*
				 * Buffered data precedes the hole, so it
				 * must reach the file before we seek.
				 */
				if (write_all(ofd, obuf, olen) < 0)
					return -1;
				olen = 0;
				if (lseek(ofd, zeros, SEEK_CUR) == (off_t)-1)
					return -1;
				zeros = 0;
			} else {
				/* Too short for a hole: emit real zeros. */
				for (; zeros > 0; zeros--) {
					obuf[olen++] = '\0';
					if (olen == sizeof(obuf)) {
						if (write_all(ofd, obuf,
						    olen) < 0)
							return -1;
						olen = 0;
					}
				}
			}

			obuf[olen++] = ibuf[i];
			if (olen == sizeof(obuf)) {
				if (write_all(ofd, obuf, olen) < 0)
					return -1;
				olen = 0;
			}
		}
	}
	if (nread < 0)
		return -1;

	if (write_all(ofd, obuf, olen) < 0)
		return -1;

	if (zeros > 0) {
		/*
		 * The input ends in NUL bytes.  A seek alone does not extend
		 * the file, so the last byte is always written for real; the
		 * bytes before it become a hole when there are enough of them.
		 */
		if (zeros - 1 >= min_hole) {
			if (lseek(ofd, zeros - 1, SEEK_CUR) == (off_t)-1)
				return -1;
			zeros = 1;
		}
		memset(obuf, 0, sizeof(obuf));
		while (zeros > 0) {
			chunk = zeros < (off_t)sizeof(obuf) ?
			    (size_t)zeros : sizeof(obuf);
			if (write_all(ofd, obuf, chunk) < 0)
				return -1;
			zeros -= (off_t)chunk;
		}
	}
	return 0;
}

/*
 * process_file --
 *	Rewrite filename so that every run of at least lflag NUL bytes is
 *	stored as a hole.  The data is copied into a temporary file created
 *	in the same directory (so that rename() stays on one filesystem),
 *	the original's attributes are applied to it with setfile(), and it
 *	is then renamed over the original.
 *
 *	filename	path of the file to process.
 *
 *	On any failure a diagnostic is printed with warn()/warnx(), the
 *	temporary file is removed and the original file is left untouched.
 *	Files that are not regular files are skipped.
 */
void
process_file(char *filename)
{
	static const char suffix[] = "temp.XXXXXX";
	struct stat sb;
	const char *slash;
	char *tempfile;
	size_t dirlen;
	int ifd, ofd;
	int created, ok;

	tempfile = NULL;
	ofd = -1;
	created = 0;
	ok = 0;

	if ((ifd = open(filename, O_RDONLY)) < 0) {
		warn("%s", filename);
		return;
	}
	if (fstat(ifd, &sb) < 0) {
		warn("%s", filename);
		goto out;
	}
	if (!S_ISREG(sb.st_mode)) {
		warnx("%s: not a regular file", filename);
		goto out;
	}

	/* Build "<directory of filename>/temp.XXXXXX". */
	slash = strrchr(filename, '/');
	dirlen = slash != NULL ? (size_t)(slash - filename) + 1 : 0;
	if ((tempfile = malloc(dirlen + sizeof(suffix))) == NULL) {
		warn("malloc");
		goto out;
	}
	memcpy(tempfile, filename, dirlen);
	memcpy(tempfile + dirlen, suffix, sizeof(suffix));

	if ((ofd = mkstemp(tempfile)) < 0) {
		warn("%s", tempfile);
		goto out;
	}
	created = 1;

	if (copy_sparse(ifd, ofd, lflag) < 0) {
		warn("cannot copy %s to %s", filename, tempfile);
		goto out;
	}
	/* close() can report a deferred write error; do not ignore it. */
	if (close(ofd) < 0) {
		ofd = -1;
		warn("%s", tempfile);
		goto out;
	}
	ofd = -1;

	/*
	 * Restore old file attributes and rename to old file name.
	 */
	setfile(tempfile, &sb);
	if (rename(tempfile, filename) < 0) {
		warn("cannot rename %s to %s", tempfile, filename);
		goto out;
	}
	ok = 1;

out:
	if (ofd >= 0)
		(void)close(ofd);
	(void)close(ifd);
	if (created && !ok)
		(void)unlink(tempfile);
	free(tempfile);
}




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.BSF.3.96.970403160625.645A-100000>