From owner-svn-src-stable-10@FreeBSD.ORG Wed Oct 8 22:09:37 2014 Return-Path: Delivered-To: svn-src-stable-10@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 82AAF9F1; Wed, 8 Oct 2014 22:09:37 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 6E1BD610; Wed, 8 Oct 2014 22:09:37 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id s98M9bA8042175; Wed, 8 Oct 2014 22:09:37 GMT (envelope-from marcel@FreeBSD.org) Received: (from marcel@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id s98M9bvW042173; Wed, 8 Oct 2014 22:09:37 GMT (envelope-from marcel@FreeBSD.org) Message-Id: <201410082209.s98M9bvW042173@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: marcel set sender to marcel@FreeBSD.org using -f From: Marcel Moolenaar Date: Wed, 8 Oct 2014 22:09:37 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r272775 - stable/10/usr.bin/mkimg X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-10@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: SVN commit messages for only the 10-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 08 Oct 2014 22:09:37 -0000 Author: marcel Date: Wed Oct 8 22:09:36 2014 New Revision: 272775 URL: https://svnweb.freebsd.org/changeset/base/272775 Log: MFC 272384: Improve performance of mkimg(1). 
Requested by: gjb Modified: stable/10/usr.bin/mkimg/Makefile stable/10/usr.bin/mkimg/image.c Directory Properties: stable/10/ (props changed) Modified: stable/10/usr.bin/mkimg/Makefile ============================================================================== --- stable/10/usr.bin/mkimg/Makefile Wed Oct 8 22:06:38 2014 (r272774) +++ stable/10/usr.bin/mkimg/Makefile Wed Oct 8 22:09:36 2014 (r272775) @@ -4,7 +4,7 @@ PROG= mkimg SRCS= format.c image.c mkimg.c scheme.c MAN= mkimg.1 -MKIMG_VERSION=20140927 +MKIMG_VERSION=20141001 CFLAGS+=-DMKIMG_VERSION=${MKIMG_VERSION} CFLAGS+=-DSPARSE_WRITE Modified: stable/10/usr.bin/mkimg/image.c ============================================================================== --- stable/10/usr.bin/mkimg/image.c Wed Oct 8 22:06:38 2014 (r272774) +++ stable/10/usr.bin/mkimg/image.c Wed Oct 8 22:09:36 2014 (r272775) @@ -27,71 +27,462 @@ #include __FBSDID("$FreeBSD$"); +#include +#include +#include #include #include #include #include #include +#include #include #include +#include #include #include "image.h" #include "mkimg.h" -#define BUFFER_SIZE (1024*1024) +struct chunk { + STAILQ_ENTRY(chunk) ch_list; + size_t ch_size; /* Size of chunk in bytes. */ + lba_t ch_block; /* Block address in image. */ + union { + struct { + off_t ofs; /* Offset in backing file. */ + int fd; /* FD of backing file. */ + } file; + struct { + void *ptr; /* Pointer to data in memory */ + } mem; + } ch_u; + u_int ch_type; +#define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */ +#define CH_TYPE_FILE 1 /* File-backed chunk. 
*/ +#define CH_TYPE_MEMORY 2 /* Memory-backed chunk */ +}; + +static STAILQ_HEAD(chunk_head, chunk) image_chunks; +static u_int image_nchunks; + +static char image_swap_file[PATH_MAX]; +static int image_swap_fd = -1; +static u_int image_swap_pgsz; +static off_t image_swap_size; -static char image_tmpfile[PATH_MAX]; -static int image_fd = -1; static lba_t image_size; -static void -cleanup(void) +static int +is_empty_sector(void *buf) { + uint64_t *p = buf; + size_t n, max; + + assert(((uintptr_t)p & 3) == 0); - if (image_fd != -1) - close(image_fd); - unlink(image_tmpfile); + max = secsz / sizeof(uint64_t); + for (n = 0; n < max; n++) { + if (p[n] != 0UL) + return (0); + } + return (1); } -int -image_copyin(lba_t blk, int fd, uint64_t *sizep) +/* + * Swap file handling. + */ + +static off_t +image_swap_alloc(size_t size) +{ + off_t ofs; + size_t unit; + + unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; + assert((unit & (unit - 1)) == 0); + + size = (size + unit - 1) & ~(unit - 1); + + ofs = image_swap_size; + image_swap_size += size; + if (ftruncate(image_swap_fd, image_swap_size) == -1) { + image_swap_size = ofs; + ofs = -1LL; + } + return (ofs); +} + +/* + * Image chunk handling. + */ + +static struct chunk * +image_chunk_find(lba_t blk) +{ + static struct chunk *last = NULL; + struct chunk *ch; + + ch = (last != NULL && last->ch_block <= blk) + ? last : STAILQ_FIRST(&image_chunks); + while (ch != NULL) { + if (ch->ch_block <= blk && + (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) { + last = ch; + break; + } + ch = STAILQ_NEXT(ch, ch_list); + } + return (ch); +} + +static size_t +image_chunk_grow(struct chunk *ch, size_t sz) +{ + size_t dsz, newsz; + + newsz = ch->ch_size + sz; + if (newsz > ch->ch_size) { + ch->ch_size = newsz; + return (0); + } + /* We would overflow -- create new chunk for remainder. 
*/ + dsz = SIZE_MAX - ch->ch_size; + assert(dsz < sz); + ch->ch_size = SIZE_MAX; + return (sz - dsz); +} + +static struct chunk * +image_chunk_memory(struct chunk *ch, lba_t blk) +{ + struct chunk *new; + void *ptr; + + ptr = calloc(1, secsz); + if (ptr == NULL) + return (NULL); + + if (ch->ch_block < blk) { + new = malloc(sizeof(*new)); + if (new == NULL) { + free(ptr); + return (NULL); + } + memcpy(new, ch, sizeof(*new)); + ch->ch_size = (blk - ch->ch_block) * secsz; + new->ch_block = blk; + new->ch_size -= ch->ch_size; + STAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); + image_nchunks++; + ch = new; + } + + if (ch->ch_size > secsz) { + new = malloc(sizeof(*new)); + if (new == NULL) { + free(ptr); + return (NULL); + } + memcpy(new, ch, sizeof(*new)); + ch->ch_size = secsz; + new->ch_block++; + new->ch_size -= secsz; + STAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); + image_nchunks++; + } + + ch->ch_type = CH_TYPE_MEMORY; + ch->ch_u.mem.ptr = ptr; + return (ch); +} + +static int +image_chunk_skipto(lba_t to) +{ + struct chunk *ch; + lba_t from; + size_t sz; + + ch = STAILQ_LAST(&image_chunks, chunk, ch_list); + from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL; + + assert(from <= to); + + /* Nothing to do? */ + if (from == to) + return (0); + /* Avoid bugs due to overflows. 
*/ + if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz)) + return (EFBIG); + sz = (to - from) * secsz; + if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) { + sz = image_chunk_grow(ch, sz); + if (sz == 0) + return (0); + from = ch->ch_block + (ch->ch_size / secsz); + } + ch = malloc(sizeof(*ch)); + if (ch == NULL) + return (ENOMEM); + memset(ch, 0, sizeof(*ch)); + ch->ch_block = from; + ch->ch_size = sz; + ch->ch_type = CH_TYPE_ZEROES; + STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); + image_nchunks++; + return (0); +} + +static int +image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd) +{ + struct chunk *ch; + + ch = STAILQ_LAST(&image_chunks, chunk, ch_list); + if (ch != NULL && ch->ch_type == CH_TYPE_FILE) { + if (fd == ch->ch_u.file.fd && + blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) && + ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) { + sz = image_chunk_grow(ch, sz); + if (sz == 0) + return (0); + blk = ch->ch_block + (ch->ch_size / secsz); + ofs = ch->ch_u.file.ofs + ch->ch_size; + } + } + ch = malloc(sizeof(*ch)); + if (ch == NULL) + return (ENOMEM); + memset(ch, 0, sizeof(*ch)); + ch->ch_block = blk; + ch->ch_size = sz; + ch->ch_type = CH_TYPE_FILE; + ch->ch_u.file.ofs = ofs; + ch->ch_u.file.fd = fd; + STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); + image_nchunks++; + return (0); +} + +static int +image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd) +{ + uint8_t *p = buf; + int error; + + error = 0; + sz = (sz + secsz - 1) & ~(secsz - 1); + while (!error && sz > 0) { + if (is_empty_sector(p)) + error = image_chunk_skipto(blk + 1); + else + error = image_chunk_append(blk, secsz, ofs, fd); + blk++; + p += secsz; + sz -= secsz; + ofs += secsz; + } + return (error); +} + +/* + * File mapping support. + */ + +static void * +image_file_map(int fd, off_t ofs, size_t sz) +{ + void *ptr; + size_t unit; + int flags, prot; + + unit = (secsz > image_swap_pgsz) ? 
secsz : image_swap_pgsz; + assert((unit & (unit - 1)) == 0); + + flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED; + /* Allow writing to our swap file only. */ + prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0); + sz = (sz + unit - 1) & ~(unit - 1); + ptr = mmap(NULL, sz, prot, flags, fd, ofs); + return ((ptr == MAP_FAILED) ? NULL : ptr); +} + +static int +image_file_unmap(void *buffer, size_t sz) +{ + size_t unit; + + unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; + sz = (sz + unit - 1) & ~(unit - 1); + munmap(buffer, sz); + return (0); +} + +/* + * Input/source file handling. + */ + +static int +image_copyin_stream(lba_t blk, int fd, uint64_t *sizep) { char *buffer; uint64_t bytesize; - ssize_t bcnt, rdsz; - int error, partial; + off_t swofs; + size_t iosz; + ssize_t rdsz; + int error; - assert(BUFFER_SIZE % secsz == 0); + /* + * This makes sure we're doing I/O in multiples of the page + * size as well as of the sector size. 2MB is the minimum + * by virtue of secsz at least 512 bytes and the page size + * at least 4K bytes. + */ + iosz = secsz * image_swap_pgsz; - buffer = malloc(BUFFER_SIZE); - if (buffer == NULL) - return (ENOMEM); bytesize = 0; - partial = 0; - while (1) { - rdsz = read(fd, buffer, BUFFER_SIZE); - if (rdsz <= 0) { - error = (rdsz < 0) ? errno : 0; - break; - } - if (partial) - abort(); - bytesize += rdsz; - bcnt = (rdsz + secsz - 1) / secsz; - error = image_write(blk, buffer, bcnt); + do { + swofs = image_swap_alloc(iosz); + if (swofs == -1LL) + return (errno); + buffer = image_file_map(image_swap_fd, swofs, iosz); + if (buffer == NULL) + return (errno); + rdsz = read(fd, buffer, iosz); + if (rdsz > 0) + error = image_chunk_copyin(blk, buffer, rdsz, swofs, + image_swap_fd); + else if (rdsz < 0) + error = errno; + else + error = 0; + image_file_unmap(buffer, iosz); + /* XXX should we relinquish unused swap space? 
*/ if (error) + return (error); + + bytesize += rdsz; + blk += (rdsz + secsz - 1) / secsz; + } while (rdsz > 0); + + if (sizep != NULL) + *sizep = bytesize; + return (0); +} + +static int +image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep) +{ + off_t cur, data, end, hole, pos; + void *buf; + uint64_t bytesize; + size_t iosz, sz; + int error; + + /* + * We'd like to know the size of the file and we must + * be able to seek in order to mmap(2). If this isn't + * possible, then treat the file as a stream/pipe. + */ + end = lseek(fd, 0L, SEEK_END); + if (end == -1L) + return (image_copyin_stream(blk, fd, sizep)); + + /* + * We need the file opened for the duration and our + * caller is going to close the file. Make a dup(2) + * so that we control the fate of the descriptor. + */ + fd = dup(fd); + if (fd == -1) + return (errno); + + iosz = secsz * image_swap_pgsz; + + bytesize = 0; + cur = pos = 0; + error = 0; + while (!error && cur < end) { + hole = lseek(fd, cur, SEEK_HOLE); + data = lseek(fd, cur, SEEK_DATA); + + /* + * Treat the entire file as data if sparse files + * are not supported by the underlying file system. + */ + if (hole == -1 && data == -1) { + data = cur; + hole = end; + } + + if (cur == hole && data > hole) { + hole = pos; + pos = data & ~((uint64_t)secsz - 1); + + blk += (pos - hole) / secsz; + error = image_chunk_skipto(blk); + + bytesize += pos - hole; + cur = data; + } else if (cur == data && hole > data) { + data = pos; + pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1); + + while (data < pos) { + sz = (pos - data > (off_t)iosz) + ? iosz : (size_t)(pos - data); + + buf = image_file_map(fd, data, sz); + if (buf != NULL) { + error = image_chunk_copyin(blk, buf, + sz, data, fd); + image_file_unmap(buf, sz); + } else + error = errno; + + blk += sz / secsz; + bytesize += sz; + data += sz; + } + cur = hole; + } else { + /* + * I don't know what this means or whether it + * can happen at all... 
+ */ + error = EDOOFUS; break; - blk += bcnt; - partial = ((ssize_t)(bcnt * secsz) != rdsz) ? 1 : 0; + } } - free(buffer); - if (sizep != NULL) + if (error) + close(fd); + if (!error && sizep != NULL) *sizep = bytesize; return (error); } int +image_copyin(lba_t blk, int fd, uint64_t *sizep) +{ + struct stat sb; + int error; + + error = image_chunk_skipto(blk); + if (!error) { + if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) + error = image_copyin_stream(blk, fd, sizep); + else + error = image_copyin_mapped(blk, fd, sizep); + } + return (error); +} + +/* + * Output/sink file handling. + */ + +int image_copyout(int fd) { int error; @@ -115,71 +506,124 @@ image_copyout_done(int fd) return (error); } -int -image_copyout_region(int fd, lba_t blk, lba_t size) +static int +image_copyout_memory(int fd, size_t size, void *ptr) { - char *buffer; - off_t ofs; + + if (write(fd, ptr, size) == -1) + return (errno); + return (0); +} + +static int +image_copyout_zeroes(int fd, size_t size) +{ + static uint8_t *zeroes = NULL; size_t sz; - ssize_t rdsz, wrsz; int error; - ofs = lseek(fd, 0L, SEEK_CUR); + if (lseek(fd, (off_t)size, SEEK_CUR) != -1) + return (0); + + /* + * If we can't seek, we must write. + */ + + if (zeroes == NULL) { + zeroes = calloc(1, secsz); + if (zeroes == NULL) + return (ENOMEM); + } + + while (size > 0) { + sz = (size > secsz) ? secsz : size; + error = image_copyout_memory(fd, sz, zeroes); + if (error) + return (error); + size -= sz; + } + return (0); +} + +static int +image_copyout_file(int fd, size_t size, int ifd, off_t iofs) +{ + void *buf; + size_t iosz, sz; + int error; + + iosz = secsz * image_swap_pgsz; + + while (size > 0) { + sz = (size > iosz) ? 
iosz : size; + buf = image_file_map(ifd, iofs, sz); + if (buf == NULL) + return (errno); + error = image_copyout_memory(fd, sz, buf); + image_file_unmap(buf, sz); + if (error) + return (error); + size -= sz; + iofs += sz; + } + return (0); +} + +int +image_copyout_region(int fd, lba_t blk, lba_t size) +{ + struct chunk *ch; + size_t ofs, sz; + int error; - blk *= secsz; - if (lseek(image_fd, blk, SEEK_SET) != blk) - return (errno); - buffer = malloc(BUFFER_SIZE); - if (buffer == NULL) - return (errno); - error = 0; size *= secsz; + while (size > 0) { - sz = (BUFFER_SIZE < size) ? BUFFER_SIZE : size; - rdsz = read(image_fd, buffer, sz); - if (rdsz <= 0) { - error = (rdsz < 0) ? errno : 0; + ch = image_chunk_find(blk); + if (ch == NULL) + return (EINVAL); + ofs = (blk - ch->ch_block) * secsz; + sz = ch->ch_size - ofs; + sz = ((lba_t)sz < size) ? sz : (size_t)size; + switch (ch->ch_type) { + case CH_TYPE_ZEROES: + error = image_copyout_zeroes(fd, sz); break; - } - wrsz = (ofs == -1) ? - write(fd, buffer, rdsz) : - sparse_write(fd, buffer, rdsz); - if (wrsz < 0) { - error = errno; + case CH_TYPE_FILE: + error = image_copyout_file(fd, sz, ch->ch_u.file.fd, + ch->ch_u.file.ofs + ofs); + break; + case CH_TYPE_MEMORY: + error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr); break; + default: + return (EDOOFUS); } - assert(wrsz == rdsz); - size -= rdsz; + size -= sz; + blk += sz / secsz; } - free(buffer); - return (error); + return (0); } int image_data(lba_t blk, lba_t size) { - char *buffer, *p; - - blk *= secsz; - if (lseek(image_fd, blk, SEEK_SET) != blk) - return (1); + struct chunk *ch; + lba_t lim; - size *= secsz; - buffer = malloc(size); - if (buffer == NULL) - return (1); - - if (read(image_fd, buffer, size) != (ssize_t)size) { - free(buffer); - return (1); + while (1) { + ch = image_chunk_find(blk); + if (ch == NULL) + return (0); + if (ch->ch_type != CH_TYPE_ZEROES) + return (1); + lim = ch->ch_block + (ch->ch_size / secsz); + if (lim >= blk + size) + return 
(0); + size -= lim - blk; + blk = lim; } - - p = buffer; - while (size > 0 && *p == '\0') - size--, p++; - - free(buffer); - return ((size == 0) ? 0 : 1); + /*NOTREACHED*/ } lba_t @@ -192,39 +636,87 @@ image_get_size(void) int image_set_size(lba_t blk) { + int error; - image_size = blk; - if (ftruncate(image_fd, blk * secsz) == -1) - return (errno); - return (0); + error = image_chunk_skipto(blk); + if (!error) + image_size = blk; + return (error); } int image_write(lba_t blk, void *buf, ssize_t len) { + struct chunk *ch; - blk *= secsz; - if (lseek(image_fd, blk, SEEK_SET) != blk) - return (errno); - len *= secsz; - if (sparse_write(image_fd, buf, len) != len) - return (errno); + while (len > 0) { + if (!is_empty_sector(buf)) { + ch = image_chunk_find(blk); + if (ch == NULL) + return (ENXIO); + /* We may not be able to write to files. */ + if (ch->ch_type == CH_TYPE_FILE) + return (EINVAL); + if (ch->ch_type == CH_TYPE_ZEROES) { + ch = image_chunk_memory(ch, blk); + if (ch == NULL) + return (ENOMEM); + } + assert(ch->ch_type == CH_TYPE_MEMORY); + memcpy(ch->ch_u.mem.ptr, buf, secsz); + } + blk++; + buf = (char *)buf + secsz; + len--; + } return (0); } +static void +image_cleanup(void) +{ + struct chunk *ch; + + while ((ch = STAILQ_FIRST(&image_chunks)) != NULL) { + switch (ch->ch_type) { + case CH_TYPE_FILE: + /* We may be closing the same file multiple times. 
*/ + if (ch->ch_u.file.fd != -1) + close(ch->ch_u.file.fd); + break; + case CH_TYPE_MEMORY: + free(ch->ch_u.mem.ptr); + break; + default: + break; + } + STAILQ_REMOVE_HEAD(&image_chunks, ch_list); + free(ch); + } + if (image_swap_fd != -1) + close(image_swap_fd); + unlink(image_swap_file); +} + int image_init(void) { const char *tmpdir; - if (atexit(cleanup) == -1) + STAILQ_INIT(&image_chunks); + image_nchunks = 0; + + image_swap_size = 0; + image_swap_pgsz = getpagesize(); + + if (atexit(image_cleanup) == -1) return (errno); if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') tmpdir = _PATH_TMP; - snprintf(image_tmpfile, sizeof(image_tmpfile), "%s/mkimg-XXXXXX", + snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX", tmpdir); - image_fd = mkstemp(image_tmpfile); - if (image_fd == -1) + image_swap_fd = mkstemp(image_swap_file); + if (image_swap_fd == -1) return (errno); return (0); }