Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 29 Jun 2016 23:34:58 -0700
From:      Julian Hsiao <julian@hsiao.email>
To:        freebsd-hackers@freebsd.org
Subject:   ggatel(8) extension for binding multiple files
Message-ID:  <nl2eii$ukl$1@ger.gmane.org>

next in thread | raw e-mail | index | archive | help
Hi,

I've been working on extending ggatel(8) to support binding multiple files
to one device, similar to how sparse bundles work on OS X.  You could
simulate the functionality with ggatel(8) / md(4) + gconcat(8), but this
scales poorly when you have 10^5 files.  I've got a working prototype that
passes some simple tests I wrote.  I also tried using it as backing store
for UFS and ZFS, copied /usr/{src,obj} over, and ran make buildworld (with
ZFS followed by zpool scrub).

To use it, instead of passing a file to ggatel(8) as the last argument,
pass a directory with files numerically named in hex (i.e. 0, 1, 2 ..., 9,
a, b, ..., e, f, 10, 11, ...).

So, I think it's a good time to gauge the community's interest in this
feature and whether it'd be possible to merge it to trunk.

Known issues:
  - I reserve an address space the size of the device, so on 32-bit systems
    you can't create devices bigger than a few GiB.
  - Similarly, the minimum sector size is most likely 4 kiB.
  - At most 100 files are mapped simultaneously, and the eviction algorithm
    is random replacement.
  - I use alloca(3) instead of malloc(3) in map_bundle() because using the
    latter causes incorrect behavior somehow.  It's probably buffer
    overruns and / or UBs somewhere in my code.
  - Both ggatel(8) and md(4) implement BIO_DELETE by zeroing the requested
    range.  However, this interacts poorly with ZFS since it BIO_DELETEs
    the entire device at pool creation.  I know there is a ZFS sysctl to
    disable the behavior, but I think the device should just not advertise
    support if it had to fake it anyway, so I didn't implement it.

There are definitely many more issues than the ones listed above, so any
feedback is welcome.

Thanks.

Julian Hsiao

Index: sbin/ggate/ggatel/Makefile
===================================================================
diff --git a/stable/10/sbin/ggate/ggatel/Makefile 
b/stable/10/sbin/ggate/ggatel/Makefile
--- a/stable/10/sbin/ggate/ggatel/Makefile	(revision 301921)
+++ b/stable/10/sbin/ggate/ggatel/Makefile	(working copy)
@@ -4,7 +4,7 @@

 PROG=	ggatel
 MAN=	ggatel.8
-SRCS=	ggatel.c ggate.c
+SRCS=	ggatel.c ggatel2.c ggate.c

 CFLAGS+= -DLIBGEOM
 CFLAGS+= -I${.CURDIR}/../shared
Index: sbin/ggate/ggatel/ggatel.c
===================================================================
diff --git a/stable/10/sbin/ggate/ggatel/ggatel.c 
b/stable/10/sbin/ggate/ggatel/ggatel.c
--- a/stable/10/sbin/ggate/ggatel/ggatel.c	(revision 301921)
+++ b/stable/10/sbin/ggate/ggatel/ggatel.c	(working copy)
@@ -46,6 +46,8 @@
 #include <geom/gate/g_gate.h>
 #include "ggate.h"

+int check_divs(const char *const, unsigned int *const, size_t *const, size_t *const);	/* defined in ggatel2.c */
+void g_gatel_serve_bundle(const int, const unsigned int, const size_t, const size_t, const int);	/* defined in ggatel2.c; 2nd argument is the sector size */

 static enum { UNSET, CREATE, DESTROY, LIST, RESCUE } action = UNSET;

@@ -168,17 +170,39 @@
 g_gatel_create(void)
 {
 	struct g_gate_ctl_create ggioc;
-	int fd;
+	int fd, isdir = -1;
+	size_t div_size, num_divs;

 	fd = open(path, g_gate_openflags(flags) | O_DIRECT | O_FSYNC);
-	if (fd == -1)
-		err(EXIT_FAILURE, "Cannot open %s", path);
+	if (fd == -1) {
+		if (errno == EISDIR) {
+			isdir = 1;
+		} else {
+			err(EXIT_FAILURE, "Cannot open %s", path);
+		}
+	} else {
+		struct stat sb;
+		if (fstat(fd, &sb) == -1) {
+			err(EXIT_FAILURE, "stat(%s) failed", path);
+		}
+		isdir = S_ISDIR(sb.st_mode);
+	}
+	assert(isdir != -1);
+
 	memset(&ggioc, 0, sizeof(ggioc));
 	ggioc.gctl_version = G_GATE_VERSION;
 	ggioc.gctl_unit = unit;
-	ggioc.gctl_mediasize = g_gate_mediasize(fd);
-	if (sectorsize == 0)
-		sectorsize = g_gate_sectorsize(fd);
+	if (isdir) {
+		if (fd != -1 && close(fd) == -1) {
+			err(EXIT_FAILURE, "close(%s) failed", path);
+		}	
+		fd = check_divs(path, &sectorsize, &div_size, &num_divs);
+		ggioc.gctl_mediasize = (off_t) div_size * num_divs;
+	} else {
+		ggioc.gctl_mediasize = g_gate_mediasize(fd);
+		if (sectorsize == 0)
+			sectorsize = g_gate_sectorsize(fd);
+	}
 	ggioc.gctl_sectorsize = sectorsize;
 	ggioc.gctl_timeout = timeout;
 	ggioc.gctl_flags = flags;
@@ -188,7 +212,11 @@
 	if (unit == -1)
 		printf("%s%u\n", G_GATE_PROVIDER_NAME, ggioc.gctl_unit);
 	unit = ggioc.gctl_unit;
-	g_gatel_serve(fd);
+	if (isdir) {
+		g_gatel_serve_bundle(fd, sectorsize, div_size, num_divs, unit);
+	} else {
+		g_gatel_serve(fd);
+	}
 }

 static void
Index: sbin/ggate/ggatel/ggatel2.c
===================================================================
diff --git a/stable/10/sbin/ggate/ggatel/ggatel2.c 
b/stable/10/sbin/ggate/ggatel/ggatel2.c
new file mode 10644
--- /dev/null	(nonexistent)
+++ b/stable/10/sbin/ggate/ggatel/ggatel2.c	(working copy)
@@ -0,0 +1,638 @@
+#include <math.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdint.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include <err.h>
+#include <time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/bio.h>
+#include <sys/disk.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/syslog.h>
+
+#include <geom/gate/g_gate.h>
+#include "ggate.h"
+
+/* ======== */
+
+//#pragma clang diagnostic push
+//#pragma clang diagnostic error "-Weverything"
+
+/* ======== */
+
+int check_divs(const char *const, unsigned int *const, size_t *const, size_t *const);	/* called from ggatel.c */
+void g_gatel_serve_bundle(const int, const unsigned int, const size_t, const size_t, const int);	/* called from ggatel.c */
+
+/* ======== */
+
+static void
+g_gate_verbose_log(const int v, const int p, const char *const m, ...)
+{
+    va_list args;   /* arguments following format m, handed to g_gate_vlog() */
+    if (g_gate_verbose < v)
+        return;     /* verbosity is below threshold v: drop the message */
+    va_start(args, m);
+    g_gate_vlog(p, m, args);    /* p is the log priority, e.g. LOG_DEBUG */
+    va_end(args);
+}
+
+#ifdef NDEBUG   /* only called from assert(), so unused when asserts are off */
+__attribute__((unused))
+#endif
+static inline bool
+mul_overflow(const size_t a, const size_t b)
+{
+    return(a != 0 && b > SIZE_MAX / a);     /* true iff a * b wraps size_t */
+}
+
+static unsigned int
+MINDIV_SIZE(void)
+{
+    static unsigned int cached;     /* page size; 0 means not queried yet */
+    if (cached != 0)
+        return(cached);
+    const long ps2 = sysconf(_SC_PAGESIZE);
+    static_assert(sizeof(size_t) >= sizeof(long), "");
+    assert(ps2 > 0);
+    assert(ps2 <= UINT_MAX);
+    cached = (unsigned int) ps2;
+    return(cached);
+}
+
+static size_t
+DIV_NAME_BUFSIZE(void)
+{
+    /*
+     * Buffer size for the longest possible div name: one hex digit per
+     * four bits of size_t (rounded up) plus the terminating NUL.  Computed
+     * with integer arithmetic instead of ceil(log(SIZE_MAX) / log(16)), so
+     * the result neither depends on floating-point rounding nor needs libm.
+     */
+    const size_t hex_digits = (sizeof(size_t) * CHAR_BIT + 3) / 4;
+    return(hex_digits + 1);
+}
+
+/* ======== */
+
+static void     /* Write num into buf as lowercase hex digits ("%zx"). */
+numtohexstr(const size_t num, char *const buf, const size_t buflen)
+{
+#ifdef NDEBUG   /* r is only read by the assert()s below */
+    __attribute__((unused))
+#endif
+    const int r = snprintf(buf, buflen, "%zx", num);
+    assert(r > 0);                      /* snprintf() reported no error */
+    assert((unsigned int) r < buflen);  /* output was not truncated */
+}
+
+int     /* returns an open fd for the bundle directory; exits on any error */
+check_divs(const char *const bundle, unsigned int *const blk_size,
+    size_t *const div_size, size_t *const num_divs)
+{
+    assert(blk_size != NULL);
+    assert(div_size != NULL);
+    assert(num_divs != NULL);
+    /* Validate the divs named 0, 1, 2, ... (in hex) inside directory bundle. */
+    int dfd;
+    if ((dfd = open(bundle, O_RDONLY | O_DIRECTORY | O_CLOEXEC)) == -1) {
+        err(5, "open(%s) failed", bundle);
+    }
+    char *buf = malloc(DIV_NAME_BUFSIZE());     /* holds the current div name */
+    if (buf == NULL) {
+        err(5, "malloc(DIV_NAME_BUFSIZE) failed");
+    }
+    /* Count consecutive divs; the first missing name (ENOENT) ends the scan. */
+    for (*num_divs = 0; *num_divs < SIZE_MAX; ++(*num_divs)) {
+        int fd;
+        struct stat sb = { .st_dev = 0 };
+
+        numtohexstr(*num_divs, buf, DIV_NAME_BUFSIZE());
+        if ((fd = openat(dfd, buf, O_RDONLY | O_CLOEXEC)) == -1) {
+            if (errno == ENOENT) {
+                break;
+            }
+            err(5, "open(%s/%s) failed", bundle, buf);
+        }
+        if (fstat(fd, &sb) == -1) {
+            err(5, "fstat(%s/%s) failed", bundle, buf);
+        }
+        /* Character devices: take media size and sector size from ioctls. */
+        if (S_ISCHR(sb.st_mode)) {
+            if (ioctl(fd, DIOCGMEDIASIZE, &sb.st_size) == -1) {
+                err(5, "ioctl(%s/%s, DIOCGMEDIASIZE) failed", bundle, buf);
+            }
+
+            unsigned int bs;
+            if (ioctl(fd, DIOCGSECTORSIZE, &bs) == -1) {
+                err(5, "ioctl(%s/%s, DIOCGSECTORSIZE) failed", bundle, buf);
+            }
+            if (*blk_size == 0) {       /* nothing requested yet: adopt bs */
+                *blk_size = bs;
+            } else if (*blk_size != bs) {
+                errx(5, "sector size of %s/%s (%u bytes) is not the same as "
+                        "requested size or that of other divs (%u bytes).",
+                    bundle, buf, bs, *blk_size);
+            }
+        } else if (!S_ISREG(sb.st_mode)) {
+            errx(5, "%s/%s must be a file or character device.", bundle, buf);
+        }
+
+        if (close(fd) == -1) {
+            err(5, "close(%s/%s) failed", bundle, buf);
+        }
+
+        assert(sb.st_size > 0);
+        static_assert(sizeof(size_t) >= sizeof(sb.st_size), "");
+        const size_t st_size = (size_t) sb.st_size;
+        /* Each div must be a non-zero multiple of MINDIV_SIZE() bytes. */
+        if (st_size < MINDIV_SIZE()) {
+            errx(5, "size of %s/%s is less than %u bytes.",
+                bundle, buf, MINDIV_SIZE());
+        }
+        if (st_size % MINDIV_SIZE() != 0) {
+            errx(5, "size of %s/%s is not a multiple of %u.",
+                bundle, buf, MINDIV_SIZE());
+        }
+        /* Div 0 defines the div size; every later div must match it. */
+        if (*num_divs == 0) {
+            *div_size = st_size;
+        } else if (st_size != *div_size) {
+            errx(5, "%s/%s is not the same size as other divs (%zu bytes).",
+                bundle, buf, *div_size);
+        }
+    }
+
+    if (*num_divs == 0) {
+        errx(5, "No divs found in %s.", bundle);
+    }
+    /* If *blk_size is still 0, default the sector size to MINDIV_SIZE(). */
+    *blk_size = (*blk_size == 0) ? MINDIV_SIZE() : *blk_size;
+    if (*blk_size < MINDIV_SIZE()) {
+        errx(5, "sector size must be at least %u bytes.", MINDIV_SIZE());
+    }
+    if (*blk_size % MINDIV_SIZE() != 0) {
+        errx(5, "sector size must be a multiple of %u bytes.", MINDIV_SIZE());
+    }
+    if (*blk_size > *div_size) {
+        errx(5, "sector size cannot be greater than div size (%zu bytes).",
+            *div_size);
+    }
+    /* ggcc exists only so the static_asserts can inspect its field sizes. */
+    struct g_gate_ctl_create ggcc = { .gctl_mediasize = 0 };
+    static_assert(sizeof(long) == sizeof(ggcc.gctl_mediasize), "");
+    assert(!mul_overflow(*div_size, *num_divs));
+    assert(*div_size * *num_divs <= LONG_MAX);
+    static_assert(sizeof(unsigned int) == sizeof(ggcc.gctl_sectorsize), "");
+    g_gate_verbose_log(1, LOG_DEBUG, "blk_size = %u", *blk_size);
+    g_gate_verbose_log(1, LOG_DEBUG, "div_size = %zu", *div_size);
+    g_gate_verbose_log(1, LOG_DEBUG, "num_divs = %zu", *num_divs);
+
+    free(buf);
+    return(dfd);
+}
+
+static void     /* Map the whole file fd read/write at exactly addr. */
+map_fd(void *const addr, const int fd,
+#ifdef NDEBUG   /* div_size is only read by assert() */
+    __attribute__((unused))
+#endif
+    const size_t div_size)
+{
+    assert(div_size != 0);
+    /* The file's current size decides how many bytes get mapped. */
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        err(1, "fstat() failed");
+    }
+
+    assert(sb.st_size > 0);
+    static_assert(sizeof(size_t) >= sizeof(sb.st_size), "");
+    const size_t st_size = (size_t) sb.st_size;
+    assert(st_size == div_size);
+    assert(st_size % MINDIV_SIZE() == 0);
+    /* MAP_FIXED places the mapping at addr; exits via err() on failure. */
+    void *m;
+    const int prot = PROT_READ | PROT_WRITE;
+    const int flags = MAP_SHARED | MAP_FIXED | MAP_NOCORE/* | MAP_NOSYNC*/;
+    if ((m = mmap(addr, st_size, prot, flags, fd, 0)) == MAP_FAILED) {
+        err(1, "mmap() failed");
+    }
+}
+
+static void
+map_bundle(const uintptr_t base, const uintptr_t addr, const size_t div_size,
+    const int bundlefd)
+{
+    /*
+     * Map one div at addr, inside the region that starts at base.  The div's
+     * file name is the slot index (addr - base) / div_size written in hex and
+     * is opened relative to bundlefd; addr is expected to be the start of a
+     * slot.  Exits via err() if the div cannot be opened, mapped or closed.
+     */
+    assert(base % MINDIV_SIZE() == 0);
+    assert(addr % MINDIV_SIZE() == 0);
+    assert(addr >= base);
+    assert((addr - base) % MINDIV_SIZE() == 0);
+
+    /* Fixed-size array instead of alloca(3): hex digits of a size_t plus NUL. */
+    char div[(sizeof(size_t) * CHAR_BIT + 3) / 4 + 1];
+    numtohexstr((addr - base) / div_size, div, sizeof(div));
+
+    /* div_size is a size_t, so it is printed with %zx rather than %lx. */
+    g_gate_verbose_log(3, LOG_DEBUG,
+        "->   [ 0x%09" PRIxPTR ", 0x%09" PRIxPTR " ): 0x%zx; %s",
+        addr, addr + div_size, div_size, div);
+
+    const int divfd = openat(bundlefd, div, O_RDWR | O_CLOEXEC);
+    if (divfd == -1) {
+        err(6, "open(%s) failed", div);
+    }
+    map_fd((void *) addr, divfd, div_size);
+    if (close(divfd) == -1) {
+        err(6, "close(%s) failed", div);
+    }
+}
+
+static void *
+resv_vaddr(void *const addr, const size_t len)
+{
+    int mapflags = MAP_PRIVATE | MAP_ANON;  /* anonymous, mapped PROT_NONE below */
+    if (addr != NULL) {
+        mapflags |= MAP_FIXED;      /* caller wants exactly [addr, addr + len) */
+    }
+    void *const region = mmap(addr, len, PROT_NONE, mapflags, -1, 0);
+    if (region == MAP_FAILED) {
+        err(2, "mmap() failed");
+    }
+    return(region);     /* start of the reserved, inaccessible range */
+}
+
+/* ======== */
+
+static sigjmp_buf memfault_env;     /* jump target set by sigsetjmp() in do_read()/do_write() */
+static siginfo_t memfault_info;     /* copy of the last fault's siginfo; si_signo == SIGKILL marks "none" */
+/* Handler installed for SIGSEGV and SIGBUS by install_memfault_hdl(). */
+__attribute__((noreturn)) static void
+memfault_hdl(int sig, siginfo_t *info, __attribute__((unused)) void *uap)
+{
+    memfault_info = *info;          /* keep the fault details for the code after sigsetjmp() */
+    siglongjmp(memfault_env, sig);  /* sigsetjmp() then returns sig (non-zero) */
+}
+
+static void
+install_memfault_hdl(void)
+{
+    /*
+     * Route SIGSEGV and SIGBUS to memfault_hdl(), which records the fault and
+     * jumps back to the active sigsetjmp() point.  Exits via err() if either
+     * handler cannot be installed.
+     */
+    const struct sigaction sa = {
+        .sa_sigaction = memfault_hdl,
+        .sa_flags = SA_SIGINFO      /* handler receives the siginfo_t */
+    };
+    if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+        err(3, "sigaction(SIGSEGV) failed");
+    }
+    if (sigaction(SIGBUS, &sa, NULL) == -1) {
+        err(3, "sigaction(SIGBUS) failed");
+    }
+    // Use uncatchable signal as memfault_hdl installed flag
+    memfault_info.si_signo = SIGKILL;
+}
+
+static void
+check_expected_memfault(const void *const as, const void *const ae)
+{
+    const int signo = memfault_info.si_signo;   /* recorded by memfault_hdl() */
+    if (signo == SIGKILL) {     /* still the "no fault recorded" marker */
+        errx(4, "memfault_info not initialized!");
+    }
+    if (!(signo == SIGSEGV || signo == SIGBUS)) {
+        errx(4, "unexpected %s", strsignal(signo));
+    }
+    if (memfault_info.si_addr < as || memfault_info.si_addr >= ae) {
+        errx(4, "unexpected address %p", memfault_info.si_addr);
+    }
+}
+
+/* ======== */
+
+static void
+msync2(const void *const a, const size_t n, const int f)
+{
+    /* msync(2) wrapper: extends the range down to a multiple of MINDIV_SIZE(). */
+    const uintptr_t b = (uintptr_t) a;
+    const size_t    m = b % MINDIV_SIZE();  /* distance of a above that boundary */
+    const uintptr_t c = b - m;
+    const size_t   nn = n + m;
+    g_gate_verbose_log(3, LOG_DEBUG,
+        " msync(0x%09" PRIxPTR ",      0x%08zx)", c, nn);   /* %zx: nn is a size_t */
+    if (nn > 0 && msync((void *) c, nn, f) == -1) {
+        err(8, "msync() failed");
+    }
+}
+
+__attribute__((unused)) static void *
+memcpy_msync(void *const d, const void *const s, const size_t n)
+{
+    /* Copy n bytes from s to d, then msync2() them with MS_SYNC; returns d. */
+    const uintptr_t dd = (uintptr_t) d;
+    g_gate_verbose_log(3, LOG_DEBUG,
+        "memcpy(0x%09" PRIxPTR ", ..., 0x%08zx)", dd, n);   /* %zx: n is a size_t */
+    memcpy(d, s, n);
+    msync2(d, n, MS_SYNC);
+    return(d);
+}
+
+/* ======== */
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpadded"
+struct bundle_spec {
+    size_t resv;            /* bytes of reserved address space (div_size * num_divs) */
+    uintptr_t as;           /* start of the reserved range */
+    uintptr_t ae;           /* end of the reserved range (as + resv, exclusive) */
+
+    int bundlefd;           /* directory fd the divs are opened relative to */
+    size_t div_size;        /* size of one div in bytes */
+    size_t num_divs;        /* number of divs in the bundle */
+    unsigned int blk_size;  /* sector size of the device */
+};
+#pragma clang diagnostic pop
+
+#define mapped_addrs_size 100   /* maximum number of divs mapped at once */
+static void *mapped_addrs[mapped_addrs_size];   /* per slot: start of the mapped div, or NULL */
+
+static void     /* Serve BIO_READ by pointing gctl_data into the mapped bundle. */
+do_read(const struct bundle_spec *const bspec,
+    struct g_gate_ctl_io *const ggio)
+{
+    static_assert(sizeof(ggio->gctl_length) <= sizeof(size_t), "");
+    static_assert(sizeof(ggio->gctl_offset) <= sizeof(uintptr_t), "");
+    assert(ggio->gctl_length >= 0);
+    assert(ggio->gctl_offset >= 0);
+    /* SIGKILL in si_signo is the "no fault pending" marker. */
+    assert(memfault_info.si_signo == SIGKILL);
+    /* Requests longer than mapped_addrs_size divs are refused with ENOMEM. */
+    assert(!mul_overflow(bspec->div_size, mapped_addrs_size));
+    const size_t max_len = bspec->div_size * mapped_addrs_size;
+    if ((size_t) ggio->gctl_length > max_len) {
+        ggio->gctl_error = ENOMEM;
+        return;
+    }
+    /* [a, b) is the request inside the reserved range; m is a's offset in its div. */
+    const uintptr_t a = bspec->as + (uintptr_t) ggio->gctl_offset;
+    assert(a >= bspec->as);
+    assert(a < bspec->ae);
+    const uintptr_t b = a + (uintptr_t) ggio->gctl_length;
+    assert(b <= bspec->ae);
+    const size_t m = ((size_t) ggio->gctl_offset) % bspec->div_size;
+    /* Read the first byte of every div in [a, b); a fault lands in the else branch. */
+    for (;;) {
+        volatile uintptr_t c = (volatile uintptr_t) NULL;
+        if (sigsetjmp(memfault_env, 1) == 0) {
+            for (c = a - m; c < b; c += bspec->div_size) {
+                static_assert(CHAR_BIT == 8, "");
+                volatile unsigned char *d1 = (volatile unsigned char *) c;
+                __attribute__((unused)) volatile unsigned char d2 = *d1;
+            }
+            break;      /* no fault: every div of the request is mapped */
+        } else {
+            check_expected_memfault((void *) bspec->as, (void *) bspec->ae);
+            const uintptr_t si_addr = (uintptr_t) memfault_info.si_addr;
+            memfault_info.si_signo = SIGKILL;   /* re-arm the marker */
+
+            assert((void *) c != NULL);
+            assert(si_addr == c);
+            /* d is the start of the div that contains the faulting address. */
+            // UB??
+            assert(bspec->as <= si_addr);
+            const size_t n = (si_addr - bspec->as) % bspec->div_size;
+            const uintptr_t d = si_addr - n;
+            assert(d <= si_addr);
+            assert(si_addr < d + 2 * bspec->div_size);
+
+            assert(d >= bspec->as);
+            assert(d < bspec->ae);
+            assert((d - bspec->as) % bspec->div_size == 0);
+            /* Random slot; an occupied one is replaced via resv_vaddr() first. */
+            // Too lazy to do LRU
+            assert(mapped_addrs_size <= UINT_MAX);
+            const size_t i = arc4random_uniform(
+                (unsigned int) mapped_addrs_size);
+            if (mapped_addrs[i] != NULL) {
+                resv_vaddr(mapped_addrs[i], bspec->div_size);
+
+                const uintptr_t mai = (uintptr_t) mapped_addrs[i];
+                g_gate_verbose_log(3, LOG_DEBUG,
+                    "<-   [ 0x%09" PRIxPTR ", 0x%09" PRIxPTR " ): 0x%lx",
+                    mai, mai + bspec->div_size, bspec->div_size);
+            }
+            mapped_addrs[i] = (void *) d;
+            assert(d % bspec->blk_size == 0);
+            map_bundle(bspec->as, d, bspec->div_size, bspec->bundlefd);
+        }
+    }
+    /* The data is returned in place: gctl_data points into the mapping. */
+    g_gate_verbose_log(2, LOG_DEBUG, "do_read(0x%lx, 0x%lx)",
+        ggio->gctl_offset, ggio->gctl_length);
+    ggio->gctl_data = (void *) a;
+    ggio->gctl_error = 0;
+}
+
+static void     /* Serve BIO_WRITE by copying gctl_data into the mapped bundle. */
+do_write(const struct bundle_spec *const bspec,
+    struct g_gate_ctl_io *const ggio)
+{
+    static_assert(sizeof(ggio->gctl_length) <= sizeof(size_t), "");
+    static_assert(sizeof(ggio->gctl_offset) <= sizeof(uintptr_t), "");
+    assert(ggio->gctl_length >= 0);
+    assert(ggio->gctl_offset >= 0);
+    /* SIGKILL in si_signo is the "no fault pending" marker. */
+    assert(memfault_info.si_signo == SIGKILL);
+    assert(((size_t) ggio->gctl_length) % bspec->blk_size == 0);
+    /* d: destination in the reserved range, s: source, len: bytes still to copy. */
+    uintptr_t d = bspec->as + (uintptr_t) ggio->gctl_offset;
+    assert(d >= bspec->as);
+    assert(d < bspec->ae);
+    uintptr_t s = (uintptr_t) ggio->gctl_data;
+    size_t len = (size_t) ggio->gctl_length;
+    /* Copy until done; a fault during memcpy() lands in the else branch. */
+    for (;;) {
+        if (sigsetjmp(memfault_env, 1) == 0) {
+            g_gate_verbose_log(3, LOG_DEBUG,
+                "memcpy(0x%09" PRIxPTR ", ..., 0x%08lx)", d, len);
+            /*for (size_t i = 0; i < len; i += bspec->blk_size) {
+                memcpy((void *) (d + i), (void *) (s + i), bspec->blk_size);
+            }*/
+            memcpy((void *) d, (void *) s, len);
+            break;
+        } else {
+            check_expected_memfault((void *) bspec->as, (void *) bspec->ae);
+            const uintptr_t si_addr = (uintptr_t) memfault_info.si_addr;
+            memfault_info.si_signo = SIGKILL;   /* re-arm the marker */
+            /* a is the start of the div that contains the faulting address. */
+            // UB??
+            assert(bspec->as <= si_addr);
+            const size_t m = (si_addr - bspec->as) % bspec->div_size;
+            const uintptr_t a = si_addr - m;
+            assert(a <= si_addr);
+            assert(si_addr < a + 2 * bspec->div_size);
+            /* Random slot; an occupied one is replaced via resv_vaddr() first. */
+            // Yup, still lazy...
+            assert(mapped_addrs_size <= UINT_MAX);
+            const size_t i = arc4random_uniform(
+                (unsigned int) mapped_addrs_size);
+            if (mapped_addrs[i] != NULL) {
+                resv_vaddr(mapped_addrs[i], bspec->div_size);
+
+                const uintptr_t mai = (uintptr_t) mapped_addrs[i];
+                g_gate_verbose_log(3, LOG_DEBUG,
+                    "<-   [ 0x%09" PRIxPTR ", 0x%09" PRIxPTR " ): 0x%lx",
+                    mai, mai + bspec->div_size, bspec->div_size);
+            }
+            mapped_addrs[i] = (void *) a;
+            assert(a % bspec->blk_size == 0);
+            map_bundle(bspec->as, a, bspec->div_size, bspec->bundlefd);
+            /* Resume the copy at the faulting address on the next pass. */
+            // More UB??
+            assert(d <= si_addr);
+            assert(len >= si_addr - d);
+            len = len - (si_addr - d);
+            s = s + (si_addr - d);
+            d = si_addr;
+        }
+    }
+
+    g_gate_verbose_log(2, LOG_DEBUG, "do_write(0x%lx, 0x%lx)",
+        ggio->gctl_offset, ggio->gctl_length);
+    ggio->gctl_error = 0;
+}
+
+static void
+do_flush(const struct bundle_spec *const bspec,
+    __attribute__((unused)) const struct g_gate_ctl_io *const ggio)
+{
+    /* At verbosity >= 4, log the address stored in every occupied slot. */
+    const size_t nslots = (g_gate_verbose >= 4) ? mapped_addrs_size : 0;
+    for (size_t slot = 0; slot < nslots; ++slot) {
+        if (mapped_addrs[slot] == NULL) {
+            continue;
+        }
+        g_gate_verbose_log(4, LOG_DEBUG,
+            "0x%09" PRIxPTR, (uintptr_t) mapped_addrs[slot]);
+    }
+    g_gate_verbose_log(2, LOG_DEBUG, "do_flush()");
+    msync2((void *) bspec->as, bspec->resv, MS_SYNC);   /* whole reserved range */
+}
+
+__attribute__((noreturn)) void
+g_gatel_serve_bundle(const int dfd_, const unsigned int ss_, const size_t ds_,
+    const size_t nd_, const int unit)
+{
+    /*
+     * Worker loop for a bundle-backed ggate device; never returns.
+     *   dfd_  open fd of the bundle directory
+     *   ss_   sector size; ds_ size of one div; nd_ number of divs
+     *   unit  ggate unit to serve
+     * Reserves ds_ * nd_ bytes of address space and then answers requests
+     * until the device is cancelled (exit status 0) or an error occurs.
+     */
+    freopen("/dev/null", "r", stdin);
+    if (g_gate_verbose == 0) {
+        if (daemon(0, 0) == -1) {
+            g_gate_destroy(unit, 1);
+            err(EXIT_FAILURE, "Cannot daemonize");
+        }
+        freopen("/dev/null", "w", stdout);
+        freopen("/dev/null", "w", stderr);
+    }
+    g_gate_verbose_log(1, LOG_DEBUG, "Worker created: %u.", getpid());
+
+    assert(dfd_ != -1);
+    assert(!mul_overflow(ds_, nd_));
+    struct bundle_spec bspec = {
+        .resv = ds_ * nd_,
+        .bundlefd = dfd_,
+        .div_size = ds_,
+        .num_divs = nd_,
+        .blk_size = ss_,
+    };
+    bspec.as = (uintptr_t) (resv_vaddr(NULL, bspec.resv));
+    bspec.ae = bspec.as + bspec.resv;
+
+    /* bspec.resv is a size_t, so it is printed with %zx rather than %lx. */
+    g_gate_verbose_log(1, LOG_DEBUG,
+        "[ 0x%09" PRIxPTR ", 0x%09" PRIxPTR "  ): 0x%zx",
+        bspec.as, bspec.ae, bspec.resv);
+
+    /*
+     * Buffer that receives BIO_WRITE payloads.  The gctl_length passed to
+     * G_GATE_CMD_START must describe the real allocation: announcing a fixed
+     * 262144 bytes for a blk_size-byte buffer allowed larger writes to be
+     * copied past the end of the heap block.
+     */
+    assert(bspec.blk_size > 0);
+    size_t ggio_data_bufsize = bspec.blk_size;
+    void *ggio_data_buf = malloc(ggio_data_bufsize);
+    if (ggio_data_buf == NULL) {
+        err(EXIT_FAILURE, "malloc() failed");
+    }
+
+    install_memfault_hdl();
+
+    for (;;) {
+        struct g_gate_ctl_io ggio = {
+            .gctl_version = G_GATE_VERSION,
+            .gctl_unit = unit,
+            .gctl_data = ggio_data_buf,
+            .gctl_length = (off_t) ggio_data_bufsize
+        };
+
+        g_gate_ioctl(G_GATE_CMD_START, &ggio);
+
+        switch (ggio.gctl_error) {
+        case 0:
+            break;
+        case ECANCELED:
+            g_gate_close_device();
+            if (close(bspec.bundlefd) == -1) {
+                err(EXIT_FAILURE, "close() failed");
+            }
+            do_flush(&bspec, &ggio);
+            free(ggio_data_buf);
+            g_gate_verbose_log(1, LOG_DEBUG, "Finished.");
+            exit(EXIT_SUCCESS);
+        case ENOMEM: {
+            /*
+             * The pending write does not fit: grow the buffer to the size
+             * reported in gctl_length, remember it, and ask again.  The
+             * realloc() result must be kept; the old pointer may be stale.
+             */
+            assert(ggio.gctl_cmd == BIO_WRITE);
+            assert(ggio.gctl_length > 0);
+            static_assert(sizeof(ggio.gctl_length) <= sizeof(size_t), "");
+            const size_t needed = (size_t) ggio.gctl_length;
+            void *const grown = realloc(ggio_data_buf, needed);
+            if (grown == NULL) {
+                err(EXIT_FAILURE, "realloc() failed");
+            }
+            ggio_data_buf = grown;
+            ggio_data_bufsize = needed;
+            continue;
+        }
+        case ENXIO:
+        default:
+            g_gate_xlog("ioctl(/dev/%s): %s.", G_GATE_CTL_NAME,
+                strerror(ggio.gctl_error));
+        }
+
+        switch(ggio.gctl_cmd) {
+        case BIO_READ:
+            do_read(&bspec, &ggio);
+            break;
+        case BIO_WRITE:
+            do_write(&bspec, &ggio);
+            break;
+        case BIO_FLUSH:
+            do_flush(&bspec, &ggio);
+            break;
+        default:
+            g_gate_verbose_log(1, LOG_DEBUG, "unsupported: %d",
+                ggio.gctl_cmd);
+            ggio.gctl_error = EOPNOTSUPP;
+            break;
+        }
+
+        g_gate_ioctl(G_GATE_CMD_DONE, &ggio);
+        g_gate_verbose_log(3, LOG_DEBUG, "========");
+    }
+}

Property changes on: stable/10/sbin/ggate/ggatel/ggatel2.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property





Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?nl2eii$ukl$1>