Date: Fri, 7 Mar 2014 22:53:29 +0000 (UTC) From: Gleb Smirnoff <glebius@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r262911 - projects/sendfile/sys/kern Message-ID: <201403072253.s27MrTEM093095@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: glebius Date: Fri Mar 7 22:53:29 2014 New Revision: 262911 URL: http://svnweb.freebsd.org/changeset/base/262911 Log: Provide a hack to work around the following condition: two sendfile(2) calls are issued on the same object, with ranges overlapping on at least one page. One of them grabs all pages, schedules I/O and returns. The second one encounters an overlapping page in a busy state. It sleeps on the 'pgrbwt' wait channel, and the sleep time is effectively the I/O time. Thus, the second syscall degenerates into blocking on disk I/O. To avoid this, a sysctl kern.ipc.sendfile.pgrabnowait is provided (off by default). When the sysctl is on and userland supplies the SF_NODISKIO flag, we call vm_page_grab(VM_ALLOC_NOWAIT). In case of a failure we return EAGAIN, hinting to userland that it should continue monitoring the socket via select/kevent/whatever. If the socket has zero data to send, then we have to notify it immediately, to avoid a stall. Sponsored by: Netflix Sponsored by: Nginx, Inc. 
Modified: projects/sendfile/sys/kern/uipc_syscalls.c Modified: projects/sendfile/sys/kern/uipc_syscalls.c ============================================================================== --- projects/sendfile/sys/kern/uipc_syscalls.c Fri Mar 7 22:29:00 2014 (r262910) +++ projects/sendfile/sys/kern/uipc_syscalls.c Fri Mar 7 22:53:29 2014 (r262911) @@ -133,6 +133,10 @@ static int filt_sfsync(struct knote *kn, static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0, "sendfile(2) tunables"); +static int sfpgrabnowait = 0; +SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, pgrabnowait, CTLFLAG_RW, + &sfpgrabnowait, 0, "Use VM_ALLOC_NOWAIT when SF_NODISKIO is requested"); + #ifdef SFSYNC_DEBUG static int sf_sync_debug = 0; SYSCTL_INT(_debug, OID_AUTO, sf_sync_debug, CTLFLAG_RW, @@ -2718,18 +2722,28 @@ sf_io_done(void *arg) } static int -sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len) +sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len, + int flags) { vm_page_t *pa = sfio->pa; int npages = sfio->npages; int nios, rv; nios = 0; + if (sfpgrabnowait && (flags & SF_NODISKIO)) + flags = VM_ALLOC_NOWAIT; + else + flags = 0; VM_OBJECT_WLOCK(obj); - for (int i = 0; i < npages; i++) + for (int i = 0; i < npages; i++) { pa[i] = vm_page_grab(obj, OFF_TO_IDX(vmoff(i, off)), - VM_ALLOC_WIRED | VM_ALLOC_NORMAL); + VM_ALLOC_WIRED | VM_ALLOC_NORMAL | flags); + if (pa[i] == NULL) { + npages = sfio->npages = i; + break; + } + } for (int i = 0; i < npages; i++) { int j, a; @@ -3079,7 +3093,37 @@ retry_space: refcount_init(&sfio->nios, 1); sfio->npages = npages; - nios = sendfile_swapin(obj, sfio, off, space); + nios = sendfile_swapin(obj, sfio, off, space, flags); + + if (sfio->npages != npages) { + /* + * sendfile_swapin() encountered a busy page, + * and was called with SF_NODISKIO. We don't + * return EBUSY, like old I/O blocking sendfile + * did, because situtation is different. 
No + * extra operation like read(2) or aio_read(2) + * is required from userland. We just need it + * to retry soonish. + * We rely on remote side ACKing our data to + * drive this timeout. And in the worst case, + * when we do not have data to send, we put + * the socket on the notification queue immediately. + */ + error = EAGAIN; + if (sfio->npages == 0 && hdrlen == 0) { + if (vp != NULL) + VOP_UNLOCK(vp, 0); + SOCKBUF_LOCK(&so->so_snd); + if (!sbused(&so->so_snd)) + sowwakeup_locked(so); + else + SOCKBUF_UNLOCK(&so->so_snd); + free(sfio, M_TEMP); + goto done; + } + fixspace(npages, sfio->npages, off, &space); + npages = sfio->npages; + } /* * Loop and construct maximum sized mbuf chain to be bulk @@ -3180,7 +3224,8 @@ retry_space: mh = NULL; } - if (error) { + if (m == NULL) { + KASSERT(error, ("%s: no mbuf and no error", __func__)); free(sfio, M_TEMP); goto done; }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201403072253.s27MrTEM093095>