From owner-svn-src-all@freebsd.org Fri Nov 3 14:10:58 2017 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id C8F6FE531F6; Fri, 3 Nov 2017 14:10:58 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 97B0D65E75; Fri, 3 Nov 2017 14:10:58 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id vA3EAvxZ080226; Fri, 3 Nov 2017 14:10:57 GMT (envelope-from hselasky@FreeBSD.org) Received: (from hselasky@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id vA3EAvPS080224; Fri, 3 Nov 2017 14:10:57 GMT (envelope-from hselasky@FreeBSD.org) Message-Id: <201711031410.vA3EAvPS080224@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: hselasky set sender to hselasky@FreeBSD.org using -f From: Hans Petter Selasky Date: Fri, 3 Nov 2017 14:10:57 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r325362 - head/sys/fs/cuse X-SVN-Group: head X-SVN-Commit-Author: hselasky X-SVN-Commit-Paths: head/sys/fs/cuse X-SVN-Commit-Revision: 325362 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 03 Nov 2017 14:10:58 -0000 Author: hselasky Date: Fri Nov 3 14:10:57 2017 New Revision: 325362 URL: https://svnweb.freebsd.org/changeset/base/325362 Log: Allow CUSE(3) to free all memory mapped memory by using regular SWAP objects instead of malloc(). The SWAP objects are automagically freed when there are no more consumers. This greatly simplifies the mmap logic inside CUSE(3) in the kernel. This change fixes an issue where mmapped memory can accumulate and never get freed, if many different mmap sizes are needed over time. Further this change fixes memory leaks when the CUSE(3) kernel module is unloaded. While at it make sure the CUSE_ALLOC_PAGES_MAX limit is treated as an exclusive limit. CUSE(3) memory maps must be less than CUSE_ALLOC_PAGES_MAX number of pages. Reviewed by: kib @ Differential Revision: https://reviews.freebsd.org/D11392 Sponsored by: Mellanox Technologies MFC after: 1 week Modified: head/sys/fs/cuse/cuse.c head/sys/fs/cuse/cuse_ioctl.h Modified: head/sys/fs/cuse/cuse.c ============================================================================== --- head/sys/fs/cuse/cuse.c Fri Nov 3 13:52:34 2017 (r325361) +++ head/sys/fs/cuse/cuse.c Fri Nov 3 14:10:57 2017 (r325362) @@ -1,6 +1,6 @@ /* $FreeBSD$ */ /*- - * Copyright (c) 2010-2013 Hans Petter Selasky. All rights reserved. + * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,9 @@ #include #include +#include +#include +#include #include #include @@ -69,8 +73,6 @@ MODULE_VERSION(cuse, 1); */ MODULE_VERSION(cuse4bsd, 1); -#define NBUSY ((uint8_t *)1) - #ifdef FEATURE FEATURE(cuse, "Userspace character devices"); #endif @@ -94,10 +96,10 @@ struct cuse_client_command { }; struct cuse_memory { - struct cuse_server *owner; - uint8_t *virtaddr; + TAILQ_ENTRY(cuse_memory) entry; + vm_object_t object; uint32_t page_count; - uint32_t is_allocated; + uint32_t alloc_nr; }; struct cuse_server_dev { @@ -112,6 +114,7 @@ struct cuse_server { TAILQ_HEAD(, cuse_client_command) head; TAILQ_HEAD(, cuse_server_dev) hdev; TAILQ_HEAD(, cuse_client) hcli; + TAILQ_HEAD(, cuse_memory) hmem; struct cv cv; struct selinfo selinfo; pid_t pid; @@ -128,8 +131,8 @@ struct cuse_client { uint8_t ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4); - int fflags; /* file flags */ - int cflags; /* client flags */ + int fflags; /* file flags */ + int cflags; /* client flags */ #define CUSE_CLI_IS_CLOSING 0x01 #define CUSE_CLI_KNOTE_NEED_READ 0x02 #define CUSE_CLI_KNOTE_NEED_WRITE 0x04 @@ -140,14 +143,13 @@ struct cuse_client { #define CUSE_CLIENT_CLOSING(pcc) \ ((pcc)->cflags & CUSE_CLI_IS_CLOSING) -static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory"); +static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory"); static TAILQ_HEAD(, cuse_server) cuse_server_head; static struct mtx cuse_mtx; static struct cdev *cuse_dev; static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX]; static int cuse_alloc_unit_id[CUSE_DEVICES_MAX]; -static struct cuse_memory cuse_mem[CUSE_ALLOC_UNIT_MAX]; static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs); static void cuse_client_kqfilter_read_detach(struct knote *kn); @@ -173,7 +175,7 @@ static d_ioctl_t cuse_client_ioctl; static d_read_t cuse_client_read; static d_write_t cuse_client_write; static d_poll_t cuse_client_poll; -static d_mmap_t cuse_client_mmap; +static d_mmap_single_t cuse_client_mmap_single; static d_kqfilter_t cuse_client_kqfilter; static struct cdevsw cuse_client_devsw = { @@ -186,7 +188,7 @@ static struct cdevsw cuse_client_devsw = { .d_read = cuse_client_read, .d_write = cuse_client_write, .d_poll = cuse_client_poll, - .d_mmap = cuse_client_mmap, + .d_mmap_single = cuse_client_mmap_single, .d_kqfilter = cuse_client_kqfilter, }; @@ -196,7 +198,7 @@ static d_ioctl_t cuse_server_ioctl; static d_read_t cuse_server_read; static d_write_t cuse_server_write; static d_poll_t cuse_server_poll; -static d_mmap_t cuse_server_mmap; +static d_mmap_single_t cuse_server_mmap_single; static struct cdevsw cuse_server_devsw = { .d_version = D_VERSION, @@ -208,7 +210,7 @@ static struct cdevsw cuse_server_devsw = { .d_read = cuse_server_read, .d_write = cuse_server_write, .d_poll = cuse_server_poll, - .d_mmap = cuse_server_mmap, + .d_mmap_single = cuse_server_mmap_single, }; static void cuse_client_is_closing(struct cuse_client *); @@ -252,7 +254,6 @@ cuse_kern_init(void *arg) (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF, (CUSE_VERSION >> 0) & 0xFF); } - SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, 0); static void @@ -280,7 +281,6 @@ cuse_kern_uninit(void *arg) mtx_destroy(&cuse_mtx); } - SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0); static int @@ -398,74 +398,80 @@ cuse_convert_error(int error) } static void -cuse_server_free_memory(struct cuse_server *pcs) +cuse_vm_memory_free(struct cuse_memory *mem) { - struct cuse_memory *mem; - uint32_t n; + /* last user is gone - free */ + vm_object_deallocate(mem->object); - for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) { - mem = &cuse_mem[n]; - - /* this memory is never freed */ - if (mem->owner == pcs) { - mem->owner = NULL; - mem->is_allocated = 0; - } - } + /* free CUSE memory */ + free(mem, M_CUSE); } static int -cuse_server_alloc_memory(struct cuse_server *pcs, - struct cuse_memory *mem, uint32_t page_count) +cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr, + uint32_t page_count) { - void *ptr; + struct cuse_memory *temp; + struct cuse_memory *mem; + vm_object_t object; int error; - cuse_lock(); + mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO); + if (mem == NULL) + return (ENOMEM); - if (mem->virtaddr == NBUSY) { - cuse_unlock(); - return (EBUSY); + object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count, + VM_PROT_DEFAULT, 0, curthread->td_ucred); + if (object == NULL) { + error = ENOMEM; + goto error_0; } - if (mem->virtaddr != NULL) { - if (mem->is_allocated != 0) { - cuse_unlock(); - return (EBUSY); - } - if (mem->page_count == page_count) { - mem->is_allocated = 1; - mem->owner = pcs; - cuse_unlock(); - return (0); - } + + cuse_lock(); + /* check if allocation number already exists */ + TAILQ_FOREACH(temp, &pcs->hmem, entry) { + if (temp->alloc_nr == alloc_nr) + break; + } + if (temp != NULL) { cuse_unlock(); - return (EBUSY); + error = EBUSY; + goto error_1; } - memset(mem, 0, sizeof(*mem)); + mem->object = object; + mem->page_count = page_count; + mem->alloc_nr = alloc_nr; + TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry); + cuse_unlock(); - mem->virtaddr = NBUSY; + return (0); - cuse_unlock(); +error_1: + vm_object_deallocate(object); +error_0: + free(mem, M_CUSE); + return (error); +} - ptr = malloc(page_count * PAGE_SIZE, M_CUSE, M_WAITOK | M_ZERO); - if (ptr == NULL) - error = ENOMEM; - else - error = 0; +static int +cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr) +{ + struct cuse_memory *mem; cuse_lock(); - - if (error) { - mem->virtaddr = NULL; + TAILQ_FOREACH(mem, &pcs->hmem, entry) { + if (mem->alloc_nr == alloc_nr) + break; + } + if (mem == NULL) { cuse_unlock(); - return (error); + return (EINVAL); } - mem->virtaddr = ptr; - mem->page_count = page_count; - mem->is_allocated = 1; - mem->owner = pcs; + TAILQ_REMOVE(&pcs->hmem, mem, entry); cuse_unlock(); + cuse_vm_memory_free(mem); + return (0); } @@ -646,10 +652,10 @@ cuse_server_free_dev(struct cuse_server_dev *pcsd) } static void -cuse_server_free(void *arg) +cuse_server_unref(struct cuse_server *pcs) { - struct cuse_server *pcs = arg; struct cuse_server_dev *pcsd; + struct cuse_memory *mem; cuse_lock(); pcs->refs--; @@ -672,7 +678,12 @@ cuse_server_free(void *arg) cuse_lock(); } - cuse_server_free_memory(pcs); + while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) { + TAILQ_REMOVE(&pcs->hmem, mem, entry); + cuse_unlock(); + cuse_vm_memory_free(mem); + cuse_lock(); + } knlist_clear(&pcs->selinfo.si_note, 1); knlist_destroy(&pcs->selinfo.si_note); @@ -686,6 +697,15 @@ cuse_server_free(void *arg) free(pcs, M_CUSE); } +static void +cuse_server_free(void *arg) +{ + struct cuse_server *pcs = arg; + + /* drop refcount */ + cuse_server_unref(pcs); +} + static int cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td) { @@ -700,13 +720,13 @@ cuse_server_open(struct cdev *dev, int fflags, int dev free(pcs, M_CUSE); return (ENOMEM); } - /* store current process ID */ pcs->pid = curproc->p_pid; TAILQ_INIT(&pcs->head); TAILQ_INIT(&pcs->hdev); TAILQ_INIT(&pcs->hcli); + TAILQ_INIT(&pcs->hmem); cv_init(&pcs->cv, "cuse-server-cv"); @@ -1093,12 +1113,12 @@ cuse_server_ioctl(struct cdev *dev, unsigned long cmd, error = ENOMEM; break; } - if (pai->page_count > CUSE_ALLOC_PAGES_MAX) { + if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) { error = ENOMEM; break; } error = cuse_server_alloc_memory(pcs, - &cuse_mem[pai->alloc_nr], pai->page_count); + pai->alloc_nr, pai->page_count); break; case CUSE_IOCTL_FREE_MEMORY: @@ -1108,16 +1128,7 @@ cuse_server_ioctl(struct cdev *dev, unsigned long cmd, error = ENOMEM; break; } - /* we trust the character device driver in this case */ - - cuse_lock(); - if (cuse_mem[pai->alloc_nr].owner == pcs) { - cuse_mem[pai->alloc_nr].is_allocated = 0; - cuse_mem[pai->alloc_nr].owner = NULL; - } else { - error = EINVAL; - } - cuse_unlock(); + error = cuse_server_free_memory(pcs, pai->alloc_nr); break; case CUSE_IOCTL_GET_SIG: @@ -1276,49 +1287,49 @@ cuse_server_poll(struct cdev *dev, int events, struct } static int -cuse_server_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) +cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset, + vm_size_t size, struct vm_object **object, int nprot) { - uint32_t page_nr = offset / PAGE_SIZE; + uint32_t page_nr = *offset / PAGE_SIZE; uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX; struct cuse_memory *mem; struct cuse_server *pcs; - uint8_t *ptr; int error; - if (alloc_nr >= CUSE_ALLOC_UNIT_MAX) - return (ENOMEM); - error = cuse_server_get(&pcs); if (error != 0) - pcs = NULL; + return (error); cuse_lock(); - mem = &cuse_mem[alloc_nr]; - - /* try to enforce slight ownership */ - if ((pcs != NULL) && (mem->owner != pcs)) { - cuse_unlock(); - return (EINVAL); + /* lookup memory structure */ + TAILQ_FOREACH(mem, &pcs->hmem, entry) { + if (mem->alloc_nr == alloc_nr) + break; } - if (mem->virtaddr == NULL) { + if (mem == NULL) { cuse_unlock(); return (ENOMEM); } - if (mem->virtaddr == NBUSY) { - cuse_unlock(); - return (ENOMEM); - } + /* verify page offset */ page_nr %= CUSE_ALLOC_PAGES_MAX; - if (page_nr >= mem->page_count) { cuse_unlock(); return (ENXIO); } - ptr = mem->virtaddr + (page_nr * PAGE_SIZE); + /* verify mmap size */ + if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) || + (size > ((mem->page_count - page_nr) * PAGE_SIZE))) { + cuse_unlock(); + return (EINVAL); + } + vm_object_reference(mem->object); + *object = mem->object; cuse_unlock(); - *paddr = vtophys(ptr); + /* set new VM object offset to use */ + *offset = page_nr * PAGE_SIZE; + /* success */ return (0); } @@ -1351,7 +1362,7 @@ cuse_client_free(void *arg) free(pcc, M_CUSE); /* drop reference on server */ - cuse_server_free(pcs); + cuse_server_unref(pcs); } static int @@ -1394,13 +1405,13 @@ cuse_client_open(struct cdev *dev, int fflags, int dev pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO); if (pcc == NULL) { /* drop reference on server */ - cuse_server_free(pcs); + cuse_server_unref(pcs); return (ENOMEM); } if (devfs_set_cdevpriv(pcc, &cuse_client_free)) { printf("Cuse: Cannot set cdevpriv.\n"); /* drop reference on server */ - cuse_server_free(pcs); + cuse_server_unref(pcs); free(pcc, M_CUSE); return (ENOMEM); } @@ -1550,7 +1561,6 @@ cuse_client_read(struct cdev *dev, struct uio *uio, in error = ENOMEM; break; } - len = uio->uio_iov->iov_len; cuse_lock(); @@ -1610,7 +1620,6 @@ cuse_client_write(struct cdev *dev, struct uio *uio, i error = ENOMEM; break; } - len = uio->uio_iov->iov_len; cuse_lock(); @@ -1753,59 +1762,56 @@ cuse_client_poll(struct cdev *dev, int events, struct } return (revents); - pollnval: +pollnval: /* XXX many clients don't understand POLLNVAL */ return (events & (POLLHUP | POLLPRI | POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)); } static int -cuse_client_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) +cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset, + vm_size_t size, struct vm_object **object, int nprot) { - uint32_t page_nr = offset / PAGE_SIZE; + uint32_t page_nr = *offset / PAGE_SIZE; uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX; struct cuse_memory *mem; - struct cuse_server *pcs; struct cuse_client *pcc; - uint8_t *ptr; int error; - if (alloc_nr >= CUSE_ALLOC_UNIT_MAX) - return (ENOMEM); - error = cuse_client_get(&pcc); if (error != 0) - pcs = NULL; - else - pcs = pcc->server; + return (error); cuse_lock(); - mem = &cuse_mem[alloc_nr]; - - /* try to enforce slight ownership */ - if ((pcs != NULL) && (mem->owner != pcs)) { - cuse_unlock(); - return (EINVAL); + /* lookup memory structure */ + TAILQ_FOREACH(mem, &pcc->server->hmem, entry) { + if (mem->alloc_nr == alloc_nr) + break; } - if (mem->virtaddr == NULL) { + if (mem == NULL) { cuse_unlock(); return (ENOMEM); } - if (mem->virtaddr == NBUSY) { - cuse_unlock(); - return (ENOMEM); - } + /* verify page offset */ page_nr %= CUSE_ALLOC_PAGES_MAX; - if (page_nr >= mem->page_count) { cuse_unlock(); return (ENXIO); } - ptr = mem->virtaddr + (page_nr * PAGE_SIZE); + /* verify mmap size */ + if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) || + (size > ((mem->page_count - page_nr) * PAGE_SIZE))) { + cuse_unlock(); + return (EINVAL); + } + vm_object_reference(mem->object); + *object = mem->object; cuse_unlock(); - *paddr = vtophys(ptr); + /* set new VM object offset to use */ + *offset = page_nr * PAGE_SIZE; + /* success */ return (0); } Modified: head/sys/fs/cuse/cuse_ioctl.h ============================================================================== --- head/sys/fs/cuse/cuse_ioctl.h Fri Nov 3 13:52:34 2017 (r325361) +++ head/sys/fs/cuse/cuse_ioctl.h Fri Nov 3 14:10:57 2017 (r325362) @@ -35,6 +35,7 @@ #define CUSE_BUF_MIN_PTR 0x10000UL #define CUSE_BUF_MAX_PTR 0x20000UL #define CUSE_ALLOC_UNIT_MAX 128 /* units */ +/* All memory allocations must be less than the following limit */ #define CUSE_ALLOC_PAGES_MAX (((16UL * 1024UL * 1024UL) + PAGE_SIZE - 1) / PAGE_SIZE) struct cuse_dev;