From owner-svn-src-head@FreeBSD.ORG Wed Apr 10 02:12:41 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.FreeBSD.org [8.8.178.115]) by hub.freebsd.org (Postfix) with ESMTP id 5907C412; Wed, 10 Apr 2013 02:12:41 +0000 (UTC) (envelope-from neel@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 3A79BB5F; Wed, 10 Apr 2013 02:12:41 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.6/8.14.6) with ESMTP id r3A2CeQh038559; Wed, 10 Apr 2013 02:12:40 GMT (envelope-from neel@svn.freebsd.org) Received: (from neel@localhost) by svn.freebsd.org (8.14.6/8.14.5/Submit) id r3A2CdhV038552; Wed, 10 Apr 2013 02:12:39 GMT (envelope-from neel@svn.freebsd.org) Message-Id: <201304100212.r3A2CdhV038552@svn.freebsd.org> From: Neel Natu Date: Wed, 10 Apr 2013 02:12:39 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r249321 - head/usr.sbin/bhyve X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 10 Apr 2013 02:12:41 -0000 Author: neel Date: Wed Apr 10 02:12:39 2013 New Revision: 249321 URL: http://svnweb.freebsd.org/changeset/base/249321 Log: Improve PCI BAR emulation: - Respect the MEMEN and PORTEN bits in the command register - Allow the guest to reprogram the address decoded by the BAR Submitted by: Gopakumar T Obtained from: NetApp Modified: head/usr.sbin/bhyve/consport.c head/usr.sbin/bhyve/dbgport.c head/usr.sbin/bhyve/inout.c head/usr.sbin/bhyve/inout.h head/usr.sbin/bhyve/mem.c head/usr.sbin/bhyve/mem.h head/usr.sbin/bhyve/pci_emul.c Modified: head/usr.sbin/bhyve/consport.c ============================================================================== --- head/usr.sbin/bhyve/consport.c Wed Apr 10 00:35:08 2013 (r249320) +++ head/usr.sbin/bhyve/consport.c Wed Apr 10 02:12:39 2013 (r249321) @@ -128,6 +128,7 @@ console_handler(struct vmctx *ctx, int v static struct inout_port consport = { "bvmcons", BVM_CONSOLE_PORT, + 1, IOPORT_F_INOUT, console_handler }; Modified: head/usr.sbin/bhyve/dbgport.c ============================================================================== --- head/usr.sbin/bhyve/dbgport.c Wed Apr 10 00:35:08 2013 (r249320) +++ head/usr.sbin/bhyve/dbgport.c Wed Apr 10 02:12:39 2013 (r249321) @@ -105,6 +105,7 @@ again: static struct inout_port dbgport = { "bvmdbg", BVM_DBG_PORT, + 1, IOPORT_F_INOUT, dbg_handler }; Modified: head/usr.sbin/bhyve/inout.c ============================================================================== --- head/usr.sbin/bhyve/inout.c Wed Apr 10 00:35:08 2013 (r249320) +++ head/usr.sbin/bhyve/inout.c Wed Apr 10 02:12:39 2013 (r249321) @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include "inout.h" @@ -41,6 +42,9 @@ SET_DECLARE(inout_port_set, struct inout #define MAX_IOPORTS (1 << 16) +#define VERIFY_IOPORT(port, size) \ + assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS) + static struct { const char *name; int flags; @@ -69,6 +73,23 @@ default_inout(struct vmctx *ctx, int vcp return (0); } +static void +register_default_iohandler(int start, int size) +{ + struct inout_port iop; + + VERIFY_IOPORT(start, size); + + bzero(&iop, sizeof(iop)); + iop.name = "default"; + iop.port = start; + iop.size = size; + iop.flags = IOPORT_F_INOUT; + iop.handler = default_inout; + + register_inout(&iop); +} + int emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, int strict) @@ -113,17 +134,11 @@ void init_inout(void) { struct inout_port **iopp, *iop; - int i; /* * Set up the default handler for all ports */ - for (i = 0; i < MAX_IOPORTS; i++) { - inout_handlers[i].name = "default"; - inout_handlers[i].flags = IOPORT_F_IN | IOPORT_F_OUT; - inout_handlers[i].handler = default_inout; - inout_handlers[i].arg = NULL; - } + register_default_iohandler(0, MAX_IOPORTS); /* * Overwrite with specified handlers @@ -141,11 +156,28 @@ init_inout(void) int register_inout(struct inout_port *iop) { - assert(iop->port < MAX_IOPORTS); - inout_handlers[iop->port].name = iop->name; - inout_handlers[iop->port].flags = iop->flags; - inout_handlers[iop->port].handler = iop->handler; - inout_handlers[iop->port].arg = iop->arg; + int i; + + VERIFY_IOPORT(iop->port, iop->size); + + for (i = iop->port; i < iop->port + iop->size; i++) { + inout_handlers[i].name = iop->name; + inout_handlers[i].flags = iop->flags; + inout_handlers[i].handler = iop->handler; + inout_handlers[i].arg = iop->arg; + } + + return (0); +} + +int +unregister_inout(struct inout_port *iop) +{ + + VERIFY_IOPORT(iop->port, iop->size); + assert(inout_handlers[iop->port].name == iop->name); + + register_default_iohandler(iop->port, iop->size); return (0); } Modified: head/usr.sbin/bhyve/inout.h ============================================================================== --- head/usr.sbin/bhyve/inout.h Wed Apr 10 00:35:08 2013 (r249320) +++ head/usr.sbin/bhyve/inout.h Wed Apr 10 02:12:39 2013 (r249321) @@ -39,6 +39,7 @@ typedef int (*inout_func_t)(struct vmctx struct inout_port { const char *name; int port; + int size; int flags; inout_func_t handler; void *arg; @@ -51,6 +52,7 @@ struct inout_port { static struct inout_port __CONCAT(__inout_port, __LINE__) = { \ #name, \ (port), \ + 1, \ (flags), \ (handler), \ 0 \ @@ -61,7 +63,7 @@ void init_inout(void); int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes, uint32_t *eax, int strict); int register_inout(struct inout_port *iop); - +int unregister_inout(struct inout_port *iop); void init_bvmcons(void); #endif /* _INOUT_H_ */ Modified: head/usr.sbin/bhyve/mem.c ============================================================================== --- head/usr.sbin/bhyve/mem.c Wed Apr 10 00:35:08 2013 (r249320) +++ head/usr.sbin/bhyve/mem.c Wed Apr 10 02:12:39 2013 (r249321) @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "mem.h" @@ -71,6 +72,8 @@ RB_HEAD(mmio_rb_tree, mmio_rb_range) mmi */ static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; +static pthread_rwlock_t rwlock; + static int mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) { @@ -125,10 +128,12 @@ mmio_rb_dump(struct mmio_rb_tree *rbt) { struct mmio_rb_range *np; + pthread_rwlock_rdlock(&rwlock); RB_FOREACH(np, mmio_rb_tree, rbt) { printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, np->mr_param.name); } + pthread_rwlock_unlock(&rwlock); } #endif @@ -161,7 +166,8 @@ emulate_mem(struct vmctx *ctx, int vcpu, { struct mmio_rb_range *entry; int err; - + + pthread_rwlock_rdlock(&rwlock); /* * First check the per-vCPU cache */ @@ -173,10 +179,11 @@ emulate_mem(struct vmctx *ctx, int vcpu, entry = NULL; if (entry == NULL) { - if (!mmio_rb_lookup(&mmio_rb_root, paddr, &entry)) { + if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { /* Update the per-vCPU cache */ mmio_hint[vcpu] = entry; } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { + pthread_rwlock_unlock(&rwlock); return (ESRCH); } } @@ -184,25 +191,29 @@ emulate_mem(struct vmctx *ctx, int vcpu, assert(entry != NULL); err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, mem_read, mem_write, &entry->mr_param); + pthread_rwlock_unlock(&rwlock); + return (err); } static int register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) { - struct mmio_rb_range *mrp; + struct mmio_rb_range *entry, *mrp; int err; err = 0; mrp = malloc(sizeof(struct mmio_rb_range)); - + if (mrp != NULL) { mrp->mr_param = *memp; mrp->mr_base = memp->base; mrp->mr_end = memp->base + memp->size - 1; - - err = mmio_rb_add(rbt, mrp); + pthread_rwlock_wrlock(&rwlock); + if (mmio_rb_lookup(rbt, memp->base, &entry) != 0) + err = mmio_rb_add(rbt, mrp); + pthread_rwlock_unlock(&rwlock); if (err) free(mrp); } else @@ -225,10 +236,40 @@ register_mem_fallback(struct mem_range * return (register_mem_int(&mmio_rb_fallback, memp)); } +int +unregister_mem(struct mem_range *memp) +{ + struct mem_range *mr; + struct mmio_rb_range *entry = NULL; + int err, i; + + pthread_rwlock_wrlock(&rwlock); + err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); + if (err == 0) { + mr = &entry->mr_param; + assert(mr->name == memp->name); + assert(mr->base == memp->base && mr->size == memp->size); + RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); + + /* flush Per-vCPU cache */ + for (i=0; i < VM_MAXCPU; i++) { + if (mmio_hint[i] == entry) + mmio_hint[i] = NULL; + } + } + pthread_rwlock_unlock(&rwlock); + + if (entry) + free(entry); + + return (err); +} + void init_mem(void) { RB_INIT(&mmio_rb_root); RB_INIT(&mmio_rb_fallback); + pthread_rwlock_init(&rwlock, NULL); } Modified: head/usr.sbin/bhyve/mem.h ============================================================================== --- head/usr.sbin/bhyve/mem.h Wed Apr 10 00:35:08 2013 (r249320) +++ head/usr.sbin/bhyve/mem.h Wed Apr 10 02:12:39 2013 (r249321) @@ -54,5 +54,6 @@ int emulate_mem(struct vmctx *, int int register_mem(struct mem_range *memp); int register_mem_fallback(struct mem_range *memp); +int unregister_mem(struct mem_range *memp); #endif /* _MEM_H_ */ Modified: head/usr.sbin/bhyve/pci_emul.c ============================================================================== --- head/usr.sbin/bhyve/pci_emul.c Wed Apr 10 00:35:08 2013 (r249320) +++ head/usr.sbin/bhyve/pci_emul.c Wed Apr 10 02:12:39 2013 (r249321) @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -353,20 +355,150 @@ pci_emul_alloc_bar(struct pci_devinst *p return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); } +/* + * Register (or unregister) the MMIO or I/O region associated with the BAR + * register 'idx' of an emulated pci device. + */ +static void +modify_bar_registration(struct pci_devinst *pi, int idx, int registration) +{ + int error; + struct inout_port iop; + struct mem_range mr; + + switch (pi->pi_bar[idx].type) { + case PCIBAR_IO: + bzero(&iop, sizeof(struct inout_port)); + iop.name = pi->pi_name; + iop.port = pi->pi_bar[idx].addr; + iop.size = pi->pi_bar[idx].size; + if (registration) { + iop.flags = IOPORT_F_INOUT; + iop.handler = pci_emul_io_handler; + iop.arg = pi; + error = register_inout(&iop); + } else + error = unregister_inout(&iop); + break; + case PCIBAR_MEM32: + case PCIBAR_MEM64: + bzero(&mr, sizeof(struct mem_range)); + mr.name = pi->pi_name; + mr.base = pi->pi_bar[idx].addr; + mr.size = pi->pi_bar[idx].size; + if (registration) { + mr.flags = MEM_F_RW; + mr.handler = pci_emul_mem_handler; + mr.arg1 = pi; + mr.arg2 = idx; + error = register_mem(&mr); + } else + error = unregister_mem(&mr); + break; + default: + error = EINVAL; + break; + } + assert(error == 0); +} + +static void +unregister_bar(struct pci_devinst *pi, int idx) +{ + + modify_bar_registration(pi, idx, 0); +} + +static void +register_bar(struct pci_devinst *pi, int idx) +{ + + modify_bar_registration(pi, idx, 1); +} + +/* Are we decoding i/o port accesses for the emulated pci device? */ +static int +porten(struct pci_devinst *pi) +{ + uint16_t cmd; + + cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); + + return (cmd & PCIM_CMD_PORTEN); +} + +/* Are we decoding memory accesses for the emulated pci device? */ +static int +memen(struct pci_devinst *pi) +{ + uint16_t cmd; + + cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); + + return (cmd & PCIM_CMD_MEMEN); +} + +/* + * Update the MMIO or I/O address that is decoded by the BAR register. + * + * If the pci device has enabled the address space decoding then intercept + * the address range decoded by the BAR register. + */ +static void +update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) +{ + int decode; + + if (pi->pi_bar[idx].type == PCIBAR_IO) + decode = porten(pi); + else + decode = memen(pi); + + if (decode) + unregister_bar(pi, idx); + + switch (type) { + case PCIBAR_IO: + case PCIBAR_MEM32: + pi->pi_bar[idx].addr = addr; + break; + case PCIBAR_MEM64: + pi->pi_bar[idx].addr &= ~0xffffffffUL; + pi->pi_bar[idx].addr |= addr; + break; + case PCIBAR_MEMHI64: + pi->pi_bar[idx].addr &= 0xffffffff; + pi->pi_bar[idx].addr |= addr; + break; + default: + assert(0); + } + + if (decode) + register_bar(pi, idx); +} + int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, enum pcibar_type type, uint64_t size) { - int i, error; + int error; uint64_t *baseptr, limit, addr, mask, lobits, bar; - struct inout_port iop; - struct mem_range memp; assert(idx >= 0 && idx <= PCI_BARMAX); if ((size & (size - 1)) != 0) size = 1UL << flsl(size); /* round up to a power of 2 */ + /* Enforce minimum BAR sizes required by the PCI standard */ + if (type == PCIBAR_IO) { + if (size < 4) + size = 4; + } else { + if (size < 16) + size = 16; + } + switch (type) { case PCIBAR_NONE: baseptr = NULL; @@ -443,30 +575,7 @@ pci_emul_alloc_pbar(struct pci_devinst * pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); } - /* add a handler to intercept accesses to the I/O bar */ - if (type == PCIBAR_IO) { - iop.name = pdi->pi_name; - iop.flags = IOPORT_F_INOUT; - iop.handler = pci_emul_io_handler; - iop.arg = pdi; - - for (i = 0; i < size; i++) { - iop.port = addr + i; - register_inout(&iop); - } - } else if (type == PCIBAR_MEM32 || type == PCIBAR_MEM64) { - /* add memory bar intercept handler */ - memp.name = pdi->pi_name; - memp.flags = MEM_F_RW; - memp.base = addr; - memp.size = size; - memp.handler = pci_emul_mem_handler; - memp.arg1 = pdi; - memp.arg2 = idx; - - error = register_mem(&memp); - assert(error == 0); - } + register_bar(pdi, idx); return (0); } @@ -1101,6 +1210,62 @@ pci_emul_cfgaddr(struct vmctx *ctx, int } INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr); +static uint32_t +bits_changed(uint32_t old, uint32_t new, uint32_t mask) +{ + + return ((old ^ new) & mask); +} + +static void +pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) +{ + int i; + uint16_t old; + + /* + * The command register is at an offset of 4 bytes and thus the + * guest could write 1, 2 or 4 bytes starting at this offset. + */ + + old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ + CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */ + new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ + + /* + * If the MMIO or I/O address space decoding has changed then + * register/unregister all BARs that decode that address space. + */ + for (i = 0; i < PCI_BARMAX; i++) { + switch (pi->pi_bar[i].type) { + case PCIBAR_NONE: + case PCIBAR_MEMHI64: + break; + case PCIBAR_IO: + /* I/O address space decoding changed? */ + if (bits_changed(old, new, PCIM_CMD_PORTEN)) { + if (porten(pi)) + register_bar(pi, i); + else + unregister_bar(pi, i); + } + break; + case PCIBAR_MEM32: + case PCIBAR_MEM64: + /* MMIO address space decoding changed? */ + if (bits_changed(old, new, PCIM_CMD_MEMEN)) { + if (memen(pi)) + register_bar(pi, i); + else + unregister_bar(pi, i); + } + break; + default: + assert(0); + } + } +} + static int pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) @@ -1108,7 +1273,7 @@ pci_emul_cfgdata(struct vmctx *ctx, int struct pci_devinst *pi; struct pci_devemu *pe; int coff, idx, needcfg; - uint64_t mask, bar; + uint64_t addr, bar, mask; assert(bytes == 1 || bytes == 2 || bytes == 4); @@ -1175,33 +1340,48 @@ pci_emul_cfgdata(struct vmctx *ctx, int if (bytes != 4 || (coff & 0x3) != 0) return (0); idx = (coff - PCIR_BAR(0)) / 4; + mask = ~(pi->pi_bar[idx].size - 1); switch (pi->pi_bar[idx].type) { case PCIBAR_NONE: - bar = 0; + pi->pi_bar[idx].addr = bar = 0; break; case PCIBAR_IO: - mask = ~(pi->pi_bar[idx].size - 1); - mask &= PCIM_BAR_IO_BASE; - bar = (*eax & mask) | PCIM_BAR_IO_SPACE; + addr = *eax & mask; + addr &= 0xffff; + bar = addr | PCIM_BAR_IO_SPACE; + /* + * Register the new BAR value for interception + */ + if (addr != pi->pi_bar[idx].addr) { + update_bar_address(pi, addr, idx, + PCIBAR_IO); + } break; case PCIBAR_MEM32: - mask = ~(pi->pi_bar[idx].size - 1); - mask &= PCIM_BAR_MEM_BASE; - bar = *eax & mask; + addr = bar = *eax & mask; bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; + if (addr != pi->pi_bar[idx].addr) { + update_bar_address(pi, addr, idx, + PCIBAR_MEM32); + } break; case PCIBAR_MEM64: - mask = ~(pi->pi_bar[idx].size - 1); - mask &= PCIM_BAR_MEM_BASE; - bar = *eax & mask; + addr = bar = *eax & mask; bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | PCIM_BAR_MEM_PREFETCH; + if (addr != (uint32_t)pi->pi_bar[idx].addr) { + update_bar_address(pi, addr, idx, + PCIBAR_MEM64); + } break; case PCIBAR_MEMHI64: mask = ~(pi->pi_bar[idx - 1].size - 1); - mask &= PCIM_BAR_MEM_BASE; - bar = ((uint64_t)*eax << 32) & mask; - bar = bar >> 32; + addr = ((uint64_t)*eax << 32) & mask; + bar = addr >> 32; + if (bar != pi->pi_bar[idx - 1].addr >> 32) { + update_bar_address(pi, addr, idx - 1, + PCIBAR_MEMHI64); + } break; default: assert(0); @@ -1210,6 +1390,8 @@ pci_emul_cfgdata(struct vmctx *ctx, int } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax); + } else if (coff == PCIR_COMMAND) { + pci_emul_cmdwrite(pi, *eax, bytes); } else { CFGWRITE(pi, coff, *eax, bytes); }