From owner-svn-soc-all@FreeBSD.ORG Mon Jun 22 20:21:10 2015 Return-Path: Delivered-To: svn-soc-all@nevdull.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id ABA49624 for ; Mon, 22 Jun 2015 20:21:10 +0000 (UTC) (envelope-from stefano@FreeBSD.org) Received: from socsvn.freebsd.org (socsvn.freebsd.org [IPv6:2001:1900:2254:206a::50:2]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 98E4831E for ; Mon, 22 Jun 2015 20:21:10 +0000 (UTC) (envelope-from stefano@FreeBSD.org) Received: from socsvn.freebsd.org ([127.0.1.124]) by socsvn.freebsd.org (8.14.9/8.14.9) with ESMTP id t5MKLAeX019055 for ; Mon, 22 Jun 2015 20:21:10 GMT (envelope-from stefano@FreeBSD.org) Received: (from www@localhost) by socsvn.freebsd.org (8.14.9/8.14.9/Submit) id t5MKL9Kd019038 for svn-soc-all@FreeBSD.org; Mon, 22 Jun 2015 20:21:09 GMT (envelope-from stefano@FreeBSD.org) Date: Mon, 22 Jun 2015 20:21:09 GMT Message-Id: <201506222021.t5MKL9Kd019038@socsvn.freebsd.org> X-Authentication-Warning: socsvn.freebsd.org: www set sender to stefano@FreeBSD.org using -f From: stefano@FreeBSD.org To: svn-soc-all@FreeBSD.org Subject: socsvn commit: r287458 - soc2015/stefano/ptnetmap/head/sys/net MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-soc-all@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: SVN commit messages for the entire Summer of Code repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 22 Jun 2015 20:21:10 -0000 Author: stefano Date: Mon Jun 22 20:21:09 2015 New Revision: 287458 URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=287458 Log: [ptnetmap] add new netmap headers Modified: 
soc2015/stefano/ptnetmap/head/sys/net/netmap.h soc2015/stefano/ptnetmap/head/sys/net/netmap_user.h Modified: soc2015/stefano/ptnetmap/head/sys/net/netmap.h ============================================================================== --- soc2015/stefano/ptnetmap/head/sys/net/netmap.h Mon Jun 22 20:05:26 2015 (r287457) +++ soc2015/stefano/ptnetmap/head/sys/net/netmap.h Mon Jun 22 20:21:09 2015 (r287458) @@ -25,7 +25,7 @@ */ /* - * $FreeBSD$ + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ * * Definitions of constants and the structures used by the netmap * framework, for the part visible to both kernel and userspace. @@ -157,6 +157,11 @@ /* * must be set whenever buf_idx is changed (as it might be * necessary to recompute the physical address and mapping) + * + * It is also set by the kernel whenever the buf_idx is + * changed internally (e.g., by pipes). Applications may + * use this information to know when they can reuse the + * contents of previously prepared buffers. 
*/ #define NS_REPORT 0x0002 /* ask the hardware to report results */ @@ -491,6 +496,8 @@ #define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ #define NETMAP_BDG_NEWIF 6 /* create a virtual port */ #define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ +#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ +#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ #define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ @@ -513,7 +520,11 @@ /* monitor uses the NR_REG to select the rings to monitor */ #define NR_MONITOR_TX 0x100 #define NR_MONITOR_RX 0x200 - +#define NR_ZCOPY_MON 0x400 +/* request exclusive access to the selected rings */ +#define NR_EXCLUSIVE 0x800 +/* request netmap passthrough full support */ +#define NR_PASSTHROUGH_HOST 0x1000 /* * FreeBSD uses the size value embedded in the _IOWR to determine @@ -552,5 +563,4 @@ char nifr_name[IFNAMSIZ]; char data[NM_IFRDATA_LEN]; }; - #endif /* _NET_NETMAP_H_ */ Modified: soc2015/stefano/ptnetmap/head/sys/net/netmap_user.h ============================================================================== --- soc2015/stefano/ptnetmap/head/sys/net/netmap_user.h Mon Jun 22 20:05:26 2015 (r287457) +++ soc2015/stefano/ptnetmap/head/sys/net/netmap_user.h Mon Jun 22 20:21:09 2015 (r287458) @@ -284,6 +284,12 @@ * -NN bind individual NIC ring pair * {NN bind master side of pipe NN * }NN bind slave side of pipe NN + * a suffix starting with / and the following flags, + * in any order: + * x exclusive access + * z zero copy monitor + * t monitor tx side + * r monitor rx side * * req provides the initial values of nmreq before parsing ifname. * Remember that the ifname parsing will override the ring @@ -323,6 +329,13 @@ static int nm_close(struct nm_desc *); /* + * nm_mmap() does the mmap, or inherits it from parent if the nr_arg2 + * (memory block) matches.
+ */ + +static int nm_mmap(struct nm_desc *, const struct nm_desc *); + +/* * nm_inject() is the same as pcap_inject() * nm_dispatch() is the same as pcap_dispatch() * nm_nextpkt() is the same as pcap_next() @@ -338,7 +351,8 @@ * An invalid netmap name will return errno = 0; * You can pass a pointer to a pre-filled nm_desc to add special * parameters. Flags is used as follows - * NM_OPEN_NO_MMAP use the memory from arg, only + * NM_OPEN_NO_MMAP XXX: avoid mmap + * use the memory from arg, only * if the nr_arg2 (memory block) matches. * NM_OPEN_ARG1 use req.nr_arg1 from arg * NM_OPEN_ARG2 use req.nr_arg2 from arg @@ -351,9 +365,12 @@ struct nm_desc *d = NULL; const struct nm_desc *parent = arg; u_int namelen; - uint32_t nr_ringid = 0, nr_flags; + uint32_t nr_ringid = 0, nr_flags, nr_reg; const char *port = NULL; - const char *errmsg = NULL; +#define MAXERRMSG 80 + char errmsg[MAXERRMSG] = ""; + enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state; + long num; if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { errno = 0; /* name not recognised, not an error */ @@ -362,60 +379,112 @@ if (ifname[0] == 'n') ifname += 7; /* scan for a separator */ - for (port = ifname; *port && !index("-*^{}", *port); port++) + for (port = ifname; *port && !index("-*^{}/", *port); port++) ; namelen = port - ifname; if (namelen >= sizeof(d->req.nr_name)) { - errmsg = "name too long"; + snprintf(errmsg, MAXERRMSG, "name too long"); goto fail; } - switch (*port) { - default: /* '\0', no suffix */ - nr_flags = NR_REG_ALL_NIC; - break; - case '-': /* one NIC */ - nr_flags = NR_REG_ONE_NIC; - nr_ringid = atoi(port + 1); - break; - case '*': /* NIC and SW, ignore port */ - nr_flags = NR_REG_NIC_SW; - if (port[1]) { - errmsg = "invalid port for nic+sw"; - goto fail; + p_state = P_START; + nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ + while (*port) { + switch (p_state) { + case P_START: + switch (*port) { + case '^': /* only SW ring */ + nr_flags = 
NR_REG_SW; + p_state = P_RNGSFXOK; + break; + case '*': /* NIC and SW */ + nr_flags = NR_REG_NIC_SW; + p_state = P_RNGSFXOK; + break; + case '-': /* one NIC ring pair */ + nr_flags = NR_REG_ONE_NIC; + p_state = P_GETNUM; + break; + case '{': /* pipe (master endpoint) */ + nr_flags = NR_REG_PIPE_MASTER; + p_state = P_GETNUM; + break; + case '}': /* pipe (slave endpoint) */ + nr_flags = NR_REG_PIPE_SLAVE; + p_state = P_GETNUM; + break; + case '/': /* start of flags */ + p_state = P_FLAGS; + break; + default: + snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); + goto fail; + } + port++; + break; + case P_RNGSFXOK: + switch (*port) { + case '/': + p_state = P_FLAGS; + break; + default: + snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); + goto fail; + } + port++; + break; + case P_GETNUM: + num = strtol(port, (char **)&port, 10); + if (num < 0 || num >= NETMAP_RING_MASK) { + snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", + num, NETMAP_RING_MASK); + goto fail; + } + nr_ringid = num & NETMAP_RING_MASK; + p_state = P_RNGSFXOK; + break; + case P_FLAGS: + case P_FLAGSOK: + switch (*port) { + case 'x': + nr_flags |= NR_EXCLUSIVE; + break; + case 'z': + nr_flags |= NR_ZCOPY_MON; + break; + case 't': + nr_flags |= NR_MONITOR_TX; + break; + case 'r': + nr_flags |= NR_MONITOR_RX; + break; + default: + snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); + goto fail; + } + port++; + p_state = P_FLAGSOK; + break; } - break; - case '^': /* only sw ring */ - nr_flags = NR_REG_SW; - if (port[1]) { - errmsg = "invalid port for sw ring"; - goto fail; - } - break; - case '{': - nr_flags = NR_REG_PIPE_MASTER; - nr_ringid = atoi(port + 1); - break; - case '}': - nr_flags = NR_REG_PIPE_SLAVE; - nr_ringid = atoi(port + 1); - break; } - - if (nr_ringid >= NETMAP_RING_MASK) { - errmsg = "invalid ringid"; + if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { + snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); goto
fail; } - + ND("flags: %s %s %s %s", + (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", + (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", + (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", + (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); d = (struct nm_desc *)calloc(1, sizeof(*d)); if (d == NULL) { - errmsg = "nm_desc alloc failure"; + snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); errno = ENOMEM; return NULL; } d->self = d; /* set this early so nm_close() works */ d->fd = open("/dev/netmap", O_RDWR); if (d->fd < 0) { - errmsg = "cannot open /dev/netmap"; + snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); goto fail; } @@ -426,7 +495,7 @@ /* these fields are overridden by ifname and flags processing */ d->req.nr_ringid |= nr_ringid; - d->req.nr_flags = nr_flags; + d->req.nr_flags |= nr_flags; memcpy(d->req.nr_name, ifname, namelen); d->req.nr_name[namelen] = '\0'; /* optionally import info from parent */ @@ -464,51 +533,31 @@ d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); if (ioctl(d->fd, NIOCREGIF, &d->req)) { - errmsg = "NIOCREGIF failed"; + snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); goto fail; } - if (IS_NETMAP_DESC(parent) && parent->mem && - parent->req.nr_arg2 == d->req.nr_arg2) { - /* do not mmap, inherit from parent */ - d->memsize = parent->memsize; - d->mem = parent->mem; - } else { - /* XXX TODO: check if memsize is too large (or there is overflow) */ - d->memsize = d->req.nr_memsize; - d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, - d->fd, 0); - if (d->mem == MAP_FAILED) { - errmsg = "mmap failed"; - goto fail; - } - d->done_mmap = 1; + if (!(new_flags & NM_OPEN_NO_MMAP) && nm_mmap(d, parent)) { + snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); + goto fail; } - { - struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); - struct netmap_ring *r = NETMAP_RXRING(nifp, ); - *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; - 
*(struct netmap_ring **)(uintptr_t)&d->some_ring = r; - *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); - *(void **)(uintptr_t)&d->buf_end = - (char *)d->mem + d->memsize; - } + nr_reg = d->req.nr_flags & NR_REG_MASK; - if (d->req.nr_flags == NR_REG_SW) { /* host stack */ + if (nr_reg == NR_REG_SW) { /* host stack */ d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; - } else if (d->req.nr_flags == NR_REG_ALL_NIC) { /* only nic */ + } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ d->first_tx_ring = 0; d->first_rx_ring = 0; d->last_tx_ring = d->req.nr_tx_rings - 1; d->last_rx_ring = d->req.nr_rx_rings - 1; - } else if (d->req.nr_flags == NR_REG_NIC_SW) { + } else if (nr_reg == NR_REG_NIC_SW) { d->first_tx_ring = 0; d->first_rx_ring = 0; d->last_tx_ring = d->req.nr_tx_rings; d->last_rx_ring = d->req.nr_rx_rings; - } else if (d->req.nr_flags == NR_REG_ONE_NIC) { + } else if (nr_reg == NR_REG_ONE_NIC) { /* XXX check validity */ d->first_tx_ring = d->last_tx_ring = d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; @@ -541,7 +590,7 @@ fail: nm_close(d); - if (errmsg) + if (errmsg[0]) D("%s %s", errmsg, ifname); if (errno == 0) errno = EINVAL; @@ -571,6 +620,44 @@ } +static int +nm_mmap(struct nm_desc *d, const struct nm_desc *parent) +{ + //XXX TODO: check if mmap is already done + + if (IS_NETMAP_DESC(parent) && parent->mem && + parent->req.nr_arg2 == d->req.nr_arg2) { + /* do not mmap, inherit from parent */ + D("do not mmap, inherit from parent"); + d->memsize = parent->memsize; + d->mem = parent->mem; + } else { + /* XXX TODO: check if memsize is too large (or there is overflow) */ + d->memsize = d->req.nr_memsize; + d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, + d->fd, 0); + if (d->mem == MAP_FAILED) { + goto fail; + } + d->done_mmap = 1; + } + { + struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); + struct netmap_ring *r = 
NETMAP_RXRING(nifp, ); + + *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; + *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; + *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); + *(void **)(uintptr_t)&d->buf_end = + (char *)d->mem + d->memsize; + } + + return 0; + +fail: + return EINVAL; +} + /* * Same prototype as pcap_inject(), only need to cast. */