Date: Mon, 13 Jun 2011 22:55:03 GMT From: Takuya ASADA <syuu@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 194710 for review Message-ID: <201106132255.p5DMt3Lw061762@skunkworks.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@194710?ac=10 Change 194710 by syuu@x200 on 2011/06/13 22:54:53 Reverted changes for if_tap. RSS ported from rwatson/tcp branch, SOFTRSS implemented(Refactored version of RPS, part of Kazuya GODA's GSoC project). Implemented bpf multiqueue emulation on SOFTRSS. Affected files ... .. //depot/projects/soc2011/mq_bpf/src/sys/amd64/conf/RSS#1 add .. //depot/projects/soc2011/mq_bpf/src/sys/amd64/conf/SOFTRSS#1 add .. //depot/projects/soc2011/mq_bpf/src/sys/conf/files#2 edit .. //depot/projects/soc2011/mq_bpf/src/sys/conf/options#2 edit .. //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_em.c#3 edit .. //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_igb.c#5 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.c#7 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.h#4 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/bpfdesc.h#3 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/if.h#2 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/if_ethersubr.c#2 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/if_tap.c#4 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/if_var.h#5 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.c#2 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.h#2 edit .. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr_internal.h#2 edit .. //depot/projects/soc2011/mq_bpf/src/sys/netinet/in_rss.c#1 add .. //depot/projects/soc2011/mq_bpf/src/sys/netinet/in_rss.h#1 add .. //depot/projects/soc2011/mq_bpf/src/sys/netinet/toeplitz.c#1 add .. //depot/projects/soc2011/mq_bpf/src/sys/netinet/toeplitz.h#1 add .. //depot/projects/soc2011/mq_bpf/tests/packet_trace.d#1 add .. //depot/projects/soc2011/mq_bpf/tests/queue_affinity.c#2 edit Differences ... 
==== //depot/projects/soc2011/mq_bpf/src/sys/conf/files#2 (text+ko) ==== @@ -2713,6 +2713,7 @@ netinet/in_proto.c optional inet | inet6 \ compile-with "${NORMAL_C} -I$S/contrib/pf" netinet/in_rmx.c optional inet +netinet/in_rss.c optional inet rss | inet6 rss | inet softrss | inet6 softrss netinet/ip_divert.c optional inet ipdivert ipfirewall netinet/ipfw/dn_heap.c optional inet dummynet netinet/ipfw/dn_sched_fifo.c optional inet dummynet @@ -2772,6 +2773,7 @@ netinet/tcp_timewait.c optional inet | inet6 netinet/tcp_usrreq.c optional inet | inet6 netinet/udp_usrreq.c optional inet | inet6 +netinet/toeplitz.c optional inet rss | inet6 rss | inet softrss | inet6 softrss netinet/libalias/alias.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_db.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_mod.c optional libalias | netgraph_nat ==== //depot/projects/soc2011/mq_bpf/src/sys/conf/options#2 (text+ko) ==== @@ -421,6 +421,8 @@ NFSLOCKD RADIX_MPATH opt_mpath.h ROUTETABLES opt_route.h +RSS opt_rss.h +SOFTRSS opt_rss.h SLIP_IFF_OPTS opt_slip.h TCPDEBUG TCP_OFFLOAD_DISABLE opt_inet.h #Disable code to dispatch tcp offloading @@ -895,4 +897,3 @@ # that "lies" about the amount of RAM it has. Until a cleaner method is # defined, this option will suffice in overriding what Redboot says. 
AR71XX_REALMEM opt_global.h - ==== //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_em.c#3 (text+ko) ==== ==== //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_igb.c#5 (text+ko) ==== @@ -2779,6 +2779,7 @@ ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_TSO4; ifp->if_capabilities |= IFCAP_JUMBO_MTU; + ifp->if_capabilities |= IFCAP_MULTIQUEUE; ifp->if_capenable = ifp->if_capabilities; /* Don't enable LRO by default */ ==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.c#7 (text+ko) ==== @@ -40,6 +40,8 @@ #include "opt_bpf.h" #include "opt_compat.h" #include "opt_netgraph.h" +#include "opt_kdtrace.h" +#include "opt_rss.h" #include <sys/types.h> #include <sys/param.h> @@ -65,7 +67,6 @@ #include <sys/proc.h> #include <sys/socket.h> -#include <sys/syslog.h> #include <net/if.h> #include <net/bpf.h> @@ -76,11 +77,14 @@ #include <net/bpf_zerocopy.h> #include <net/bpfdesc.h> #include <net/vnet.h> - +#ifdef SOFTRSS +#include <net/netisr.h> +#endif #include <netinet/in.h> #include <netinet/if_ether.h> #include <sys/kernel.h> #include <sys/sysctl.h> +#include <sys/sdt.h> #include <net80211/ieee80211_freebsd.h> @@ -199,6 +203,29 @@ .f_event = filt_bpfread, }; +SDT_PROVIDER_DECLARE(bpf); +SDT_PROVIDER_DEFINE(bpf); +SDT_PROBE_DEFINE2(bpf, functions, bpf_tap, entry, entry, "void*", "boolean_t"); +SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap_rx, entry, entry, "void *", "uint32_t", "uint32_t"); +SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap_tx, entry, entry, "void *", "uint32_t", "uint32_t"); +SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap2_rx, entry, entry, "void *", "uint32_t", "uint32_t"); +SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap2_tx, entry, entry, "void *", "uint32_t", "uint32_t"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocrxqlen, entry, entry, "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioctxqlen, entry, entry, "int"); +SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, entry, "int", "int"); 
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, entry, "int", "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocenaqmask, entry, entry, "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocdisqmask, entry, entry, "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocstrxqmask, entry, entry, "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, entry, "int"); +SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, entry, "int", "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocsttxqmask, entry, entry, "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, entry, "int"); +SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocgttxqmask, entry, entry, "int", "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocstothermask, entry, entry, "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrothermask, entry, entry, "int"); +SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocgtothermask, entry, entry, "int"); + /* * Wrapper functions for various buffering methods. If the set of buffer * modes expands, we will probably want to introduce a switch data structure @@ -1519,318 +1546,388 @@ case BIOCRXQLEN: { - log(LOG_DEBUG, "BIOCRXQLEN\n"); - struct ifnet *const ifp = d->bd_bif->bif_ifp; + struct ifnet *ifp; + + if (d->bd_bif == NULL) { + /* + * No interface attached yet. + */ + error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocrxqlen, entry, -1); + break; + } + ifp = d->bd_bif->bif_ifp; *(int *)addr = ifp->if_rxq_num; + SDT_PROBE1(bpf, functions, bpfioctl_biocrxqlen, entry, ifp->if_rxq_num); break; } case BIOCTXQLEN: { - log(LOG_DEBUG, "BIOCTXQLEN\n"); - struct ifnet *const ifp = d->bd_bif->bif_ifp; + struct ifnet *ifp; + + if (d->bd_bif == NULL) { + /* + * No interface attached yet. 
+ */ + error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_bioctxqlen, entry, -1); + break; + } + ifp = d->bd_bif->bif_ifp; *(int *)addr = ifp->if_txq_num; + SDT_PROBE1(bpf, functions, bpfioctl_bioctxqlen, entry, ifp->if_txq_num); break; } case BIOCRXQAFFINITY: { u_long index; - log(LOG_DEBUG, "BIOCRXQAFFINITY\n"); + struct ifnet *ifp; + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); /* * No interface attached yet. */ error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; + ifp = d->bd_bif->bif_ifp; index = *(u_long *)addr; if (index > ifp->if_rxq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCRXQAFFINITY: index too large index:%lx rxq_num:%d\n", index, ifp->if_rxq_num); error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1); break; } if (!ifp->if_rxq_affinity) { - log(LOG_DEBUG, "!ifp->if_rxq_affinity\n"); + log(LOG_ERR, "!ifp->if_rxq_affinity\n"); error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1); break; } *(u_long *)addr = ifp->if_rxq_affinity[index]; - log(LOG_DEBUG, "index:%lu result:%lu\n", index, *(u_long *)addr); + SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, index, ifp->if_rxq_affinity[index]); break; } case BIOCTXQAFFINITY: { u_long index; - log(LOG_DEBUG, "BIOCTXQAFFINITY\n"); if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1); break; } struct ifnet *const ifp = d->bd_bif->bif_ifp; index = *(u_long *)addr; if (index > ifp->if_txq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCTXQAFFINITY: index too large index:%lx txq_num:%x\n", index, ifp->if_txq_num); error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1); break; } if (!ifp->if_txq_affinity) { - log(LOG_DEBUG, "!ifp->if_txq_affinity\n"); + log(LOG_ERR, "!ifp->if_txq_affinity\n"); error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1); break; } *(u_long *)addr = ifp->if_txq_affinity[index]; - log(LOG_DEBUG, "index:%lu result:%lu\n", index, *(u_long *)addr); + SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, index, ifp->if_txq_affinity[index]); break; } case BIOCENAQMASK: { - log(LOG_DEBUG, "BIOCENAQMASK\n"); + struct ifnet *ifp; + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1); break; } if (d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "d->bd_qmask.qm_enabled\n"); + error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1); + break; + } + ifp = d->bd_bif->bif_ifp; +#ifdef SOFTRSS + if (!(ifp->if_capenable & IFCAP_MULTIQUEUE)) { + ifp->if_rxq_num = netisr_get_cpucount(); + ifp->if_capabilities |= IFCAP_MULTIQUEUE; + ifp->if_capenable |= IFCAP_MULTIQUEUE; + } +#else + if (!(ifp->if_capenable & IFCAP_MULTIQUEUE)) { + log(LOG_ERR, "if doesn't support multiqueue"); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; +#endif + log(LOG_DEBUG, "if_rxq_num:%d\n", ifp->if_rxq_num); + log(LOG_DEBUG, "if_txq_num:%d\n", ifp->if_txq_num); d->bd_qmask.qm_enabled = TRUE; - log(LOG_DEBUG, "ifp->if_rxq_num:%d\n", ifp->if_rxq_num); d->bd_qmask.qm_rxq_mask = malloc(ifp->if_rxq_num * sizeof(boolean_t), M_BPF, M_WAITOK | M_ZERO); - log(LOG_DEBUG, "ifp->if_txq_num:%d\n", ifp->if_txq_num); d->bd_qmask.qm_txq_mask = malloc(ifp->if_txq_num * sizeof(boolean_t), M_BPF, M_WAITOK | M_ZERO); d->bd_qmask.qm_other_mask = FALSE; + SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, ifp->if_rxq_num); break; } case BIOCDISQMASK: { - log(LOG_DEBUG, "BIOCDISQMASK\n"); if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, -1); break; } if (!d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "!d->bd_qmask.qm_enabled\n"); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, -1); break; } d->bd_qmask.qm_enabled = FALSE; free(d->bd_qmask.qm_rxq_mask, M_BPF); free(d->bd_qmask.qm_txq_mask, M_BPF); + SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, 0); break; } case BIOCSTRXQMASK: { + struct ifnet *ifp; int index; - log(LOG_DEBUG, "BIOCSTRXQMASK\n"); + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. */ - error = EINVAL; + error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1); break; } if (!d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "!d->bd_qmask.qm_enabled\n"); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; + ifp = d->bd_bif->bif_ifp; index = *(uint32_t *)addr; if (index > ifp->if_rxq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCSTRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1); break; } - log(LOG_DEBUG, "index:%d\n", index); d->bd_qmask.qm_rxq_mask[index] = TRUE; + SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, index); break; } case BIOCCRRXQMASK: { int index; - log(LOG_DEBUG, "BIOCCRRXQMASK\n"); + struct ifnet *ifp; + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1); break; } if (!d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "!d->bd_qmask.qm_enabled\n"); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; + ifp = d->bd_bif->bif_ifp; index = *(uint32_t *)addr; if (index > ifp->if_rxq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCCRRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1); break; } - log(LOG_DEBUG, "index:%d\n", index); d->bd_qmask.qm_rxq_mask[index] = FALSE; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, index); break; } case BIOCGTRXQMASK: { int index; - log(LOG_DEBUG, "BIOCGTRXQMASK\n"); + struct ifnet *ifp; + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1, -1); break; } if (!d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "!d->bd_qmask.qm_enabled\n"); error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; + ifp = d->bd_bif->bif_ifp; index = *(uint32_t *)addr; if (index > ifp->if_rxq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCGTRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1); break; } - log(LOG_DEBUG, "index:%d\n", index); *(uint32_t *)addr = d->bd_qmask.qm_rxq_mask[index]; + SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, index, d->bd_qmask.qm_rxq_mask[index]); break; } case BIOCSTTXQMASK: { + struct ifnet *ifp; int index; - log(LOG_DEBUG, "BIOCSTTXQMASK\n"); + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1); break; } if (!d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "!d->bd_qmask.qm_enabled\n"); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; + + ifp = d->bd_bif->bif_ifp; index = *(uint32_t *)addr; if (index > ifp->if_txq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCSTTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1); break; } - log(LOG_DEBUG, "index:%d\n", index); d->bd_qmask.qm_txq_mask[index] = TRUE; + SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, index); break; } case BIOCCRTXQMASK: { + struct ifnet *ifp; int index; - log(LOG_DEBUG, "BIOCCRTXQMASK\n"); + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1); break; } if (!d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "!d->bd_qmask.qm_enabled\n"); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; + + ifp = d->bd_bif->bif_ifp; index = *(uint32_t *)addr; if (index > ifp->if_txq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCCRTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num); error = EINVAL; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1); break; } - log(LOG_DEBUG, "index:%d\n", index); d->bd_qmask.qm_txq_mask[index] = FALSE; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, index); break; } case BIOCGTTXQMASK: { int index; - log(LOG_DEBUG, "BIOCGTTXQMASK\n"); + struct ifnet *ifp; + if (d->bd_bif == NULL) { - log(LOG_DEBUG, "d->bd_bif == NULL\n"); + log(LOG_ERR, "d->bd_bif == NULL\n"); /* * No interface attached yet. 
*/ error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1); break; } if (!d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n"); + log(LOG_ERR, "!d->bd_qmask.qm_enabled\n"); error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1); break; } - struct ifnet *const ifp = d->bd_bif->bif_ifp; + ifp = d->bd_bif->bif_ifp; index = *(uint32_t *)addr; if (index > ifp->if_txq_num) { - log(LOG_DEBUG, "index too large\n"); + log(LOG_ERR, "BIOCGTTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num); error = EINVAL; + SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1); break; } - log(LOG_DEBUG, "index:%d\n", index); *(uint32_t *)addr = d->bd_qmask.qm_txq_mask[index]; + SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, index, d->bd_qmask.qm_txq_mask[index]); break; } case BIOCSTOTHERMASK: - log(LOG_DEBUG, "BIOSTOTHERMASK\n"); d->bd_qmask.qm_other_mask = TRUE; + SDT_PROBE1(bpf, functions, bpfioctl_biocstothermask, entry, 1); break; case BIOCCROTHERMASK: - log(LOG_DEBUG, "BIOCCROTHERMASK\n"); d->bd_qmask.qm_other_mask = FALSE; + SDT_PROBE1(bpf, functions, bpfioctl_bioccrothermask, entry, 0); break; case BIOCGTOTHERMASK: - log(LOG_DEBUG, "BIOCGTOTHERMASK\n"); - log(LOG_DEBUG, "mask:%d\n", d->bd_qmask.qm_other_mask); *(uint32_t *)addr = (uint32_t)d->bd_qmask.qm_other_mask; + SDT_PROBE1(bpf, functions, bpfioctl_biocgtothermask, entry, d->bd_qmask.qm_other_mask); break; } CURVNET_RESTORE(); @@ -2144,8 +2241,7 @@ BPFIF_LOCK(bp); LIST_FOREACH(d, &bp->bif_dlist, bd_next) { if (d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "bpf_tap other_mask:%d\n", - d->bd_qmask.qm_other_mask); + SDT_PROBE2(bpf, functions, bpf_tap, entry, d, d->bd_qmask.qm_other_mask); if (!d->bd_qmask.qm_other_mask) continue; } @@ -2195,13 +2291,6 @@ u_int pktlen, slen; int gottime; -#if 0 - if (m->m_pkthdr.txqid != (uint32_t)-1 && m->m_pkthdr.txqid != PCPU_GET(cpuid)) - log(LOG_DEBUG, "txqid:%d cpuid:%d\n", 
m->m_pkthdr.txqid, PCPU_GET(cpuid)); -#endif - if (m->m_pkthdr.rxqid != (uint32_t)-1 && m->m_pkthdr.rxqid != PCPU_GET(cpuid)) - log(LOG_DEBUG, "rxqid:%d cpuid:%d\n", m->m_pkthdr.rxqid, PCPU_GET(cpuid)); - /* Skip outgoing duplicate packets. */ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { m->m_flags &= ~M_PROMISC; @@ -2214,18 +2303,30 @@ BPFIF_LOCK(bp); LIST_FOREACH(d, &bp->bif_dlist, bd_next) { if (d->bd_qmask.qm_enabled) { -/* - log(LOG_DEBUG, "bpf_mtap rxqid:%x txqid:%x rxqmask:%x txqmask:%x\n", - m->m_pkthdr.rxqid, m->m_pkthdr.txqid, - d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid], - d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]); -*/ - if (m->m_pkthdr.rxqid != (uint32_t)-1 && - !d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid]) - continue; - if (m->m_pkthdr.txqid != (uint32_t)-1 && - !d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]) - continue; + if (!(m->m_flags & M_FLOWID)) { + log(LOG_DEBUG, "m:%p ifp:%p !(m->flags & M_FLOWID)\n", + m, m->m_pkthdr.rcvif); + if (!d->bd_qmask.qm_other_mask) + continue; + } else { + if (m->m_pkthdr.rxqid != (uint32_t)-1) + KASSERT(m->m_pkthdr.rxqid < bp->bif_ifp->if_rxq_num, + ("rxqid is not vaild rxqid:%x rxq_num:%x", + m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num)); + if (m->m_pkthdr.txqid != (uint32_t)-1) + KASSERT(m->m_pkthdr.txqid < bp->bif_ifp->if_txq_num, + ("txqid is not vaild txqid:%x txq_num:%x", + m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num)); + + SDT_PROBE3(bpf, functions, bpf_mtap_rx, entry, d, m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num); + SDT_PROBE3(bpf, functions, bpf_mtap_tx, entry, d, m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num); + if (m->m_pkthdr.rxqid != (uint32_t)-1 && + !d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid]) + continue; + if (m->m_pkthdr.txqid != (uint32_t)-1 && + !d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]) + continue; + } } if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) continue; @@ -2267,13 +2368,6 @@ u_int pktlen, slen; int gottime; -#if 0 - if (m->m_pkthdr.txqid != (uint32_t)-1 && 
m->m_pkthdr.txqid != PCPU_GET(cpuid)) - log(LOG_DEBUG, "txqid:%d cpuid:%d\n", m->m_pkthdr.txqid, PCPU_GET(cpuid)); -#endif - if (m->m_pkthdr.rxqid != (uint32_t)-1 && m->m_pkthdr.rxqid != PCPU_GET(cpuid)) - log(LOG_DEBUG, "rxqid:%d cpuid:%d\n", m->m_pkthdr.rxqid, PCPU_GET(cpuid)); - /* Skip outgoing duplicate packets. */ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { m->m_flags &= ~M_PROMISC; @@ -2295,10 +2389,8 @@ BPFIF_LOCK(bp); LIST_FOREACH(d, &bp->bif_dlist, bd_next) { if (d->bd_qmask.qm_enabled) { - log(LOG_DEBUG, "bpf_mtap2 rxqid:%x txqid:%x rxqmask:%x txqmask:%x\n", - m->m_pkthdr.rxqid, m->m_pkthdr.txqid, - d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid], - d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]); + SDT_PROBE3(bpf, functions, bpf_mtap2_rx, entry, d, m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num); + SDT_PROBE3(bpf, functions, bpf_mtap2_tx, entry, d, m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num); if (m->m_pkthdr.rxqid != (uint32_t)-1 && !d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid]) ==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.h#4 (text+ko) ==== ==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpfdesc.h#3 (text+ko) ==== ==== //depot/projects/soc2011/mq_bpf/src/sys/net/if.h#2 (text+ko) ==== @@ -220,6 +220,7 @@ #define IFCAP_POLLING_NOCOUNT 0x20000 /* polling ticks cannot be fragmented */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ +#define IFCAP_MULTIQUEUE 0x100000 #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) ==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_ethersubr.c#2 (text+ko) ==== @@ -36,6 +36,7 @@ #include "opt_ipx.h" #include "opt_netgraph.h" #include "opt_mbuf_profiling.h" +#include "opt_rss.h" #include <sys/param.h> #include <sys/systm.h> @@ -69,6 +70,7 @@ #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/if_ether.h> +#include <netinet/in_rss.h> #include 
<netinet/ip_carp.h> #include <netinet/ip_var.h> #include <netinet/ip_fw.h> @@ -106,6 +108,9 @@ CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif +SYSCTL_DECL(_net_link); +SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); + /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); @@ -561,7 +566,7 @@ * mbuf chain m with the ethernet header at the front. */ static void -ether_input(struct ifnet *ifp, struct mbuf *m) +ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; @@ -754,6 +759,77 @@ CURVNET_RESTORE(); } +#if defined(RSS) || defined(SOFTRSS) +/* + * Ethernet input dispatch; by default, direct dispatch here regardless of + * global configuration. However, if RSS is enabled, hook up RSS affinity + * so that when deferred or hybrid dispatch is enabled, we can redistribute + * load based on RSS. + * + * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or + * not it had already done work distribution via multi-queue. Then we could + * direct dispatch in the event load balancing was already complete and + * handle the case of interfaces with different capabilities better. + * + * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions + * at multiple layers? + * + * XXXRW: For now, enable all this only if RSS is compiled in, although it + * works fine without RSS. Need to characterise the performance overhead + * of the detour through the netisr code in the event the result is always + * direct dispatch. 
+ */ +static void +ether_nh_input(struct mbuf *m) +{ + + ether_input_internal(m->m_pkthdr.rcvif, m); +} + +static struct netisr_handler ether_nh = { + .nh_name = "ether", + .nh_handler = ether_nh_input, + .nh_proto = NETISR_ETHER, +#if defined(RSS) || defined(SOFTRSS) + .nh_policy = NETISR_POLICY_CPU, +#if defined(RSS) + .nh_dispatch = NETISR_DISPATCH_DIRECT, +#else + .nh_dispatch = NETISR_DISPATCH_HYBRID, +#endif + .nh_m2cpuid = rss_m2cpuid, +#else + .nh_policy = NETISR_POLICY_SOURCE, + .nh_dispatch = NETISR_DISPATCH_DIRECT, +#endif +}; + +static void +ether_init(__unused void *arg) +{ + + netisr_register(&ether_nh); +} +SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); +#endif /* RSS || SOFTRSS */ +#include <sys/syslog.h> +static void +ether_input(struct ifnet *ifp, struct mbuf *m) +{ + log(LOG_DEBUG, "%s ifp:%p m:%p\n", __func__, ifp, m); +#if defined(RSS) || defined(SOFTRSS) + /* + * We will rely on rcvif being set properly in the deferred context, + * so assert it is correct here. + */ + KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__)); + + netisr_dispatch(NETISR_ETHER, m); +#else + ether_input_internal(ifp, m); +#endif +} + /* * Upper layer processing for a received Ethernet packet. 
*/ @@ -1008,8 +1084,6 @@ } #endif -SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); #if defined(INET) || defined(INET6) SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW, &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall"); ==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_tap.c#4 (text+ko) ==== @@ -54,7 +54,6 @@ #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sysctl.h> -#include <sys/syslog.h> #include <sys/systm.h> #include <sys/ttycom.h> #include <sys/uio.h> @@ -870,10 +869,6 @@ } while (m == NULL); mtx_unlock(&tp->tap_mtx); - m->m_pkthdr.rxqid = (uint32_t)-1; - m->m_pkthdr.txqid = PCPU_GET(cpuid); - log(LOG_DEBUG, "%s rxqid:%x txqid:%x\n", __func__, m->m_pkthdr.rxqid, m->m_pkthdr.txqid); - /* feed packet to bpf */ BPF_MTAP(ifp, m); @@ -948,10 +943,6 @@ return (0); } - m->m_pkthdr.rxqid = PCPU_GET(cpuid); - m->m_pkthdr.txqid = (uint32_t)-1; - log(LOG_DEBUG, "%s rxqid:%x txqid:%x\n", __func__, m->m_pkthdr.rxqid, m->m_pkthdr.txqid); - /* Pass packet up to parent. */ (*ifp->if_input)(ifp, m); ifp->if_ipackets ++; /* ibytes are counted in parent */ @@ -1090,3 +1081,4 @@ knlist_remove(&tp->tap_rsel.si_note, kn, 0); } /* tapkqdetach */ + ==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_var.h#5 (text+ko) ==== ==== //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.c#2 (text+ko) ==== @@ -1,6 +1,6 @@ /*- * Copyright (c) 2007-2009 Robert N. M. Watson - * Copyright (c) 2010 Juniper Networks, Inc. + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * This software was developed by Robert N. M. 
Watson under contract @@ -65,6 +65,7 @@ #include "opt_ddb.h" #include "opt_device_polling.h" +#include "opt_kdtrace.h" #include <sys/param.h> #include <sys/bus.h> @@ -82,6 +83,8 @@ #include <sys/socket.h> #include <sys/sysctl.h> #include <sys/systm.h> +#include <sys/sdt.h> +#include <sys/syslog.h> #ifdef DDB #include <ddb/ddb.h> @@ -94,6 +97,31 @@ #include <net/netisr_internal.h> #include <net/vnet.h> +/* + * Locking strategy: three types of locks protect netisr processing: + * + * netisr configuration lock - serializes "rethreading" events, in which the + * number of worker threads is changed. + * + * netisr_rmlock - stabilizes the netisr system for network processing, + * almost always acquired as a read lock (except during configuration + * changes). + * + * nws_mtx - per-workstream lock that serializes access to queues. + */ + +/* + * netisr configuration lock: serialize rethread events, in which the thread + * count may be increased and decreased, to avoid interlacing of these + * events, which might expose incompletely started or stopped threads, etc. + * This is a sleep lock so that it can be held over ithread start/stop. + */ +static struct sx netisr_config_sx; +#define NETISR_CONFIG_LOCK_INIT() sx_init(&netisr_config_sx, \ + "netisr_config_sx") +#define NETISR_CONFIG_LOCK() sx_xlock(&netisr_config_sx) +#define NETISR_CONFIG_UNLOCK() sx_xunlock(&netisr_config_sx) + /*- * Synchronize use and modification of the registered netisr data structures; * acquire a read lock while modifying the set of registered protocols to @@ -114,51 +142,74 @@ * * XXXRW: rmlocks don't support assertions. 
*/ +#define NETISR_RMLOCKING + +#ifdef NETISR_RMLOCKING static struct rmlock netisr_rmlock; #define NETISR_LOCK_INIT() rm_init_flags(&netisr_rmlock, "netisr", \ - RM_NOWITNESS) + RM_NOWITNESS | RM_RECURSE) #define NETISR_LOCK_ASSERT() #define NETISR_RLOCK(tracker) rm_rlock(&netisr_rmlock, (tracker)) #define NETISR_RUNLOCK(tracker) rm_runlock(&netisr_rmlock, (tracker)) #define NETISR_WLOCK() rm_wlock(&netisr_rmlock) #define NETISR_WUNLOCK() rm_wunlock(&netisr_rmlock) -/* #define NETISR_LOCKING */ +#else +#define NETISR_LOCK_INIT() +#define NETISR_LOCK_ASSERT() +#define NETISR_RLOCK(x) +#define NETISR_RUNLOCK(x) +#define NETISR_WLOCK() +#define NETISR_WUNLOCK() +#endif SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); /*- - * Three direct dispatch policies are supported: + * Three global direct dispatch policies are supported: * - * - Always defer: all work is scheduled for a netisr, regardless of context. - * (!direct) + * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of + * context (may be overriden by protocols). * - * - Hybrid: if the executing context allows direct dispatch, and we're - * running on the CPU the work would be done on, then direct dispatch if it - * wouldn't violate ordering constraints on the workstream. - * (direct && !direct_force) + * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch, + * and we're running on the CPU the work would be performed on, then direct + * dispatch it if it wouldn't violate ordering constraints on the workstream. * - * - Always direct: if the executing context allows direct dispatch, always - * direct dispatch. (direct && direct_force) + * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch, + * always direct dispatch. (The default.) * * Notice that changing the global policy could lead to short periods of * misordered processing, but this is considered acceptable as compared to - * the complexity of enforcing ordering during policy changes. 
+ * the complexity of enforcing ordering during policy changes. Protocols can + * override the global policy (when they're not doing that, they select + * NETISR_DISPATCH_DEFAULT). + */ +#define NETISR_DISPATCH_POLICY_DEFAULT NETISR_DISPATCH_DIRECT +#define NETISR_DISPATCH_POLICY_MAXSTR 20 /* Used for temporary buffers. */ +static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT; +static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS); >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201106132255.p5DMt3Lw061762>