Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 02 Feb 2012 19:50:01 +0400
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        =?windows-1251?Q?=CA=EE=ED=FC=EA=EE=E2_=C5=E2=E3=E5=ED=E8=E9?= <kes-kes@yandex.ru>
Cc:        freebsd-net@freebsd.org, Andrey Zonov <andrey@zonov.org>
Subject:   Re: netisr defered - active only one thread
Message-ID:  <4F2AB0A9.70905@FreeBSD.org>
In-Reply-To: <1446971288.20120202105912@yandex.ru>
References:  <4F29A464.3080302@zonov.org> <4F29E2C8.5000909@FreeBSD.org> <4F2A2EAB.3010700@zonov.org> <1446971288.20120202105912@yandex.ru>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------030901040500010706090507
Content-Type: text/plain; charset=windows-1251; format=flowed
Content-Transfer-Encoding: 8bit

On 02.02.2012 12:59, Коньков Евгений wrote:
> Здравствуйте, Andrey.
>
> Вы писали 2 февраля 2012 г., 8:35:23:
>
> AZ>  On 02.02.2012 5:11, Alexander V. Chernikov wrote:
>>> On 01.02.2012 20:45, Andrey Zonov wrote:
>>>> Hi,
>>>>
>>>> I'm trying to tune a machine running 8.2-STABLE for heavy network load and
>>>> am now playing with netisr. Could anyone explain why only one netisr
>>>> thread actually does any work when I set the thread count to 8?
>>>
>>> Can you please supply `netstat -Q` output and clarify your usage pattern?
>>> (I mean, this is router/web server/some kind of traffic receiver/etc..).
>>> For example, flow policy does not balance traffic from single flow
>>> between different CPUs.
>>>
>
> AZ>  This is a web server with multiple nginx instances.  5k/sec accepted
> AZ>  connections.  Input packet rate is 35kpps, output - 25kpps.
>
> AZ>  I thought of changing policy for IP, but how can I do this (without
> AZ>  patching)?  Is it safe?
>
> AZ>  netstat -Q (I turned on direct&  direct force for now):
> AZ>  Configuration:
> AZ>  Setting                          Value      Maximum
> AZ>  Thread count                         8            8
> AZ>  Default queue limit                256        10240
> AZ>  Direct dispatch                enabled          n/a
> AZ>  Forced direct dispatch         enabled          n/a
> AZ>  Threads bound to CPUs          enabled          n/a
>
> AZ>  Protocols:
> AZ>  Name   Proto QLimit Policy Flags
> AZ>  ip         1   5000   flow   ---
> AZ>  igmp       2    256 source   ---
> AZ>  rtsock     3    256 source   ---
> AZ>  arp        7    256 source   ---
> AZ>  ip6       10    256   flow   ---
>
> AZ>  Workstreams:
> AZ>  WSID CPU   Name     Len WMark   Disp'd  HDisp'd   QDrops   Queued Handled
> AZ>      0   0  ip         0     0  1125716        0        0        0 1125716
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     1        0        0        0      102 102
> AZ>             arp        0     0       27        0        0        0 27
> AZ>             ip6        0     0        0        0        0        0
> AZ>      1   1  ip         0     0  1222701        0        0        0 1222701
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     0        0        0        0        0
> AZ>             arp        0     0       46        0        0        0 46
> AZ>             ip6        0     0        0        0        0        0
> AZ>      2   2  ip         0     0  1184381        0        0        0 1184381
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     0        0        0        0        0
> AZ>             arp        0     0       45        0        0        0 45
> AZ>             ip6        0     0        0        0        0        0
> AZ>      3   3  ip         0     0  1191094        0        0        0 1191094
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     0        0        0        0        0
> AZ>             arp        0     0       54        0        0        0 54
> AZ>             ip6        0     0        0        0        0        0
> AZ>      4   4  ip         0     0   846165        0        0        0 846165
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     0        0        0        0        0
> AZ>             arp        0     0       19        0        0        0 19
> AZ>             ip6        0     0        0        0        0        0
> AZ>      5   5  ip         0     0   849478        0        0        0 849478
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     0        0        0        0        0
> AZ>             arp        0     0       27        0        0        0 27
> AZ>             ip6        0     0        0        0        0        0
> AZ>      6   6  ip         0     0   870836        0        0        0 870836
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     0        0        0        0        0
> AZ>             arp        0     0       29        0        0        0 29
> AZ>             ip6        0     0        0        0        0        0
> AZ>      7   7  ip         0  5000   594320        5   910862  3453459 4047784
> AZ>             igmp       0     0        0        0        0        0
> AZ>             rtsock     0     0        0        0        0        0
> AZ>             arp        0     5       21        0        0      109 130
> AZ>             ip6        0     1        0        0        0        1
>
> Same problem here. Is it because one netisr thread takes 100% CPU, so the
> other threads stop working properly? Or is the packet scheduler badly
> balanced, so that it keeps scheduling packets to netisr:7 even though it
> is 100% busy?

Can you please try the attached patch?

Rebuild the kernel with this patch and set net.isr.dispatch to deferred or hybrid.

P.S. It is also reasonable to set net.isr.bindthreads to 1.


>
>
>


-- 
WBR, Alexander

--------------030901040500010706090507
Content-Type: text/plain;
 name="netisr_ip_flowid.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="netisr_ip_flowid.diff"

Index: sys/netinet/ip_input.c
===================================================================
--- sys/netinet/ip_input.c	(revision 230910)
+++ sys/netinet/ip_input.c	(working copy)
@@ -78,6 +78,11 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
 
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/sctp.h>
+#include <libkern/jenkins.h>
+
 #include <sys/socketvar.h>
 
 #include <security/mac/mac_framework.h>
@@ -145,9 +150,13 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_inte
 
 VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
 
+static VNET_DEFINE(uint32_t, flow_hashjitter);	/* flow hash seed */
+#define	V_flow_hashjitter	VNET(flow_hashjitter)
+static struct mbuf * ip_hash_mbuf(struct mbuf *m, uintptr_t source);
 static struct netisr_handler ip_nh = {
 	.nh_name = "ip",
 	.nh_handler = ip_input,
+	.nh_m2flow = ip_hash_mbuf,	/* sets m_pkthdr.flowid */
 	.nh_proto = NETISR_IP,
 	.nh_policy = NETISR_POLICY_FLOW,
 };
@@ -305,6 +314,9 @@ ip_init(void)
 	    NULL, UMA_ALIGN_PTR, 0);
 	maxnipq_update();
 
+	if (V_flow_hashjitter == 0)
+		V_flow_hashjitter = arc4random();
+
 	/* Initialize packet filter hooks. */
 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
 	V_inet_pfil_hook.ph_af = AF_INET;
@@ -390,6 +402,102 @@ ip_fini(void *xtp)
 	callout_stop(&ipport_tick_callout);
 }
 
+/*
+ * netisr m2flow callback for IPv4 (installed as ip_nh.nh_m2flow).
+ *
+ * Hashes the IP source and destination addresses, plus the TCP, UDP or
+ * SCTP port pair when present, and records the result as the packet's
+ * flow id (M_FLOWID / m_pkthdr.flowid).
+ *
+ * Returns the mbuf, which may have been replaced by m_pullup(), or NULL
+ * when m_pullup() fails.  A packet too short to hold the headers being
+ * examined, or with a bogus IP header length, is returned untouched and
+ * without a flow id.  The 'source' argument is unused.
+ */
+static struct mbuf *
+ip_hash_mbuf(struct mbuf *m, uintptr_t source)
+{
+	struct ip *ip;
+	struct tcphdr *th;
+	struct udphdr *uh;
+	struct sctphdr *sh;
+	uint32_t key[3];
+	uint32_t flowid, pullup_len = 0;
+	uint16_t sport = 0, dport = 0;
+	uint8_t proto;
+	int iphlen, offset;
+
+/* Make 'length' further bytes contiguous at the front of the chain. */
+#define	M_CHECK(length)	do {					\
+	pullup_len += length;					\
+	if ((m)->m_pkthdr.len < (pullup_len))			\
+		return (m);					\
+	if ((m)->m_len < (pullup_len) &&			\
+	   (((m) = m_pullup((m),(pullup_len))) == NULL))	\
+		return (NULL);					\
+} while (0)
+
+	M_CHECK(sizeof(struct ip));
+	ip = mtod(m, struct ip *);
+
+	proto = ip->ip_p;
+	iphlen = ip->ip_hl << 2;
+	/* Header length below the minimum: do not classify the packet. */
+	if (iphlen < (int)sizeof(struct ip))
+		return (m);
+	/* The transport header follows any IP options. */
+	pullup_len = iphlen;
+
+	key[0] = 0;
+	key[1] = ip->ip_src.s_addr;
+	key[2] = ip->ip_dst.s_addr;
+
+	/*
+	 * M_CHECK() may replace the leading mbuf and so invalidate 'ip';
+	 * locate the transport header through 'm' after each call.
+	 */
+	switch (proto) {
+	case IPPROTO_TCP:
+		M_CHECK(sizeof(struct tcphdr));
+		th = (struct tcphdr *)(mtod(m, caddr_t) + iphlen);
+		sport = th->th_sport;
+		dport = th->th_dport;
+		break;
+	case IPPROTO_UDP:
+		M_CHECK(sizeof(struct udphdr));
+		uh = (struct udphdr *)(mtod(m, caddr_t) + iphlen);
+		sport = uh->uh_sport;
+		dport = uh->uh_dport;
+		break;
+	case IPPROTO_SCTP:
+		M_CHECK(sizeof(struct sctphdr));
+		sh = (struct sctphdr *)(mtod(m, caddr_t) + iphlen);
+		sport = sh->src_port;
+		dport = sh->dest_port;
+		break;
+	default:
+		break;
+	}
+#undef M_CHECK
+
+	/*
+	 * With ports, pack them into key[0] and hash with a zero seed;
+	 * otherwise seed the hash with the protocol plus the jitter value.
+	 */
+	if (sport > 0) {
+		((uint16_t *)key)[0] = sport;
+		((uint16_t *)key)[1] = dport;
+		offset = 0;
+	} else
+		offset = V_flow_hashjitter + proto;
+
+	flowid = jenkins_hashword(key, 3, offset);
+	m->m_flags |= M_FLOWID;
+	m->m_pkthdr.flowid = flowid;
+
+	return (m);
+}
+
 /*
  * Ip input routine.  Checksum and byte swap header.  If fragmented
  * try to reassemble.  Process options.  Pass to next level.

--------------030901040500010706090507--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4F2AB0A9.70905>