Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 7 Dec 2006 20:19:49 GMT
From:      Marko Zec <zec@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 111257 for review
Message-ID:  <200612072019.kB7KJnQK049555@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=111257

Change 111257 by zec@zec_tca51 on 2006/12/07 20:19:28

	Virtualize tcp_syncache.

Affected files ...

... //depot/projects/vimage/src/sys/netinet/tcp_subr.c#6 edit
... //depot/projects/vimage/src/sys/netinet/tcp_syncache.c#4 edit
... //depot/projects/vimage/src/sys/netinet/tcp_syncache.h#1 add
... //depot/projects/vimage/src/sys/netinet/vinet.h#4 edit

Differences ...

==== //depot/projects/vimage/src/sys/netinet/tcp_subr.c#6 (text+ko) ====

@@ -387,6 +387,7 @@
 #undef TCP_MINPROTOHDR
 
 	tcp_timer_init();
+	syncache_init();
 	tcp_hc_init();
 
 #ifdef VIMAGE
@@ -394,7 +395,6 @@
 		return;
 #endif
 
-	syncache_init();
 	tcp_reass_init();
 	ISN_LOCK_INIT();
 	callout_init(&isn_callout, CALLOUT_MPSAFE);

==== //depot/projects/vimage/src/sys/netinet/tcp_syncache.c#4 (text+ko) ====

@@ -79,6 +79,7 @@
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -112,51 +113,6 @@
     &tcp_syncookiesonly, 0,
     "Use only TCP SYN cookies");
 
-#define	SYNCOOKIE_SECRET_SIZE	8	/* dwords */
-#define	SYNCOOKIE_LIFETIME	16	/* seconds */
-
-struct syncache {
-	TAILQ_ENTRY(syncache)	sc_hash;
-	struct		in_conninfo sc_inc;	/* addresses */
-	u_long		sc_rxttime;		/* retransmit time */
-	u_int16_t	sc_rxmits;		/* retransmit counter */
-
-	u_int32_t	sc_tsreflect;		/* timestamp to reflect */
-	u_int32_t	sc_ts;			/* our timestamp to send */
-	u_int32_t	sc_tsoff;		/* ts offset w/ syncookies */
-	u_int32_t	sc_flowlabel;		/* IPv6 flowlabel */
-	tcp_seq		sc_irs;			/* seq from peer */
-	tcp_seq		sc_iss;			/* our ISS */
-	struct		mbuf *sc_ipopts;	/* source route */
-
-	u_int16_t	sc_peer_mss;		/* peer's MSS */
-	u_int16_t	sc_wnd;			/* advertised window */
-	u_int8_t	sc_ip_ttl;		/* IPv4 TTL */
-	u_int8_t	sc_ip_tos;		/* IPv4 TOS */
-	u_int8_t	sc_requested_s_scale:4,
-			sc_requested_r_scale:4;
-	u_int8_t	sc_flags;
-#define SCF_NOOPT	0x01			/* no TCP options */
-#define SCF_WINSCALE	0x02			/* negotiated window scaling */
-#define SCF_TIMESTAMP	0x04			/* negotiated timestamps */
-						/* MSS is implicit */
-#define SCF_UNREACH	0x10			/* icmp unreachable received */
-#define SCF_SIGNATURE	0x20			/* send MD5 digests */
-#define SCF_SACK	0x80			/* send SACK option */
-};
-
-struct syncache_head {
-	struct mtx	sch_mtx;
-	TAILQ_HEAD(sch_head, syncache)	sch_bucket;
-	struct callout	sch_timer;
-	int		sch_nextc;
-	u_int		sch_length;
-	u_int		sch_oddeven;
-	u_int32_t	sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
-	u_int32_t	sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
-	u_int		sch_reseed;		/* time_uptime, seconds */
-};
-
 static void	 syncache_drop(struct syncache *, struct syncache_head *);
 static void	 syncache_free(struct syncache *);
 static void	 syncache_insert(struct syncache *, struct syncache_head *);
@@ -183,46 +139,42 @@
 #define TCP_SYNCACHE_HASHSIZE		512
 #define TCP_SYNCACHE_BUCKETLIMIT	30
 
-struct tcp_syncache {
-	struct	syncache_head *hashbase;
-	uma_zone_t zone;
-	u_int	hashsize;
-	u_int	hashmask;
-	u_int	bucket_limit;
-	u_int	cache_count;		/* XXX: unprotected */
-	u_int	cache_limit;
-	u_int	rexmt_limit;
-	u_int	hash_secret;
-};
+#ifndef VIMAGE
 static struct tcp_syncache tcp_syncache;
+#endif
 
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");
 
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
-     &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+    bucketlimit, CTLFLAG_RDTUN,
+    tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache");
 
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
-     &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+    cachelimit, CTLFLAG_RDTUN,
+    tcp_syncache.cache_limit, 0, "Overall entry limit for syncache");
 
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
-     &tcp_syncache.cache_count, 0, "Current number of entries in syncache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+    count, CTLFLAG_RD,
+    tcp_syncache.cache_count, 0, "Current number of entries in syncache");
 
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
-     &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+    hashsize, CTLFLAG_RDTUN,
+    tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable");
 
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
-     &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+    rexmtlimit, CTLFLAG_RW,
+    tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
 
 static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
 
 #define SYNCACHE_HASH(inc, mask)					\
-	((tcp_syncache.hash_secret ^					\
+	((V_tcp_syncache.hash_secret ^					\
 	  (inc)->inc_faddr.s_addr ^					\
 	  ((inc)->inc_faddr.s_addr >> 16) ^				\
 	  (inc)->inc_fport ^ (inc)->inc_lport) & mask)
 
 #define SYNCACHE_HASH6(inc, mask)					\
-	((tcp_syncache.hash_secret ^					\
+	((V_tcp_syncache.hash_secret ^					\
 	  (inc)->inc6_faddr.s6_addr32[0] ^				\
 	  (inc)->inc6_faddr.s6_addr32[3] ^				\
 	  (inc)->inc_fport ^ (inc)->inc_lport) & mask)
@@ -258,58 +210,66 @@
 static void
 syncache_free(struct syncache *sc)
 {
+	INIT_VNET_INET(curvnetb);
+
 	if (sc->sc_ipopts)
 		(void) m_free(sc->sc_ipopts);
 
-	uma_zfree(tcp_syncache.zone, sc);
+	uma_zfree(V_tcp_syncache.zone, sc);
 }
 
 void
 syncache_init(void)
 {
+	INIT_VNET_INET(curvnetb);
 	int i;
 
-	tcp_syncache.cache_count = 0;
-	tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
-	tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
-	tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
-	tcp_syncache.hash_secret = arc4random();
+	V_tcp_syncache.cache_count = 0;
+	V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
+	V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
+	V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
+	V_tcp_syncache.hash_secret = arc4random();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
-	    &tcp_syncache.hashsize);
+	    &V_tcp_syncache.hashsize);
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
-	    &tcp_syncache.bucket_limit);
-	if (!powerof2(tcp_syncache.hashsize) || tcp_syncache.hashsize == 0) {
+	    &V_tcp_syncache.bucket_limit);
+	if (!powerof2(V_tcp_syncache.hashsize) ||
+	    V_tcp_syncache.hashsize == 0) {
 		printf("WARNING: syncache hash size is not a power of 2.\n");
-		tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
+		V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	}
-	tcp_syncache.hashmask = tcp_syncache.hashsize - 1;
+	V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1;
 
 	/* Set limits. */
-	tcp_syncache.cache_limit =
-	    tcp_syncache.hashsize * tcp_syncache.bucket_limit;
+	V_tcp_syncache.cache_limit =
+	    V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit;
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
-	    &tcp_syncache.cache_limit);
+	    &V_tcp_syncache.cache_limit);
 
 	/* Allocate the hash table. */
-	MALLOC(tcp_syncache.hashbase, struct syncache_head *,
-	    tcp_syncache.hashsize * sizeof(struct syncache_head),
+	MALLOC(V_tcp_syncache.hashbase, struct syncache_head *,
+	    V_tcp_syncache.hashsize * sizeof(struct syncache_head),
 	    M_SYNCACHE, M_WAITOK | M_ZERO);
 
 	/* Initialize the hash buckets. */
-	for (i = 0; i < tcp_syncache.hashsize; i++) {
-		TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket);
-		mtx_init(&tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
+	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
+#ifdef VIMAGE
+		V_tcp_syncache.hashbase[i].sch_vnetb = curvnetb;
+#endif
+		TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
+		mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
 			 NULL, MTX_DEF);
-		callout_init_mtx(&tcp_syncache.hashbase[i].sch_timer,
-			 &tcp_syncache.hashbase[i].sch_mtx, 0);
-		tcp_syncache.hashbase[i].sch_length = 0;
+		callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
+			 &V_tcp_syncache.hashbase[i].sch_mtx, 0);
+		V_tcp_syncache.hashbase[i].sch_length = 0;
 	}
 
 	/* Create the syncache entry zone. */
-	tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
+	/* XXX one zone for all vnets should do fine - revisit!!! */
+	V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-	uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
+	uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit);
 }
 
 /*
@@ -319,7 +279,7 @@
 static void
 syncache_insert(struct syncache *sc, struct syncache_head *sch)
 {
-	INIT_VNET_INET(curvnetb);
+	INIT_VNET_INET(sch->sch_vnetb);
 	struct syncache *sc2;
 
 	SCH_LOCK(sch);
@@ -328,7 +288,7 @@
 	 * Make sure that we don't overflow the per-bucket limit.
 	 * If the bucket is full, toss the oldest element.
 	 */
-	if (sch->sch_length >= tcp_syncache.bucket_limit) {
+	if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
 		KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
 			("sch->sch_length incorrect"));
 		sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
@@ -345,7 +305,7 @@
 
 	SCH_UNLOCK(sch);
 
-	tcp_syncache.cache_count++;
+	V_tcp_syncache.cache_count++;
 	V_tcpstat.tcps_sc_added++;
 }
 
@@ -356,6 +316,7 @@
 static void
 syncache_drop(struct syncache *sc, struct syncache_head *sch)
 {
+	INIT_VNET_INET(sch->sch_vnetb);
 
 	SCH_LOCK_ASSERT(sch);
 
@@ -363,7 +324,7 @@
 	sch->sch_length--;
 
 	syncache_free(sc);
-	tcp_syncache.cache_count--;
+	V_tcp_syncache.cache_count--;
 }
 
 /*
@@ -374,10 +335,10 @@
 static void
 syncache_timer(void *xsch)
 {
-	INIT_VNET_INET(curvnetb);	/* XXX this can't work !!! */
 	struct syncache_head *sch = (struct syncache_head *)xsch;
 	struct syncache *sc, *nsc;
 	int tick = ticks;
+	INIT_VNET_INET(sch->sch_vnetb);
 
 	/* NB: syncache_head has already been locked by the callout. */
 	SCH_LOCK_ASSERT(sch);
@@ -397,7 +358,7 @@
 			continue;
 		}
 
-		if (sc->sc_rxmits > tcp_syncache.rexmt_limit) {
+		if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) {
 			syncache_drop(sc, sch);
 			V_tcpstat.tcps_sc_stale++;
 			continue;
@@ -419,13 +380,14 @@
 struct syncache *
 syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
 {
+	INIT_VNET_INET(curvnetb);
 	struct syncache *sc;
 	struct syncache_head *sch;
 
 #ifdef INET6
 	if (inc->inc_isipv6) {
-		sch = &tcp_syncache.hashbase[
-		    SYNCACHE_HASH6(inc, tcp_syncache.hashmask)];
+		sch = &V_tcp_syncache.hashbase[
+		    SYNCACHE_HASH6(inc, V_tcp_syncache.hashmask)];
 		*schp = sch;
 
 		SCH_LOCK(sch);
@@ -438,8 +400,8 @@
 	} else
 #endif
 	{
-		sch = &tcp_syncache.hashbase[
-		    SYNCACHE_HASH(inc, tcp_syncache.hashmask)];
+		sch = &V_tcp_syncache.hashbase[
+		    SYNCACHE_HASH(inc, V_tcp_syncache.hashmask)];
 		*schp = sch;
 
 		SCH_LOCK(sch);
@@ -795,7 +757,7 @@
 		/* Pull out the entry to unlock the bucket row. */
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 		sch->sch_length--;
-		tcp_syncache.cache_count--;
+		V_tcp_syncache.cache_count--;
 		SCH_UNLOCK(sch);
 	}
 
@@ -933,7 +895,7 @@
 		goto done;
 	}
 
-	sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
+	sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 	if (sc == NULL) {
 		/*
 		 * The zone allocator couldn't provide more entries.
@@ -943,7 +905,7 @@
 		V_tcpstat.tcps_sc_zonefail++;
 		sc = TAILQ_LAST(&sch->sch_bucket, sch_head);
 		syncache_drop(sc, sch);
-		sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
+		sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 		if (sc == NULL) {
 			if (tcp_syncookies) {
 				bzero(&scs, sizeof(scs));

==== //depot/projects/vimage/src/sys/netinet/vinet.h#4 (text+ko) ====

@@ -48,6 +48,7 @@
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_hostcache.h>
+#include <netinet/tcp_syncache.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
@@ -68,6 +69,7 @@
 	struct	tcpstat _tcpstat;	/* tcp statistics */
 	TAILQ_HEAD(, tcptw) _twq_2msl;
 	struct	tcp_hostcache _tcp_hostcache;
+	struct	tcp_syncache _tcp_syncache;
 
 	struct	inpcbhead _udb;
 	struct	inpcbinfo _udbinfo;
@@ -113,6 +115,7 @@
 #define V_tcpstat		VNET_INET(tcpstat)
 #define V_twq_2msl		VNET_INET(twq_2msl)
 #define V_tcp_hostcache		VNET_INET(tcp_hostcache)
+#define V_tcp_syncache		VNET_INET(tcp_syncache)
 
 #define V_udb			VNET_INET(udb)
 #define V_udbinfo		VNET_INET(udbinfo)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200612072019.kB7KJnQK049555>