Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 7 Dec 2006 16:46:09 GMT
From:      Marko Zec <zec@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 111250 for review
Message-ID:  <200612071646.kB7Gk92U002414@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=111250

Change 111250 by zec@zec_tca51 on 2006/12/07 16:45:41

	Initial attempt at virtualizing tcp_hostcache.

Affected files ...

... //depot/projects/vimage/src/sys/netinet/tcp_hostcache.c#3 edit
... //depot/projects/vimage/src/sys/netinet/tcp_hostcache.h#1 add
... //depot/projects/vimage/src/sys/netinet/tcp_subr.c#5 edit
... //depot/projects/vimage/src/sys/netinet/vinet.h#3 edit

Differences ...

==== //depot/projects/vimage/src/sys/netinet/tcp_hostcache.c#3 (text+ko) ====

@@ -60,11 +60,6 @@
  * memory constrains.
  */
 
-/*
- * Many thanks to jlemon for basic structure of tcp_syncache which is being
- * followed here.
- */
-
 #include "opt_inet6.h"
 #include "opt_vimage.h"
 
@@ -98,57 +93,14 @@
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
+#include <netinet/tcp_hostcache.h>
 
 #include <vm/uma.h>
 
 
-TAILQ_HEAD(hc_qhead, hc_metrics);
-
-struct hc_head {
-	struct hc_qhead	hch_bucket;
-	u_int		hch_length;
-	struct mtx	hch_mtx;
-};
-
-struct hc_metrics {
-	/* housekeeping */
-	TAILQ_ENTRY(hc_metrics) rmx_q;
-	struct	hc_head *rmx_head; /* head of bucket tail queue */
-	struct	in_addr ip4;	/* IP address */
-	struct	in6_addr ip6;	/* IP6 address */
-	/* endpoint specific values for tcp */
-	u_long	rmx_mtu;	/* MTU for this path */
-	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
-	u_long	rmx_rtt;	/* estimated round trip time */
-	u_long	rmx_rttvar;	/* estimated rtt variance */
-	u_long	rmx_bandwidth;	/* estimated bandwidth */
-	u_long	rmx_cwnd;	/* congestion window */
-	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
-	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
-	/* tcp hostcache internal data */
-	int	rmx_expire;	/* lifetime for object */
-	u_long	rmx_hits;	/* number of hits */
-	u_long	rmx_updates;	/* number of updates */
-};
-
-/* Arbitrary values */
-#define TCP_HOSTCACHE_HASHSIZE		512
-#define TCP_HOSTCACHE_BUCKETLIMIT	30
-#define TCP_HOSTCACHE_EXPIRE		60*60	/* one hour */
-#define TCP_HOSTCACHE_PRUNE		5*60	/* every 5 minutes */
-
-struct tcp_hostcache {
-	struct	hc_head *hashbase;
-	uma_zone_t zone;
-	u_int	hashsize;
-	u_int	hashmask;
-	u_int	bucket_limit;
-	u_int	cache_count;
-	u_int	cache_limit;
-	int	expire;
-	int	purgeall;
-};
+#ifndef VIMAGE
 static struct tcp_hostcache tcp_hostcache;
+#endif
 
 static struct callout tcp_hc_callout;
 
@@ -157,25 +109,32 @@
 static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
 static void tcp_hc_purge(void *);
 
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0,
+    "TCP Host cache");
 
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
-     &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, cachelimit,
+     CTLFLAG_RDTUN, tcp_hostcache.cache_limit, 0,
+     "Overall entry limit for hostcache");
 
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
-     &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, hashsize,
+     CTLFLAG_RDTUN, tcp_hostcache.hashsize, 0,
+     "Size of TCP hostcache hashtable");
 
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
-     &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, bucketlimit,
+     CTLFLAG_RDTUN, tcp_hostcache.bucket_limit, 0,
+     "Per-bucket hash limit for hostcache");
 
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD,
-     &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, count,
+     CTLFLAG_RD, tcp_hostcache.cache_count, 0,
+     "Current number of entries in hostcache");
 
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW,
-     &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, expire,
+     CTLFLAG_RW, tcp_hostcache.expire, 0,
+     "Expire time of TCP hostcache entries");
 
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW,
-     &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, purge,
+     CTLFLAG_RW, tcp_hostcache.purgeall, 0,
+     "Expire all entires on next purge run");
 
 SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
 	CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
@@ -186,7 +145,7 @@
 
 #define HOSTCACHE_HASH(ip) \
 	(((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) &	\
-	  tcp_hostcache.hashmask)
+	  V_tcp_hostcache.hashmask)
 
 /* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */
 #define HOSTCACHE_HASH6(ip6)				\
@@ -194,7 +153,7 @@
 	  (ip6)->s6_addr32[1] ^				\
 	  (ip6)->s6_addr32[2] ^				\
 	  (ip6)->s6_addr32[3]) &			\
-	 tcp_hostcache.hashmask)
+	 V_tcp_hostcache.hashmask)
 
 #define THC_LOCK(lp)		mtx_lock(lp)
 #define THC_UNLOCK(lp)		mtx_unlock(lp)
@@ -202,59 +161,64 @@
 void
 tcp_hc_init(void)
 {
+	INIT_VNET_INET(curvnetb);
 	int i;
 
 	/*
 	 * Initialize hostcache structures
 	 */
-	tcp_hostcache.cache_count = 0;
-	tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
-	tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
-	tcp_hostcache.cache_limit =
-	    tcp_hostcache.hashsize * tcp_hostcache.bucket_limit;
-	tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
+	V_tcp_hostcache.cache_count = 0;
+	V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
+	V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
+	V_tcp_hostcache.cache_limit =
+	    V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit;
+	V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
 
 	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
-	    &tcp_hostcache.hashsize);
+	    &V_tcp_hostcache.hashsize);
 	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit",
-	    &tcp_hostcache.cache_limit);
+	    &V_tcp_hostcache.cache_limit);
 	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit",
-	    &tcp_hostcache.bucket_limit);
-	if (!powerof2(tcp_hostcache.hashsize)) {
+	    &V_tcp_hostcache.bucket_limit);
+	if (!powerof2(V_tcp_hostcache.hashsize)) {
 		printf("WARNING: hostcache hash size is not a power of 2.\n");
-		tcp_hostcache.hashsize = 512;	/* safe default */
+		V_tcp_hostcache.hashsize = 512;	/* safe default */
 	}
-	tcp_hostcache.hashmask = tcp_hostcache.hashsize - 1;
+	V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1;
 
 	/*
 	 * Allocate the hash table
 	 */
-	tcp_hostcache.hashbase = (struct hc_head *)
-	    malloc(tcp_hostcache.hashsize * sizeof(struct hc_head),
+	V_tcp_hostcache.hashbase = (struct hc_head *)
+	    malloc(V_tcp_hostcache.hashsize * sizeof(struct hc_head),
 		   M_HOSTCACHE, M_WAITOK | M_ZERO);
 
 	/*
 	 * Initialize the hash buckets
 	 */
-	for (i = 0; i < tcp_hostcache.hashsize; i++) {
-		TAILQ_INIT(&tcp_hostcache.hashbase[i].hch_bucket);
-		tcp_hostcache.hashbase[i].hch_length = 0;
-		mtx_init(&tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
+	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+		TAILQ_INIT(&V_tcp_hostcache.hashbase[i].hch_bucket);
+		V_tcp_hostcache.hashbase[i].hch_length = 0;
+		mtx_init(&V_tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
 			  NULL, MTX_DEF);
 	}
 
 	/*
 	 * Allocate the hostcache entries.
+	 *
+	 * XXX don't need a separate zone for each hc instance - revisit!!!
 	 */
-	tcp_hostcache.zone = uma_zcreate("hostcache", sizeof(struct hc_metrics),
-	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-	uma_zone_set_max(tcp_hostcache.zone, tcp_hostcache.cache_limit);
+	V_tcp_hostcache.zone =
+	    uma_zcreate("hostcache", sizeof(struct hc_metrics),
+			NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit);
 
 	/*
 	 * Set up periodic cache cleanup.
 	 */
 	callout_init(&tcp_hc_callout, CALLOUT_MPSAFE);
-	callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+	callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz,
+		      tcp_hc_purge, 0);
 }
 
 /*
@@ -266,6 +230,7 @@
 static struct hc_metrics *
 tcp_hc_lookup(struct in_conninfo *inc)
 {
+	INIT_VNET_INET(curvnetb);
 	int hash;
 	struct hc_head *hc_head;
 	struct hc_metrics *hc_entry;
@@ -280,7 +245,7 @@
 	else
 		hash = HOSTCACHE_HASH(&inc->inc_faddr);
 
-	hc_head = &tcp_hostcache.hashbase[hash];
+	hc_head = &V_tcp_hostcache.hashbase[hash];
 
 	/*
 	 * aquire lock for this bucket row
@@ -336,7 +301,7 @@
 	else
 		hash = HOSTCACHE_HASH(&inc->inc_faddr);
 
-	hc_head = &tcp_hostcache.hashbase[hash];
+	hc_head = &V_tcp_hostcache.hashbase[hash];
 
 	/*
 	 * aquire lock for this bucket row
@@ -348,8 +313,8 @@
 	/*
 	 * If the bucket limit is reached reuse the least used element
 	 */
-	if (hc_head->hch_length >= tcp_hostcache.bucket_limit ||
-	    tcp_hostcache.cache_count >= tcp_hostcache.cache_limit) {
+	if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
+	    V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit) {
 		hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
 		/*
 		 * At first we were dropping the last element, just to
@@ -359,17 +324,17 @@
 		 * be "lossy".
 		 */
 		TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
-		tcp_hostcache.hashbase[hash].hch_length--;
-		tcp_hostcache.cache_count--;
+		V_tcp_hostcache.hashbase[hash].hch_length--;
+		V_tcp_hostcache.cache_count--;
 		V_tcpstat.tcps_hc_bucketoverflow++;
 #if 0
-		uma_zfree(tcp_hostcache.zone, hc_entry);
+		uma_zfree(V_tcp_hostcache.zone, hc_entry);
 #endif
 	} else {
 		/*
 		 * Allocate a new entry, or balk if not possible
 		 */
-		hc_entry = uma_zalloc(tcp_hostcache.zone, M_NOWAIT);
+		hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT);
 		if (hc_entry == NULL) {
 			THC_UNLOCK(&hc_head->hch_mtx);
 			return NULL;
@@ -385,14 +350,14 @@
 	else
 		hc_entry->ip4 = inc->inc_faddr;
 	hc_entry->rmx_head = hc_head;
-	hc_entry->rmx_expire = tcp_hostcache.expire;
+	hc_entry->rmx_expire = V_tcp_hostcache.expire;
 
 	/*
 	 * Put it upfront
 	 */
 	TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
-	tcp_hostcache.hashbase[hash].hch_length++;
-	tcp_hostcache.cache_count++;
+	V_tcp_hostcache.hashbase[hash].hch_length++;
+	V_tcp_hostcache.cache_count++;
 	V_tcpstat.tcps_hc_added++;
 
 	return hc_entry;
@@ -406,6 +371,7 @@
 void
 tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
 {
+	INIT_VNET_INET(curvnetb);
 	struct hc_metrics *hc_entry;
 
 	/*
@@ -421,7 +387,7 @@
 		return;
 	}
 	hc_entry->rmx_hits++;
-	hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
 
 	hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu;
 	hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh;
@@ -446,6 +412,7 @@
 u_long
 tcp_hc_getmtu(struct in_conninfo *inc)
 {
+	INIT_VNET_INET(curvnetb);
 	struct hc_metrics *hc_entry;
 	u_long mtu;
 
@@ -454,7 +421,7 @@
 		return 0;
 	}
 	hc_entry->rmx_hits++;
-	hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
 
 	mtu = hc_entry->rmx_mtu;
 	THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
@@ -468,6 +435,7 @@
 void
 tcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu)
 {
+	INIT_VNET_INET(curvnetb);
 	struct hc_metrics *hc_entry;
 
 	/*
@@ -484,7 +452,7 @@
 			return;
 	}
 	hc_entry->rmx_updates++;
-	hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
 
 	hc_entry->rmx_mtu = mtu;
 
@@ -517,7 +485,7 @@
 			return;
 	}
 	hc_entry->rmx_updates++;
-	hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
 
 	if (hcml->rmx_rtt != 0) {
 		if (hc_entry->rmx_rtt == 0)
@@ -588,13 +556,14 @@
 static int
 sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
 {
+	INIT_VNET_INET(curvnetb);
 	int bufsize;
 	int linesize = 128;
 	char *p, *buf;
 	int len, i, error;
 	struct hc_metrics *hc_entry;
 
-	bufsize = linesize * (tcp_hostcache.cache_count + 1);
+	bufsize = linesize * (V_tcp_hostcache.cache_count + 1);
 
 	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
 
@@ -604,9 +573,9 @@
 	p += len;
 
 #define msec(u) (((u) + 500) / 1000)
-	for (i = 0; i < tcp_hostcache.hashsize; i++) {
-		THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx);
-		TAILQ_FOREACH(hc_entry, &tcp_hostcache.hashbase[i].hch_bucket,
+	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+		THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+		TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket,
 			      rmx_q) {
 			len = snprintf(p, linesize,
 			    "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
@@ -632,7 +601,7 @@
 			    hc_entry->rmx_expire);
 			p += len;
 		}
-		THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+		THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
 	}
 #undef msec
 	error = SYSCTL_OUT(req, buf, p - buf);
@@ -648,28 +617,32 @@
 tcp_hc_purge(void *arg)
 {
 	struct hc_metrics *hc_entry, *hc_next;
-	int all = (intptr_t)arg;
+	int all = 0;
 	int i;
 
-	if (tcp_hostcache.purgeall) {
+	VNETB_ITERLOOP_BEGIN()
+	INIT_VNET_INET(curvnetb);
+	if (V_tcp_hostcache.purgeall) {
 		all = 1;
-		tcp_hostcache.purgeall = 0;
+		V_tcp_hostcache.purgeall = 0;
 	}
 
-	for (i = 0; i < tcp_hostcache.hashsize; i++) {
-		THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx);
-		TAILQ_FOREACH_SAFE(hc_entry, &tcp_hostcache.hashbase[i].hch_bucket,
-			      rmx_q, hc_next) {
+	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+		THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+		TAILQ_FOREACH_SAFE(hc_entry,
+				   &V_tcp_hostcache.hashbase[i].hch_bucket,
+				   rmx_q, hc_next) {
 			if (all || hc_entry->rmx_expire <= 0) {
-				TAILQ_REMOVE(&tcp_hostcache.hashbase[i].hch_bucket,
+				TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket,
 					      hc_entry, rmx_q);
-				uma_zfree(tcp_hostcache.zone, hc_entry);
-				tcp_hostcache.hashbase[i].hch_length--;
-				tcp_hostcache.cache_count--;
+				uma_zfree(V_tcp_hostcache.zone, hc_entry);
+				V_tcp_hostcache.hashbase[i].hch_length--;
+				V_tcp_hostcache.cache_count--;
 			} else
 				hc_entry->rmx_expire -= TCP_HOSTCACHE_PRUNE;
 		}
-		THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+		THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
 	}
+	VNETB_ITERLOOP_END();
 	callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
 }

==== //depot/projects/vimage/src/sys/netinet/tcp_subr.c#5 (text+ko) ====

@@ -387,6 +387,7 @@
 #undef TCP_MINPROTOHDR
 
 	tcp_timer_init();
+	tcp_hc_init();
 
 #ifdef VIMAGE
 	if (curvnetb != &vnetb_0)
@@ -394,7 +395,6 @@
 #endif
 
 	syncache_init();
-	tcp_hc_init();
 	tcp_reass_init();
 	ISN_LOCK_INIT();
 	callout_init(&isn_callout, CALLOUT_MPSAFE);

==== //depot/projects/vimage/src/sys/netinet/vinet.h#3 (text+ko) ====

@@ -47,6 +47,7 @@
 #include <netinet/icmp_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_hostcache.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
@@ -66,6 +67,7 @@
 	struct	inpcbinfo _tcbinfo;
 	struct	tcpstat _tcpstat;	/* tcp statistics */
 	TAILQ_HEAD(, tcptw) _twq_2msl;
+	struct	tcp_hostcache _tcp_hostcache;
 
 	struct	inpcbhead _udb;
 	struct	inpcbinfo _udbinfo;
@@ -110,6 +112,7 @@
 #define V_tcbinfo		VNET_INET(tcbinfo)
 #define V_tcpstat		VNET_INET(tcpstat)
 #define V_twq_2msl		VNET_INET(twq_2msl)
+#define V_tcp_hostcache		VNET_INET(tcp_hostcache)
 
 #define V_udb			VNET_INET(udb)
 #define V_udbinfo		VNET_INET(udbinfo)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200612071646.kB7Gk92U002414>