Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 16 Jan 2008 21:45:38 GMT
From:      Andre Oppermann <andre@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 133432 for review
Message-ID:  <200801162145.m0GLjctZ036755@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=133432

Change 133432 by andre@andre_flirtbox on 2008/01/16 21:45:36

	Handle all edge cases.
	
	Add SEQ_DELTA() function to ease segment trimming.

Affected files ...

.. //depot/projects/tcp_reass/netinet/tcp_reass.c#7 edit
.. //depot/projects/tcp_reass/netinet/tcp_seq.h#2 edit

Differences ...

==== //depot/projects/tcp_reass/netinet/tcp_reass.c#7 (text+ko) ====

@@ -100,23 +100,25 @@
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
     "TCP Segment Reassembly Queue");
 
-static int tcp_reass_maxseg = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
-    &tcp_reass_maxseg, 0,
+static int tcp_reass_maxblocks = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxblocks, CTLFLAG_RDTUN,
+    &tcp_reass_maxblocks, 0,
     "Global maximum number of TCP Segment Blocks in Reassembly Queue");
 
 int tcp_reass_qsize = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, curblocks, CTLFLAG_RD,
     &tcp_reass_qsize, 0,
     "Global number of TCP Segment Blocks currently in Reassembly Queue");
 
+static void	tcp_reass_merge(struct tcpcb *, int *, struct trq *, struct trq *);
+
 /* Initialize TCP reassembly queue */
 static void
 tcp_reass_zone_change(void *tag)
 {
 
-	tcp_reass_maxseg = nmbclusters / 16;
-	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+	tcp_reass_maxblocks = nmbclusters / 16;
+	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxblocks);
 }
 
 uma_zone_t	tcp_reass_zone;
@@ -126,12 +128,12 @@
 {
 
 	/* XXX: nmbclusters may be zero. */
-	tcp_reass_maxseg = nmbclusters / 16;
-	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
-	    &tcp_reass_maxseg);
+	tcp_reass_maxblocks = nmbclusters / 16;
+	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxblocks",
+	    &tcp_reass_maxblocks);
 	tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct trq),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
-	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxblocks);
 	EVENTHANDLER_REGISTER(nmbclusters_change,
 	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
 }
@@ -150,11 +152,21 @@
 	 * Call with th==NULL after becoming established to
 	 * force pre-ESTABLISHED data up to user socket.
 	 */
-	if (th == NULL)
+	if (th == NULL) {
+		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
+		    TAILQ_EMPTY(&tp->t_trq) ||
+		    ((tqe = TAILQ_FIRST(&tp->t_trq)) &&
+		     tqe->trq_seq != tp->rcv_nxt))
+			return (0);
 		goto present;
+	}
 
-	KASSERT(SEQ_LEQ(tp->rcv_nxt, th->th_seq),
+	KASSERT(*tlenp > 0,
+	    ("%s: segment doesn't contain any data", __func__));
+	KASSERT(SEQ_GT(tp->rcv_nxt, th->th_seq),
 	    ("%s: sequence number below rcv_nxt", __func__));
+	KASSERT(!(tp->rcv_nxt == th->th_seq) || !(TAILQ_EMPTY(&tp->t_trq)),
+	    ("%s: got missing segment but queue is empty", __func__));
 
 	/*
 	 * Limit the number of segments in the reassembly queue to prevent
@@ -183,13 +195,22 @@
 		return (0);
 	}
 
+	/* Get rid of packet header and mtags. */
+	m_demote(m, 1);
+
+#if 0
+	/* Trim empty mbufs from head of chain. */
+	while (m->m_len == 0) {
+		n = m;
+		m = m->m_next;
+		m_free(n);
+	}
+#endif
+
 	/* NB: m_adj(m, -i) may free mbufs at the tail of a chain. */
 	for (mcnt = 0, n = m; n; n = n->m_next)
 		mcnt += (n->m_flags & M_EXT) ? n->m_ext.ext_size : MSIZE;
 
-	/* Get rid of packet header and mtags. */
-	m_demote(m, 1);
-
 	/* Check if this segment attaches to the end. */
 	tqe = TAILQ_LAST(&tp->t_trq, trq_head);
 	if (tqe && tqe->trq_seq + tqe->trq_len == th->th_seq) {
@@ -204,27 +225,33 @@
 		return (0);
 	}
 
+	/* Check if beyond last block. */
+	if (tqe && SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq))
+		goto insert;
+
+	/* Check if this is the first segment. */
+	if (TAILQ_EMPTY(&tp->t_trq))
+		goto insert;
+
 	/* Check if this is the missing segment. */
 	if (tp->rcv_nxt == th->th_seq) {
 		tqe = TAILQ_FIRST(&tp->t_trq);
-		KASSERT(tqe != NULL,
-		    ("%s: missing segment but nothing in queue", __func__));
-		KASSERT(SEQ_GT(tqe->trq_seq, th->th_seq),
+		KASSERT(SEQ_LEQ(tqe->trq_seq, th->th_seq),
 		    ("%s: first block starts below missing segment", __func__));
-		if (SEQ_LT(tqe->trq_seq, th->th_seq + *tlenp)) {
+		/* Check if segment prepends first block. */
+		if (SEQ_LEQ(tqe->trq_seq, th->th_seq + *tlenp)) {
 			/* Trim tail of segment. */
-			if ((i = tqe->trq_seq - (th->th_seq + *tlenp))) {
-				m_adj(m, i);
-				*tlenp += i;		/* NB: i is negative */
+			if ((i = SEQ_DELTA(tqe->trq_seq, th->th_seq + *tlenp))) {
+				m_adj(m, -i);
+				*tlenp -= i;
 				/* TCP statistics. */
 				tcpstat.tcps_rcvpartduppack++;
-				tcpstat.tcps_rcvpartdupbyte -= i;
+				tcpstat.tcps_rcvpartdupbyte += i;
 				/* Update accounting. */
 				for (mcnt = 0, n = m; n; n = n->m_next)
 					mcnt += (n->m_flags & M_EXT) ?
 					    n->m_ext.ext_size : MSIZE;
 			}
-			/* Segment prepends first block. */
 			tqe->trq_len += *tlenp;
 			tqe->trq_mcnt += mcnt;
 			tp->t_trqmcnt += mcnt;
@@ -234,7 +261,7 @@
 			tqe->trq_m = m;
 			goto present;
 		}
-		goto insert;
+		goto insert;	/* No statistics, this segment is in line. */
 	}
 
 	/* TCP statistics. */
@@ -242,13 +269,19 @@
 	tcpstat.tcps_rcvoobyte += *tlenp;
 
 	/* See where it fits. */
-	TAILQ_FOREACH(tqe, &tp->t_trq, trq_q) {
+	TAILQ_FOREACH_SAFE(tqe, &tp->t_trq, trq_q, tqen) {
+#if 1
 		/* Segment is after this blocks coverage. */
 		if (SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq))
 			continue;
+#endif
 		/* Segment is after the previous one but before this one. */
 		if (SEQ_GT(tqe->trq_seq, th->th_seq + *tlenp))
 			break;		/* Insert as new block. */
+
+		KASSERT(SEQ_GT(tqe->trq_seq, th->th_seq + *tlenp),
+		    ("%s: iterated past insert point", __func__));
+
 		/* Segment is already fully covered. */
 		if (SEQ_LEQ(tqe->trq_seq, th->th_seq) &&
 		    SEQ_GEQ(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
@@ -258,54 +291,36 @@
 			*tlenp = 0;
 			return (0);
 		}
-		/* Segment appends to this block. */
-		if (SEQ_LEQ(tqe->trq_seq + tqe->trq_len, th->th_seq)) {
-			/* Trim head of segment. */
-			if ((i = tqe->trq_seq + tqe->trq_len - th->th_seq)) {
-				m_adj(m, i);
-				*tlenp -= i;
-				/* TCP Statistics. */
-				tcpstat.tcps_rcvpartduppack++;
-				tcpstat.tcps_rcvpartdupbyte += i;
-				/* XXX dupes */
-			}
-			tqe->trq_len += *tlenp;
-			tqe->trq_mcnt += mcnt;
+
+		/* Segment covers and extends on both ends. */
+		if (SEQ_GEQ(tqe->trq_seq, th->th_seq) &&
+		    SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
+			/* Replace block content. */
+			tp->t_trqmcnt -= tqe->trq_mcnt;
+			m_freem(tqe->trq_m);
+			tqe->trq_len = *tlenp;
+			tqe->trq_mcnt = mcnt;
 			tp->t_trqmcnt += mcnt;
-			tqe->trq_ml->m_next = m;
+			tqe->trq_seq = th->th_seq;
+			tqe->trq_m = m;
 			tqe->trq_ml = m_last(m);
-			/* Check if segment bridges two blocks to merge. */
-			if ((tqen = TAILQ_NEXT(tqe, trq_q)) &&
-			    SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq)) {
-				/* Trim head of next block. */
-				/* XXXAO: Should trim tail of segment instead. */
-				if ((i = tqe->trq_seq + tqe->trq_len -
-				    tqen->trq_seq)) {
-					m_adj(tqen->trq_m, i);
-					tqen->trq_len -= i;
-					/* TCP statistics. */
-					tcpstat.tcps_rcvpartduppack++;
-					tcpstat.tcps_rcvpartdupbyte += i;
-				}
-				tqe->trq_len += tqen->trq_len;
-				tqe->trq_mcnt += tqen->trq_mcnt;
-				tqe->trq_ml->m_next = tqen->trq_m;
-				tqe->trq_ml = tqen->trq_ml;
-				TAILQ_REMOVE(&tp->t_trq, tqen, trq_q);
-				uma_zfree(tcp_reass_zone, tqen);
-				tcp_reass_qsize--;
-			}
+			/* Check if segment bridges next block to merge. */
+			if (tqen != NULL &&
+			    SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq))
+				tcp_reass_merge(tp, tlenp, tqe, tqen);
 			return (0);
 		}
-		/* Segment prepends. */
-		if (SEQ_GT(tqe->trq_seq, th->th_seq)) {
+
+		/* Segment prepends to this block. */
+		if (SEQ_GT(tqe->trq_seq, th->th_seq) &&
+		    SEQ_GEQ(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
 			/* Trim tail of segment. */
-			if ((i = tqe->trq_seq - (th->th_seq + *tlenp))) {
-				m_adj(m, i);
-				*tlenp += i;		/* NB: i is negative */
+			if ((i = SEQ_DELTA(tqe->trq_seq, th->th_seq + *tlenp))) {
+				m_adj(m, -i);
+				*tlenp -= i;
 				/* TCP statistics. */
 				tcpstat.tcps_rcvpartduppack++;
-				tcpstat.tcps_rcvpartdupbyte -= i;
+				tcpstat.tcps_rcvpartdupbyte += i;
 				/* Update accounting. */
 				for (mcnt = 0, n = m; n; n = n->m_next)
 					mcnt += (n->m_flags & M_EXT) ?
@@ -320,6 +335,29 @@
 			tqe->trq_m = m;
 			return (0);
 		}
+
+		/* Segment appends to this block. */
+		if (SEQ_LEQ(tqe->trq_seq + tqe->trq_len, th->th_seq) &&
+		    SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
+			/* Trim head of segment. */
+			if ((i = SEQ_DELTA(tqe->trq_seq + tqe->trq_len, th->th_seq))) {
+				m_adj(m, i);
+				*tlenp -= i;
+				/* TCP Statistics. */
+				tcpstat.tcps_rcvpartduppack++;
+				tcpstat.tcps_rcvpartdupbyte += i;
+			}
+			tqe->trq_len += *tlenp;
+			tqe->trq_mcnt += mcnt;
+			tp->t_trqmcnt += mcnt;
+			tqe->trq_ml->m_next = m;
+			tqe->trq_ml = m_last(m);
+			/* Check if segment bridges two blocks to merge. */
+			if (tqen != NULL &&
+			    SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq))
+				tcp_reass_merge(tp, tlenp, tqe, tqen);
+			return (0);
+		}
 	}
 
 insert:
@@ -340,10 +378,15 @@
 	tqen->trq_ml = m_last(m);
 
 	/* Where to insert. */
-	if (tqe)
+	if (tqe != NULL && SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq))
+		TAILQ_INSERT_TAIL(&tp->t_trq, tqen, trq_q);
+	else if (tqe != NULL)
 		TAILQ_INSERT_BEFORE(tqe, tqen, trq_q);
-	else
+	else {
+		KASSERT(TAILQ_EMPTY(&tp->t_trq),
+		    ("%s: queue not empty", __func__));
 		TAILQ_INSERT_HEAD(&tp->t_trq, tqen, trq_q);
+	}
 
 	/* Missing segment? */
 	if (tp->rcv_nxt != th->th_seq)
@@ -353,25 +396,21 @@
 	 * Present data to user, advancing rcv_nxt through
 	 * completed sequence space.
 	 */
-	if (!TCPS_HAVEESTABLISHED(tp->t_state))
-		return (0);
-	tqe = TAILQ_FIRST(&tp->t_trq);
-	if (tqe == NULL || tqe->trq_seq != tp->rcv_nxt)
-		return (0);
 	SOCKBUF_LOCK(&so->so_rcv);
 	TAILQ_FOREACH_SAFE(tqe, &tp->t_trq, trq_q, tqen) {
-		/* We can never go more than one round. */
+		KASSERT(SEQ_GEQ(tqe->trq_seq, tp->rcv_nxt),
+		    ("%s: trq_seq < rcv_nxt", __func__));
 		if (tqe->trq_seq != tp->rcv_nxt)
 			break;
 #if 1
 		/* XXX: This is bogus if we had a FIN. */
-		flags = tqe->trq_flags & TH_FIN;
+		flags = tqe->trq_flags;
 #endif
-		tp->rcv_nxt += tqe->trq_len;
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			m_freem(tqe->trq_m);
 		else
 			sbappendstream_locked(&so->so_rcv, tqe->trq_m);
+		tp->rcv_nxt += tqe->trq_len;
 		tp->t_trqmcnt -= tqe->trq_mcnt;
 		TAILQ_REMOVE(&tp->t_trq, tqe, trq_q);
 		uma_zfree(tcp_reass_zone, tqe);
@@ -387,11 +426,64 @@
 #endif
 }
 
+static void
+tcp_reass_merge(struct tcpcb *tp, int *tlenp, struct trq *tqe, struct trq *tqen)
+{
+#if 0
+	struct mbuf *m;
+#endif
+	int i;
+
+	KASSERT(SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq),
+	    ("%s: blocks do not overlap, nothing to merge", __func__));
+
+	/* Appended block may reach beyond next block. */
+	while (SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq + tqen->trq_len)) {
+		tcpstat.tcps_rcvpartdupbyte += tqen->trq_len;	/* Statistics */
+		tp->t_trqmcnt -= tqe->trq_mcnt;
+		m_freem(tqen->trq_m);
+		TAILQ_REMOVE(&tp->t_trq, tqen, trq_q);
+		uma_zfree(tcp_reass_zone, tqen);
+		tcp_reass_qsize--;
+		/* And the one after that. */
+		if ((tqen = TAILQ_NEXT(tqe, trq_q)) == NULL)
+			return;
+	}
+
+	/* Trim head of next block. */
+	if ((i = SEQ_DELTA(tqe->trq_seq + tqe->trq_len, tqen->trq_seq))) {
+		m_adj(tqen->trq_m, i);
+		tqen->trq_len -= i;
+		tcpstat.tcps_rcvpartdupbyte += i;		/* Statistics */
+#if 0
+		/* Trim ... */
+		while (tqen->trq_m->m_len == 0) {
+			m = tqen->trq_m;
+			tqen->trq_m = tqen->trq_m->m_next;
+			/* mcnt */
+			m_free(m);
+		}
+#endif
+		KASSERT(tqen->trq_m != NULL,
+		    ("%s: no remaining mbufs in block", __func__));
+	}
+
+	/* Merge blocks together. */
+	tqe->trq_len += tqen->trq_len;
+	tqe->trq_mcnt += tqen->trq_mcnt;
+	tqe->trq_ml->m_next = tqen->trq_m;
+	tqe->trq_ml = tqen->trq_ml;
+	TAILQ_REMOVE(&tp->t_trq, tqen, trq_q);
+	uma_zfree(tcp_reass_zone, tqen);
+	tcp_reass_qsize--;
+}
+
 /*
  * Free the reassembly queue on tcpcb free and on general memory shortage.
  */
 void
-tcp_reass_qfree(struct tcpcb *tp) {
+tcp_reass_qfree(struct tcpcb *tp)
+{
 	struct trq *tqe, *tqen;
 
 	INP_LOCK_ASSERT(tp->t_inpcb);

==== //depot/projects/tcp_reass/netinet/tcp_seq.h#2 (text+ko) ====

@@ -44,6 +44,7 @@
 
 #define	SEQ_MIN(a, b)	((SEQ_LT(a, b)) ? (a) : (b))
 #define	SEQ_MAX(a, b)	((SEQ_GT(a, b)) ? (a) : (b))
+#define	SEQ_DELTA(a,b)	((int)SEQ_MAX(a, b) - SEQ_MIN(a, b))
 
 /* for modulo comparisons of timestamps */
 #define TSTMP_LT(a,b)	((int)((a)-(b)) < 0)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200801162145.m0GLjctZ036755>