Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 11 Jan 2008 21:20:41 GMT
From:      Andre Oppermann <andre@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 133069 for review
Message-ID:  <200801112120.m0BLKf75004734@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=133069

Change 133069 by andre@andre_flirtbox on 2008/01/11 21:20:25

	Initial branch and import of new TCP reassembly queue.
	It's primarily a code dump.  It compiles, doesn't crash and
	almost works as advertised.  Description is at the top of
	the file.  Lots of comments inline.

Affected files ...

.. //depot/projects/tcp_reass/netinet/tcp_input.c#2 edit
.. //depot/projects/tcp_reass/netinet/tcp_reass.c#2 edit
.. //depot/projects/tcp_reass/netinet/tcp_subr.c#2 edit
.. //depot/projects/tcp_reass/netinet/tcp_usrreq.c#2 edit
.. //depot/projects/tcp_reass/netinet/tcp_var.h#2 edit

Differences ...

==== //depot/projects/tcp_reass/netinet/tcp_input.c#2 (text+ko) ====

@@ -980,7 +980,7 @@
 	    tp->snd_nxt == tp->snd_max &&
 	    tiwin && tiwin == tp->snd_wnd && 
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
-	    LIST_EMPTY(&tp->t_segq) &&
+	    TAILQ_EMPTY(&tp->t_trq) &&
 	    ((to.to_flags & TOF_TS) == 0 ||
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
 
@@ -1705,8 +1705,7 @@
 		 * later; if not, do so now to pass queued data to user.
 		 */
 		if (tlen == 0 && (thflags & TH_FIN) == 0)
-			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
-			    (struct mbuf *)0);
+			(void) tcp_reass(tp, NULL, NULL, NULL);
 		tp->snd_wl1 = th->th_seq - 1;
 		/* FALLTHROUGH */
 
@@ -2234,7 +2233,7 @@
 		 * fast retransmit can work).
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
-		    LIST_EMPTY(&tp->t_segq) &&
+		    TAILQ_EMPTY(&tp->t_trq) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
 			if (DELAY_ACK(tp))
 				tp->t_flags |= TF_DELACK;

==== //depot/projects/tcp_reass/netinet/tcp_reass.c#2 (text+ko) ====

@@ -1,6 +1,6 @@
 /*-
- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
- *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2007
+ *	Andre Oppermann, Internet Business Solutions AG.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -27,14 +27,55 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/netinet/tcp_reass.c,v 1.352 2007/05/13 22:16:13 andre Exp $
  */
 
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/tcp_reass.c,v 1.353 2007/10/07 20:44:24 silby Exp $");
+/*
+ * Operational overview of TCP reassembly:
+ *
+ * It is the purpose of tcp reassembly to store segments that are received
+ * out of order.  This happens when packets are lost along the way due to
+ * various reasons.  The most common one is traffic overload which causes
+ * routers to stop accepting packets for brief moments.
+ *
+ * Upon arrival of the missing segment(s) the whole chain of stored segments
+ * is moved into the socket buffer.  In case of multiple missing segments
+ * the first consecutive part is moved with the remainder being kept in
+ * store until the next missing segment arrives.
+ *
+ * While in reassembly mode *all* arriving segments are put into the reassembly
+ * queue.
+ *
+ * Instead of storing all segments on their own we build blocks of consecutive
+ * segments chained together.  We use a tailq because a new segment has the
+ * highest probability to fit the tail of the chain.  If not, the second
+ * highest probability is the beginning of the chain for being the missing
+ * segment.  Otherwise we cycle through each consecutive block until a match
+ * is found.  If a segment matches the end of one block and the start of the
+ * next block the two blocks are joined together.  If no match is found a
+ * new block is created.
+ *
+ * This system is very efficient and can deal with long chains
+ * and many holes.
+ *
+ * trq_tail ----------------------------------------------\
+ * trq_head --> [block] ------>	[block] ------>	[block] <-/
+ *		m_next		m_next		m_next
+ *		   |		   |		   |
+ *		m_next		m_next		m_next
+ *		   |		   |		   |
+ *		m_next		m_next		m_next
+ *
+ *
+ * A further improvement is to merge the content of mbufs together if the
+ * preceding one has enough space to hold the data of the new one.  When
+ * trimming the head of an mbuf chain m_adj() empties the mbufs but leaves
+ * them in place.  Only when trimming from the tail it actually frees them.
+ * Normally we don't get mbuf chains so this isn't too much of a concern
+ * right now.  TODO.
+ */
 
 #include "opt_inet.h"
-#include "opt_inet6.h"
-#include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -48,30 +89,13 @@
 
 #include <vm/uma.h>
 
-#include <net/if.h>
-#include <net/route.h>
-
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip_options.h>
-#include <netinet/ip6.h>
-#include <netinet6/in6_pcb.h>
-#include <netinet6/ip6_var.h>
-#include <netinet6/nd6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
-#include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
-#include <netinet6/tcp6_var.h>
-#include <netinet/tcpip.h>
-#ifdef TCPDEBUG
-#include <netinet/tcp_debug.h>
-#endif /* TCPDEBUG */
 
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
     "TCP Segment Reassembly Queue");
@@ -114,7 +138,7 @@
 	tcp_reass_maxseg = nmbclusters / 16;
 	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
 	    &tcp_reass_maxseg);
-	tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+	tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct trq),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
 	EVENTHANDLER_REGISTER(nmbclusters_change,
@@ -124,23 +148,15 @@
 int
 tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 {
-	struct tseg_qent *q;
-	struct tseg_qent *p = NULL;
-	struct tseg_qent *nq;
-	struct tseg_qent *te = NULL;
+	struct trq *tqe, *tqen;
 	struct socket *so = tp->t_inpcb->inp_socket;
-	int flags;
+	struct mbuf *n;
+	int i, flags = 0, segs = 0;
 
 	INP_LOCK_ASSERT(tp->t_inpcb);
 
 	/*
-	 * XXX: tcp_reass() is rather inefficient with its data structures
-	 * and should be rewritten (see NetBSD for optimizations).  While
-	 * doing that it should move to its own file tcp_reass.c.
-	 */
-
-	/*
-	 * Call with th==NULL after become established to
+	 * Call with th==NULL after becoming established to
 	 * force pre-ESTABLISHED data up to user socket.
 	 */
 	if (th == NULL)
@@ -155,7 +171,7 @@
 	 */
 	if (th->th_seq != tp->rcv_nxt &&
 	    (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
-	     tp->t_segqlen >= tcp_reass_maxqlen)) {
+	     tp->t_trqlen >= tcp_reass_maxqlen)) {
 		tcp_reass_overflows++;
 		tcpstat.tcps_rcvmemdrop++;
 		m_freem(m);
@@ -163,97 +179,145 @@
 		return (0);
 	}
 
-	/*
-	 * Allocate a new queue entry. If we can't, or hit the zone limit
-	 * just drop the pkt.
-	 */
-	te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
-	if (te == NULL) {
-		tcpstat.tcps_rcvmemdrop++;
-		m_freem(m);
-		*tlenp = 0;
+	/* Accounting. */
+	tcpstat.tcps_rcvoopack++;
+	tcpstat.tcps_rcvoobyte += *tlenp;
+	/* NB:  m_adj(m, -i) may free mbufs at the tail of a chain. */
+	for (n = m; n; n = n->m_next)
+		segs++;
+	tp->t_trqlen += segs;
+	tcp_reass_qsize += segs;
+
+	/* Get rid of packet header and mtags. */
+	m_demote(m, 1);
+
+	/* Check if this segment attaches to the end. */
+	tqe = TAILQ_LAST(&tp->t_trq, trq_head);
+	if (tqe && tqe->trq_seq + tqe->trq_len == th->th_seq) {
+		tqe->trq_len += *tlenp;
+		tqe->trq_segs += segs;
+		tqe->trq_ml->m_next = m;
+		tqe->trq_ml = m_last(m);
 		return (0);
 	}
-	tp->t_segqlen++;
-	tcp_reass_qsize++;
 
-	/*
-	 * Find a segment which begins after this one does.
-	 */
-	LIST_FOREACH(q, &tp->t_segq, tqe_q) {
-		if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
-			break;
-		p = q;
-	}
-
-	/*
-	 * If there is a preceding segment, it may provide some of
-	 * our data already.  If so, drop the data from the incoming
-	 * segment.  If it provides all of our data, drop us.
-	 */
-	if (p != NULL) {
-		int i;
-		/* conversion to int (in i) handles seq wraparound */
-		i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
-		if (i > 0) {
-			if (i >= *tlenp) {
-				tcpstat.tcps_rcvduppack++;
-				tcpstat.tcps_rcvdupbyte += *tlenp;
-				m_freem(m);
-				uma_zfree(tcp_reass_zone, te);
-				tp->t_segqlen--;
-				tcp_reass_qsize--;
-				/*
-				 * Try to present any queued data
-				 * at the left window edge to the user.
-				 * This is needed after the 3-WHS
-				 * completes.
-				 */
-				goto present;	/* ??? */
+	/* Check if this is the missing segment. */
+	if (tp->rcv_nxt == th->th_seq) {
+		tqe = TAILQ_FIRST(&tp->t_trq);
+		KASSERT(tqe != NULL,
+		    ("%s: missing segment but nothing in queue", __func__));
+		if (SEQ_LT(tqe->trq_seq, th->th_seq + *tlenp)) {
+			/* Trim tail. */
+			if ((i = tqe->trq_seq - (th->th_seq + *tlenp))) {
+				m_adj(m, i);
+				*tlenp -= i;
+				/* Update accounting. */
+				if (segs > 1) {
+					for (n = m; n; n = n->m_next)
+						segs--;
+					tp->t_trqlen -= segs;
+					tcp_reass_qsize -= segs;
+				}
 			}
-			m_adj(m, i);
-			*tlenp -= i;
-			th->th_seq += i;
 		}
+		goto insert;
 	}
-	tcpstat.tcps_rcvoopack++;
-	tcpstat.tcps_rcvoobyte += *tlenp;
 
-	/*
-	 * While we overlap succeeding segments trim them or,
-	 * if they are completely covered, dequeue them.
-	 */
-	while (q) {
-		int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
-		if (i <= 0)
+	/* See where it fits. */
+	TAILQ_FOREACH(tqe, &tp->t_trq, trq_q) {
+		/* Segment is after our coverage. */
+		if (SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq))
+			continue;
+		/* Segment is after the previous one but before us. */
+		if (SEQ_GT(tqe->trq_seq, th->th_seq + *tlenp))
 			break;
-		if (i < q->tqe_len) {
-			q->tqe_th->th_seq += i;
-			q->tqe_len -= i;
-			m_adj(q->tqe_m, i);
-			break;
+		/* Segment is already fully covered. */
+		if (SEQ_LEQ(tqe->trq_seq, th->th_seq) &&
+		    SEQ_GEQ(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += *tlenp;
+			m_freem(m);
+			tp->t_trqlen -= segs;
+			tcp_reass_qsize -= segs;
+			*tlenp = 0;
+			return (0);
+		}
+		/* Segment appends. */
+		if (SEQ_LEQ(tqe->trq_seq + tqe->trq_len, th->th_seq)) {
+			/* Trim head. */
+			if ((i = tqe->trq_seq + tqe->trq_len - th->th_seq)) {
+				m_adj(m, i);
+				*tlenp -= i;
+			}
+			tqe->trq_len += *tlenp;
+			tqe->trq_segs += segs;
+			tqe->trq_ml->m_next = m;
+			tqe->trq_ml = m_last(m);
+			/* Check for next block to merge. */
+			if ((tqen = TAILQ_NEXT(tqe, trq_q)) &&
+			    SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq)) {
+				/* Trim head. */
+				if ((i = tqe->trq_seq + tqe->trq_len -
+				    tqen->trq_seq)) {
+					m_adj(tqen->trq_m, i);
+					tqen->trq_len -= i;
+				}
+				tqe->trq_len += tqen->trq_len;
+				tqe->trq_segs += tqen->trq_segs;
+				tqe->trq_ml->m_next = tqen->trq_m;
+				tqe->trq_ml = tqen->trq_ml;
+				TAILQ_REMOVE(&tp->t_trq, tqen, trq_q);
+				uma_zfree(tcp_reass_zone, tqen);
+			}
+			return (0);
+		}
+		/* Segment prepends. */
+		if (SEQ_GT(tqe->trq_seq, th->th_seq)) {
+			/* Trim tail. */
+			if ((i = tqe->trq_seq - (th->th_seq + *tlenp))) {
+				m_adj(m, i);
+				*tlenp -= i;
+				/* Update accounting. */
+				if (segs > 1) {
+					for (n = m; n; n = n->m_next)
+						segs--;
+					tp->t_trqlen -= segs;
+					tcp_reass_qsize -= segs;
+				}
+			}
+			tqe->trq_len += *tlenp;
+			tqe->trq_segs += segs;
+			tqe->trq_m = m;
+			n = m_last(m);
+			n->m_next = tqe->trq_m;
+			return (0);
 		}
+	}
 
-		nq = LIST_NEXT(q, tqe_q);
-		LIST_REMOVE(q, tqe_q);
-		m_freem(q->tqe_m);
-		uma_zfree(tcp_reass_zone, q);
-		tp->t_segqlen--;
-		tcp_reass_qsize--;
-		q = nq;
+insert:
+	/* Prepare to insert into block queue. */
+	tqen = uma_zalloc(tcp_reass_zone, (M_NOWAIT|M_ZERO));
+	if (tqen == NULL) {
+		tcpstat.tcps_rcvmemdrop++;
+		m_freem(m);
+		*tlenp = 0;
+		return (0);
 	}
+	tqen->trq_seq = th->th_seq;
+	tqen->trq_len = *tlenp;
+	tqen->trq_segs = segs;
+	tqen->trq_m = m;
+	tqen->trq_ml = m_last(m);
 
-	/* Insert the new segment queue entry into place. */
-	te->tqe_m = m;
-	te->tqe_th = th;
-	te->tqe_len = *tlenp;
+	/* Where to insert. */
+	if (tqe)
+		TAILQ_INSERT_BEFORE(tqe, tqen, trq_q);
+	else
+		TAILQ_INSERT_HEAD(&tp->t_trq, tqen, trq_q);
 
-	if (p == NULL) {
-		LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
-	} else {
-		LIST_INSERT_AFTER(p, te, tqe_q);
-	}
-
+	/* Missing segment? */
+	if (tp->rcv_nxt != th->th_seq)
+		return (0);
 present:
 	/*
 	 * Present data to user, advancing rcv_nxt through
@@ -261,25 +325,55 @@
 	 */
 	if (!TCPS_HAVEESTABLISHED(tp->t_state))
 		return (0);
-	q = LIST_FIRST(&tp->t_segq);
-	if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+	tqe = TAILQ_FIRST(&tp->t_trq);
+	if (tqe == NULL || tqe->trq_seq != tp->rcv_nxt)
 		return (0);
 	SOCKBUF_LOCK(&so->so_rcv);
-	do {
-		tp->rcv_nxt += q->tqe_len;
-		flags = q->tqe_th->th_flags & TH_FIN;
-		nq = LIST_NEXT(q, tqe_q);
-		LIST_REMOVE(q, tqe_q);
+	TAILQ_FOREACH_SAFE(tqe, &tp->t_trq, trq_q, tqen) {
+		if (tqe->trq_seq != tp->rcv_nxt)
+			break;
+#if 1
+		/* XXX: This is bogus if we had a FIN. */
+		flags = tqe->trq_flags & TH_FIN;
+#endif
+		tp->rcv_nxt += tqe->trq_len;
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
-			m_freem(q->tqe_m);
+			m_freem(tqe->trq_m);
 		else
-			sbappendstream_locked(&so->so_rcv, q->tqe_m);
-		uma_zfree(tcp_reass_zone, q);
-		tp->t_segqlen--;
-		tcp_reass_qsize--;
-		q = nq;
-	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
+			sbappendstream_locked(&so->so_rcv, tqe->trq_m);
+		KASSERT(tp->t_trqlen >= tqe->trq_segs,
+		    ("%s: t_trqlen incorrect", __func__));
+		tp->t_trqlen -= tqe->trq_segs;
+		tcp_reass_qsize -= tqe->trq_segs;
+		TAILQ_REMOVE(&tp->t_trq, tqe, trq_q);
+		uma_zfree(tcp_reass_zone, tqe);
+	}
+	/* NB: sorwakeup_locked() does an implicit socket buffer unlock. */
+	sorwakeup_locked(so);
 	ND6_HINT(tp);
-	sorwakeup_locked(so);
+#if 1
 	return (flags);
+#else
+	return (0);
+#endif
+}
+
+/*
+ * Free the reassembly queue on tcpcb free and on general memory shortage.
+ */
+void
+tcp_reass_qfree(struct tcpcb *tp) {
+	struct trq *tqe, *tqen;
+
+	INP_LOCK_ASSERT(tp->t_inpcb);
+
+	TAILQ_FOREACH_SAFE(tqe, &tp->t_trq, trq_q, tqen) {
+		m_freem(tqe->trq_m);
+		KASSERT(tp->t_trqlen >= tqe->trq_segs,
+		    ("%s: t_trqlen incorrect", __func__));
+		tp->t_trqlen -= tqe->trq_segs;
+		tcp_reass_qsize -= tqe->trq_segs;
+		TAILQ_REMOVE(&tp->t_trq, tqe, trq_q);
+		uma_zfree(tcp_reass_zone, tqe);
+	}
 }

==== //depot/projects/tcp_reass/netinet/tcp_subr.c#2 (text+ko) ====

@@ -593,7 +593,6 @@
 		return (NULL);
 	tp = &tm->tcb;
 	tp->t_timers = &tm->tt;
-	/*	LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
 	tp->t_maxseg = tp->t_maxopd =
 #ifdef INET6
 		isipv6 ? tcp_v6mssdflt :
@@ -611,7 +610,8 @@
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (tcp_do_sack)
 		tp->t_flags |= TF_SACK_PERMIT;
-	TAILQ_INIT(&tp->snd_holes);
+	TAILQ_INIT(&tp->snd_holes);		/* Covered by M_ZERO. */
+	TAILQ_INIT(&tp->t_trq);			/* Covered by M_ZERO. */
 	tp->t_inpcb = inp;	/* XXX */
 	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
@@ -665,7 +665,6 @@
 void
 tcp_discardcb(struct tcpcb *tp)
 {
-	struct tseg_qent *q;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
@@ -742,18 +741,13 @@
 		tcp_hc_update(&inp->inp_inc, &metrics);
 	}
 
-	/* free the reassembly queue, if any */
-	while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
-		LIST_REMOVE(q, tqe_q);
-		m_freem(q->tqe_m);
-		uma_zfree(tcp_reass_zone, q);
-		tp->t_segqlen--;
-		tcp_reass_qsize--;
-	}
+	/* Free the reassembly queue and other data structures. */
+	tcp_reass_qfree(tp);
+	tcp_free_sackholes(tp);
+
 	/* Disconnect offload device, if any. */
 	tcp_offload_detach(tp);
 		
-	tcp_free_sackholes(tp);
 	inp->inp_ppcb = NULL;
 	tp->t_inpcb = NULL;
 	uma_zfree(tcpcb_zone, tp);
@@ -794,37 +788,27 @@
 	return (tp);
 }
 
+/*
+ * Walk the tcpbs, if existing, and flush the reassembly queue,
+ * if there is one...
+ * According to RFC xxx we may flush the reassembly queue even
+ * if we have indicated receipt of segments through SACK.
+ */
 void
 tcp_drain(void)
 {
+	struct inpcb *inpb;
+	struct tcpcb *tcpb;
 
 	if (do_tcpdrain) {
-		struct inpcb *inpb;
-		struct tcpcb *tcpb;
-		struct tseg_qent *te;
 
-	/*
-	 * Walk the tcpbs, if existing, and flush the reassembly queue,
-	 * if there is one...
-	 * XXX: The "Net/3" implementation doesn't imply that the TCP
-	 *      reassembly queue should be flushed, but in a situation
-	 *	where we're really low on mbufs, this is potentially
-	 *	usefull.
-	 */
 		INP_INFO_RLOCK(&tcbinfo);
 		LIST_FOREACH(inpb, tcbinfo.ipi_listhead, inp_list) {
 			if (inpb->inp_vflag & INP_TIMEWAIT)
 				continue;
 			INP_LOCK(inpb);
 			if ((tcpb = intotcpcb(inpb)) != NULL) {
-				while ((te = LIST_FIRST(&tcpb->t_segq))
-			            != NULL) {
-					LIST_REMOVE(te, tqe_q);
-					m_freem(te->tqe_m);
-					uma_zfree(tcp_reass_zone, te);
-					tcpb->t_segqlen--;
-					tcp_reass_qsize--;
-				}
+				tcp_reass_qfree(tcpb);
 				tcp_clean_sackreport(tcpb);
 			}
 			INP_UNLOCK(inpb);

==== //depot/projects/tcp_reass/netinet/tcp_usrreq.c#2 (text+ko) ====

@@ -1745,7 +1745,7 @@
 
 	db_print_indent(indent);
 	db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
-	   LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+	   TAILQ_FIRST(&tp->t_trq), tp->t_trqlen, tp->t_dupacks);
 
 	db_print_indent(indent);
 	db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",

==== //depot/projects/tcp_reass/netinet/tcp_var.h#2 (text+ko) ====

@@ -40,16 +40,20 @@
  */
 extern int	tcp_do_rfc1323;
 
-/* TCP segment queue entry */
-struct tseg_qent {
-	LIST_ENTRY(tseg_qent) tqe_q;
-	int	tqe_len;		/* TCP segment data length */
-	struct	tcphdr *tqe_th;		/* a pointer to tcp header */
-	struct	mbuf	*tqe_m;		/* mbuf contains packet */
+/* TCP reassembly queue segment entry. */
+struct trq {
+	TAILQ_ENTRY(trq) trq_q;
+	tcp_seq		trq_seq;	/* start of segment */
+	int		trq_len;	/* length of segment */
+	int		trq_segs;	/* number of mbufs */
+	int		trq_flags;	/* flags for segment chain */
+#define TRQ_FIN		0x01		/* FIN was on last segment */
+	struct mbuf	*trq_m;		/* mbuf chain of data */
+	struct mbuf	*trq_ml;	/* last mbuf in chain of data */
 };
-LIST_HEAD(tsegqe_head, tseg_qent);
-extern int	tcp_reass_qsize;
-extern struct uma_zone *tcp_reass_zone;
+TAILQ_HEAD(trq_head, trq);
+extern	int		tcp_reass_qsize;
+extern	struct uma_zone	*tcp_reass_zone;
 
 struct sackblk {
 	tcp_seq start;		/* start seq no. of sack block */
@@ -92,8 +96,8 @@
  * Organized for 16 byte cacheline efficiency.
  */
 struct tcpcb {
-	struct	tsegqe_head t_segq;	/* segment reassembly queue */
-	int	t_segqlen;		/* segment reassembly queue length */
+	struct	trq_head t_trq;		/* segment reassembly queue */
+	int	t_trqlen;		/* segment reassembly queue length */
 	int	t_dupacks;		/* consecutive dup acks recd */
 
 	struct tcp_timer *t_timers;	/* All the TCP timers in one struct */
@@ -531,6 +535,7 @@
 	    const void *);
 int	 tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
 void	 tcp_reass_init(void);
+void	 tcp_reass_qfree(struct tcpcb *);
 void	 tcp_input(struct mbuf *, int);
 u_long	 tcp_maxmtu(struct in_conninfo *, int *);
 u_long	 tcp_maxmtu6(struct in_conninfo *, int *);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200801112120.m0BLKf75004734>