Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 3 Feb 2008 07:18:53 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 134697 for review
Message-ID:  <200802030718.m137IrmC041207@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=134697

Change 134697 by kmacy@kmacy:storage:toehead on 2008/02/03 07:18:30

	- fix socket buffer accounting 
	- use sbappendstream

Affected files ...

.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#14 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#15 edit

Differences ...

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#14 (text+ko) ====

@@ -586,12 +586,19 @@
 	int dack_mode, must_send, read;
 	u32 thres, credits, dack = 0;
 
+	so = tp->t_inpcb->inp_socket;
 	if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
-		(tp->t_state == TCPS_FIN_WAIT_2)))
+		(tp->t_state == TCPS_FIN_WAIT_2))) {
+		if (copied) {
+			SOCKBUF_LOCK(&so->so_rcv);
+			toep->tp_copied_seq += copied;
+			SOCKBUF_UNLOCK(&so->so_rcv);
+		}
+		
 		return;
-	INP_LOCK_ASSERT(tp->t_inpcb);
+	}
 	
-	so = tp->t_inpcb->inp_socket;
+	INP_LOCK_ASSERT(tp->t_inpcb);	
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (copied)
 		toep->tp_copied_seq += copied;
@@ -1755,9 +1762,12 @@
 		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
 		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
 #endif
-	sbappend(&so->so_rcv, m);
+	SOCKBUF_LOCK(&so->so_rcv);
+	sbappendstream_locked(&so->so_rcv, m);
 	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
-		sorwakeup(so);
+		sorwakeup_locked(so);
+	else
+		SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 /*
@@ -1895,11 +1905,12 @@
 	    "new_rx_data: seq 0x%x len %u",
 	    m->m_seq, m->m_pkthdr.len);
 #endif
+	INP_UNLOCK(tp->t_inpcb);
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sb_notify(&so->so_rcv))
 		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);
 
-	sbappend_locked(&so->so_rcv, m);
+	sbappendstream_locked(&so->so_rcv, m);
 
 #ifdef notyet
 	/*
@@ -1912,7 +1923,7 @@
 		so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
 #endif
 	
-	INP_UNLOCK(tp->t_inpcb);
+
 	DPRINTF("sb_cc=%d sb_mbcnt=%d\n",
 	    so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);
 	    
@@ -2134,11 +2145,12 @@
 	tp->rcv_nxt += m->m_len;
 
 	tp->t_rcvtime = ticks;
+	SOCKBUF_LOCK(&so->so_rcv);
 	sbappendstream_locked(&so->so_rcv, m);
 	
 	if ((so->so_state & SS_NOFDREF) == 0)
 		sorwakeup_locked(so);
-	
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	TRACE_EXIT;
 }
 
@@ -2230,7 +2242,7 @@
 	if (!(bsp->flags & DDP_BF_NOFLIP))
 		q->cur_buf ^= 1;
 	tp->t_rcvtime = ticks;
-	sbappend(&so->so_rcv, m);
+	sbappendstream(&so->so_rcv, m);
 	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
 		sorwakeup(so);
 	return (1);

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#15 (text+ko) ====

@@ -41,12 +41,14 @@
 #include <sys/condvar.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/socketvar.h>
 #include <sys/uio.h>
 
 #include <machine/bus.h>
+#include <machine/cpu.h>
 
 #include <net/if.h>
 #include <net/route.h>
@@ -525,7 +527,42 @@
 	return pru_sosend(so, addr, uio, top, control, flags, td);
 }
 
+/*
+ * Following replacement or removal of the first mbuf on the first mbuf chain
+ * of a socket buffer, push necessary state changes back into the socket
+ * buffer so that other consumers see the values consistently.  'nextrecord'
+ * is the callers locally stored value of the original value of
+ * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
+ * NOTE: 'nextrecord' may be NULL.
+ */
+#if 1
+static __inline void
+sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
+{
+
+	SOCKBUF_LOCK_ASSERT(sb);
+	/*
+	 * First, update for the new value of nextrecord.  If necessary, make
+	 * it the first record.
+	 */
+	if (sb->sb_mb != NULL)
+		sb->sb_mb->m_nextpkt = nextrecord;
+	else
+		sb->sb_mb = nextrecord;
 
+        /*
+         * Now update any dependent socket buffer fields to reflect the new
+         * state.  This is an expanded inline of SB_EMPTY_FIXUP(), with the
+	 * addition of a second clause that takes care of the case where
+	 * sb_mb has been updated, but remains the last record.
+         */
+        if (sb->sb_mb == NULL) {
+                sb->sb_mbtail = NULL;
+                sb->sb_lastrecord = NULL;
+        } else if (sb->sb_mb->m_nextpkt == NULL)
+                sb->sb_lastrecord = sb->sb_mb;
+}
+#endif
 
 #define IS_NONBLOCKING(so)	((so)->so_state & SS_NBIO)
 
@@ -536,14 +573,13 @@
 	struct toepcb *toep = tp->t_toe;
 	struct mbuf *m;
 	uint32_t offset;
-	int err, flags, avail, len, buffers_freed, copied, copied_unacked;
+	int err, flags, avail, len, copied, copied_unacked;
 	int target;		/* Read at least this many bytes */
 	int user_ddp_ok, user_ddp_pending = 0;
 	struct ddp_state *p;
 	struct inpcb *inp = sotoinpcb(so);
 
-
-	copied = copied_unacked = buffers_freed = 0;
+	avail = offset = copied = copied_unacked = 0;
 	flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
 
 	err = sblock(&so->so_rcv, SBLOCKWAIT(flags));
@@ -583,6 +619,8 @@
 			so->so_error = 0;
 			goto done;
 		}
+		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 
+			goto done;
 		if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
 			goto done;
 		if (tp->t_state == TCPS_CLOSED) {
@@ -625,15 +663,23 @@
 	} else if (copied >= target)
 		goto done;
 	else {
-		SOCKBUF_UNLOCK(&so->so_rcv);
-		INP_LOCK(inp);
-		t3_cleanup_rbuf(tp, copied_unacked);
-		INP_UNLOCK(inp);
-		SOCKBUF_LOCK(&so->so_rcv);
-		copied_unacked = 0;
+		int i = 0;
+		if (copied_unacked) {
+			SOCKBUF_UNLOCK(&so->so_rcv);
+			INP_LOCK(inp);
+			t3_cleanup_rbuf(tp, copied_unacked);
+			INP_UNLOCK(inp);
+			copied_unacked = 0;
+			if (mp_ncpus > 1)
+				while (i++ < 200 && so->so_rcv.sb_mb == NULL)
+					cpu_spinwait();
+			SOCKBUF_LOCK(&so->so_rcv);
+		}
+		
 		if (so->so_rcv.sb_mb)
 			goto restart;
-		printf("sbwaiting 2 copied=%d target=%d\n", copied, target);
+		printf("sbwaiting 2 copied=%d target=%d avail=%d so=%p mb=%p cc=%d\n", copied, target, avail, so,
+		    so->so_rcv.sb_mb, so->so_rcv.sb_cc);
 		if ((err = sbwait(&so->so_rcv)) != 0)
 			goto done;
 	}
@@ -642,24 +688,27 @@
 	if (m->m_pkthdr.len == 0) {
 		if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0)
 			panic("empty mbuf and NOCOPY not set\n");
+		printf("dropping empty mbuf\n");
 		user_ddp_pending = 0;
-		sbfree(&so->so_rcv, m);
-		m = so->so_rcv.sb_mb = m_free(m);
+		sbdroprecord_locked(&so->so_rcv);
 		goto done;
 	}
+	
 	offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
 	DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d pktlen=%d is_ddp(m)=%d\n",
 	    m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset, m->m_pkthdr.len, !!is_ddp(m));
+
 	if (offset >= m->m_pkthdr.len)
 		panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x "
 		    "pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq,
 		    m->m_pkthdr.len, m->m_ddp_flags);
+
 	avail = m->m_pkthdr.len - offset;
 	if (len < avail) {
-		if (is_ddp(m) &&  (m->m_ddp_flags & DDP_BF_NOCOPY)) 
+		if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY)) 
 			panic("bad state in t3_soreceive\n");
 		avail = len;
-	}	
+	}
 #ifdef URGENT_DATA_SUPPORTED
 	/*
 	 * Check if the data we are preparing to copy contains urgent
@@ -720,16 +769,15 @@
 			goto done_unlocked;
 		}
 		SOCKBUF_LOCK(&so->so_rcv);
-		if (!(resid > uio->uio_resid))
-			printf("copied zero bytes :-/ resid=%d uio_resid=%d copied=%d copied_unacked=%d\n",
-			    resid, uio->uio_resid, copied, copied_unacked);
-	}	
-
-	sbdrop_locked(&so->so_rcv, avail);
-	buffers_freed++;
+		if (avail != (resid - uio->uio_resid))
+			printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n",
+			    avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m));
+	}
+	
 	copied += avail;
 	copied_unacked += avail;
 	len -= avail;
+	
 #ifdef URGENT_DATA_SUPPORTED
 skip_copy:
 	if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq))
@@ -741,7 +789,8 @@
 	 */
 	if (avail + offset >= m->m_pkthdr.len) {
 		unsigned int fl = m->m_ddp_flags;
-		int exitnow, got_psh = 0, nomoredata = 0;
+		int exitnow, count, got_psh = 0, nomoredata = 0;
+		struct mbuf *nextrecord;
 		
 		if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) {
 			if (is_ddp_psh(m) && user_ddp_pending)
@@ -757,25 +806,47 @@
 				p->ubuf_ddp_ready = 1;
 			}
 		}
+
+		nextrecord = m->m_nextpkt;
+		count = m->m_pkthdr.len;
+		while (count) {
+			count -= m->m_len;
+			sbfree(&so->so_rcv, m);
+			so->so_rcv.sb_mb = m_free(m);
+			m = so->so_rcv.sb_mb;	
+		}
+		sockbuf_pushsync(&so->so_rcv, nextrecord);
 		
+
 		exitnow = got_psh || nomoredata;
-		if  ((so->so_rcv.sb_mb == NULL) && exitnow)
+		if  ((so->so_rcv.sb_mb == NULL) && exitnow) {
+			printf("exiting\n");
 			goto done;
-	}
-	if (len > 0)
+		}
+		if (copied_unacked > (so->so_rcv.sb_hiwat >> 2)) {
+			SOCKBUF_UNLOCK(&so->so_rcv);
+			INP_LOCK(inp);
+			t3_cleanup_rbuf(tp, copied_unacked);
+			INP_UNLOCK(inp);
+			copied_unacked = 0;
+			SOCKBUF_LOCK(&so->so_rcv);
+		}
+	} 
+	if (len > 0) 
 		goto restart;
+	
 
+	done:
 	
-done:
 	/*
 	 * If we can still receive decide what to do in preparation for the
 	 * next receive.  Note that RCV_SHUTDOWN is set if the connection
 	 * transitioned to CLOSE but not if it was in that state to begin with.
 	 */
 	if (__predict_true((so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) {
-		SOCKBUF_UNLOCK(&so->so_rcv);
-		SOCKBUF_LOCK(&so->so_rcv);
 		if (user_ddp_pending) {
+			SOCKBUF_UNLOCK(&so->so_rcv);
+			SOCKBUF_LOCK(&so->so_rcv);
 			user_ddp_ok = 0;
 			t3_cancel_ubuf(toep);
 			if (so->so_rcv.sb_mb) {
@@ -810,12 +881,11 @@
 #endif
 	SOCKBUF_UNLOCK(&so->so_rcv);
 done_unlocked:	
-	if (copied) {
+	if (copied_unacked) {
 		INP_LOCK(inp);
 		t3_cleanup_rbuf(tp, copied_unacked);
 		INP_UNLOCK(inp);
 	}
-	
 	sbunlock(&so->so_rcv);
 
 	return (err);
@@ -844,27 +914,22 @@
 	 *  - iovcnt is 1
 	 *
 	 */
+	
 	if ((tp->t_flags & TF_TOE) && ((flags & (MSG_WAITALL|MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0)
-#ifdef notyet
-	    && ((so->so_state & SS_NBIO) == 0)
-#endif
 	    && (uio->uio_iovcnt == 1) &&
 	    ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) && (mp0 == NULL)) {
 		tdev =  TOE_DEV(so);
 		zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
 		zcopy_enabled = TOM_TUNABLE(tdev, ddp);
-
 		if ((uio->uio_resid > zcopy_thres) &&
 		    (uio->uio_iovcnt == 1)
-#if 0
-		    &&  ((so->so_state & SS_NBIO) == 0)
-#endif		    
 		    && zcopy_enabled) {
 			rv = t3_soreceive(so, flagsp, uio);
 			if (rv != EAGAIN)
 				return (rv);
-		}
-	}
+		} 
+	} else if (tp->t_flags & TF_TOE)
+		printf("skipping t3_soreceive\n");
 	return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200802030718.m137IrmC041207>