Date:      Sun, 18 Aug 2002 04:18:01 -0700 (PDT)
From:      Jonathan Mini <mini@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 16190 for review
Message-ID:  <200208181118.g7IBI1jX008201@freefall.freebsd.org>

http://people.freebsd.org/~peter/p4db/chv.cgi?CH=16190

Change 16190 by mini@mini_stylus on 2002/08/18 04:17:56

	Fix files p4 seems to have corrupted (filled with NULs).
	How very very icky.

Affected files ...

.. //depot/projects/kse/sys/dev/fxp/if_fxp.c#17 edit
.. //depot/projects/kse/sys/kern/subr_mbuf.c#16 edit
.. //depot/projects/kse/sys/netinet/tcp_output.c#11 edit

Differences ...

==== //depot/projects/kse/sys/dev/fxp/if_fxp.c#17 (text+ko) ====

@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/dev/fxp/if_fxp.c,v 1.138 2002/08/09 01:48:28 luigi Exp $
+ * $FreeBSD: src/sys/dev/fxp/if_fxp.c,v 1.139 2002/08/18 07:04:58 sobomax Exp $
  */
 
 /*
@@ -1193,7 +1193,7 @@
 #ifdef DEVICE_POLLING
 	struct ifnet *ifp = &sc->sc_if;
 
-	if (ifp->if_ipending & IFF_POLLING)
+	if (ifp->if_flags & IFF_POLLING)
 		return;
 	if (ether_poll_register(fxp_poll, ifp)) {
 		/* disable interrupts */
@@ -1785,7 +1785,7 @@
 	 * ... but only do that if we are not polling. And because (presumably)
 	 * the default is interrupts on, we need to disable them explicitly!
 	 */
-	if ( ifp->if_ipending & IFF_POLLING )
+	if ( ifp->if_flags & IFF_POLLING )
 		CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE);
 	else
 #endif /* DEVICE_POLLING */
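A minimal stand-alone sketch of what the two polling hunks above restore:
IFF_POLLING is an interface flag, so the test belongs on if_flags rather
than if_ipending (the corrupted file had reverted the field name).  The
flag value and the struct below are illustrative stand-ins, not the real
struct ifnet:

#include <stdio.h>

#define	IFF_POLLING	0x10000		/* illustrative stand-in value */

struct ifnet_model {			/* hypothetical model of struct ifnet */
	int if_flags;			/* where IFF_POLLING actually lives */
};

int
main(void)
{
	struct ifnet_model ifp = { IFF_POLLING };

	if (ifp.if_flags & IFF_POLLING)
		printf("polling active: keep chip interrupts disabled\n");
	else
		printf("interrupt-driven mode\n");
	return (0);
}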
@@ -1991,20 +1991,161 @@
 		}
 		break;
 
-	case Sles[args->fd];
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		if (ifp->if_flags & IFF_ALLMULTI)
+			sc->flags |= FXP_FLAG_ALL_MCAST;
+		else
+			sc->flags &= ~FXP_FLAG_ALL_MCAST;
+		/*
+		 * Multicast list has changed; set the hardware filter
+		 * accordingly.
+		 */
+		if ((sc->flags & FXP_FLAG_ALL_MCAST) == 0)
+			fxp_mc_setup(sc);
+		/*
+		 * fxp_mc_setup() can set FXP_FLAG_ALL_MCAST, so check it
+		 * again rather than else {}.
+		 */
+		if (sc->flags & FXP_FLAG_ALL_MCAST)
+			fxp_init(sc);
+		error = 0;
+		break;
+
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		if (sc->miibus != NULL) {
+			mii = device_get_softc(sc->miibus);
+                        error = ifmedia_ioctl(ifp, ifr,
+                            &mii->mii_media, command);
+		} else {
+                        error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
+		}
+		break;
+
+	default:
+		error = EINVAL;
+	}
+	splx(s);
+	return (error);
+}
+
+/*
+ * Fill in the multicast address list and return number of entries.
+ */
+static int
+fxp_mc_addrs(struct fxp_softc *sc)
+{
+	struct fxp_cb_mcs *mcsp = sc->mcsp;
+	struct ifnet *ifp = &sc->sc_if;
+	struct ifmultiaddr *ifma;
+	int nmcasts;
+
+	nmcasts = 0;
+	if ((sc->flags & FXP_FLAG_ALL_MCAST) == 0) {
+#if __FreeBSD_version < 500000
+		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 #else
-    struct file		*fp = p->p_fd->fd_ofiles[args->fd];
+		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 #endif
-    u_long		cmd = args->cmd;
-    caddr_t             data = (caddr_t) args->arg;
-    /*
-     * Pass the ioctl off to our standard handler.
-     */
-    return(fo_ioctl(fp, cmd, data, p));
+			if (ifma->ifma_addr->sa_family != AF_LINK)
+				continue;
+			if (nmcasts >= MAXMCADDR) {
+				sc->flags |= FXP_FLAG_ALL_MCAST;
+				nmcasts = 0;
+				break;
+			}
+			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+			    (void *)(uintptr_t)(volatile void *)
+				&sc->mcsp->mc_addr[nmcasts][0], 6);
+			nmcasts++;
+		}
+	}
+	mcsp->mc_cnt = nmcasts * 6;
+	return (nmcasts);
 }
-#endif /* DRM_LINUX */
-#endif /* __FreeBSD__ */
-
-x_queued = 1;
+
+/*
+ * Program the multicast filter.
+ *
+ * We have an artificial restriction that the multicast setup command
+ * must be the first command in the chain, so we take steps to ensure
+ * this, which allows us to keep up the performance of
+ * the pre-initialized command ring (esp. link pointers) by not actually
+ * inserting the mcsetup command in the ring - i.e. its link pointer
+ * points to the TxCB ring, but the mcsetup descriptor itself is not part
+ * of it. We then can do 'CU_START' on the mcsetup descriptor and have it
+ * lead into the regular TxCB ring when it completes.
+ *
+ * This function must be called at splimp.
+ */
+static void
+fxp_mc_setup(struct fxp_softc *sc)
+{
+	struct fxp_cb_mcs *mcsp = sc->mcsp;
+	struct ifnet *ifp = &sc->sc_if;
+	int count;
+
+	/*
+	 * If there are queued commands, we must wait until they are all
+	 * completed. If we are already waiting, then add a NOP command
+	 * with interrupt option so that we're notified when all commands
+	 * have been completed - fxp_start() ensures that no additional
+	 * TX commands will be added when need_mcsetup is true.
+	 */
+	if (sc->tx_queued) {
+		struct fxp_cb_tx *txp;
+
+		/*
+		 * need_mcsetup will be true if we are already waiting for the
+		 * NOP command to be completed (see below). In this case, bail.
+		 */
+		if (sc->need_mcsetup)
+			return;
+		sc->need_mcsetup = 1;
+
+		/*
+		 * Add a NOP command with interrupt so that we are notified
+		 * when all TX commands have been processed.
+		 */
+		txp = sc->cbl_last->next;
+		txp->mb_head = NULL;
+		txp->cb_status = 0;
+		txp->cb_command = FXP_CB_COMMAND_NOP |
+		    FXP_CB_COMMAND_S | FXP_CB_COMMAND_I;
+		/*
+		 * Advance the end of list forward.
+		 */
+		sc->cbl_last->cb_command &= ~FXP_CB_COMMAND_S;
+		sc->cbl_last = txp;
+		sc->tx_queued++;
+		/*
+		 * Issue a resume in case the CU has just suspended.
+		 */
+		fxp_scb_wait(sc);
+		fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_RESUME);
+		/*
+		 * Set a 5 second timer just in case we don't hear from the
+		 * card again.
+		 */
+		ifp->if_timer = 5;
+
+		return;
+	}
+	sc->need_mcsetup = 0;
+
+	/*
+	 * Initialize multicast setup descriptor.
+	 */
+	mcsp->next = sc->cbl_base;
+	mcsp->mb_head = NULL;
+	mcsp->cb_status = 0;
+	mcsp->cb_command = FXP_CB_COMMAND_MCAS |
+	    FXP_CB_COMMAND_S | FXP_CB_COMMAND_I;
+	mcsp->link_addr = vtophys(&sc->cbl_base->cb_status);
+	(void) fxp_mc_addrs(sc);
+	sc->cbl_first = sc->cbl_last = (struct fxp_cb_tx *) mcsp;
+	sc->tx_queued = 1;
 
 	/*
 	 * Wait until command unit is not active. This should never

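The fxp_mc_setup() comment in the hunk above describes the key trick: the
mcsetup descriptor sits outside the pre-initialized TxCB ring, but its
link pointer aims at the ring head, so a CU_START on it falls through
into the regular ring when it completes.  A stand-alone model of that
chaining follows (all names here are illustrative, not driver code):

#include <stdio.h>

struct desc {
	const char  *name;
	struct desc *link;		/* models the hardware link_addr */
};

int
main(void)
{
	struct desc ring[3] = {
		{ "txcb0", &ring[1] },
		{ "txcb1", &ring[2] },
		{ "txcb2", &ring[0] },	/* pre-initialized circular ring */
	};
	struct desc mcsetup = { "mcsetup", &ring[0] };	/* not in the ring */
	struct desc *cu;
	int i;

	/* "CU_START" on mcsetup leads into the regular TxCB ring. */
	for (cu = &mcsetup, i = 0; i < 4; i++, cu = cu->link)
		printf("%s\n", cu->name);
	return (0);
}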
==== //depot/projects/kse/sys/kern/subr_mbuf.c#16 (text+ko) ====

@@ -232,40 +232,110 @@
 	(mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)];	\
 	(*((mb_lst)->mb_cont.mc_objcount))--;				\
 	if ((mb_bckt)->mb_numfree == 0) {				\
-		SLIST_REMOVs, >)) {
-		diff = ref;
-		timespecsub(&ref, &ts);
-	} else {
-		diff = ts;
-		timespecsub(&diff, &ref);
-	}
-	if (ts.tv_sec >= 2) {
-		/* badly off, adjust it */
-		tc_setclock(&ts);
-	}
+		SLIST_REMOVE_HEAD(_mchd, mb_blist);			\
+		SLIST_NEXT((mb_bckt), mb_blist) = NULL;			\
+		(mb_bckt)->mb_owner |= MB_BUCKET_FREE;			\
+	}								\
 }
 
+#define	MB_PUT_OBJECT(mb_objp, mb_bckt, mb_lst)				\
+	(mb_bckt)->mb_free[((mb_bckt)->mb_numfree)] = (mb_objp);	\
+	(mb_bckt)->mb_numfree++;					\
+	(*((mb_lst)->mb_cont.mc_objcount))++;
+
+#define	MB_MBTYPES_INC(mb_cnt, mb_type, mb_num)				\
+	if ((mb_type) != MT_NOTMBUF)					\
+	    (*((mb_cnt)->mb_cont.mc_types + (mb_type))) += (mb_num)
+
+#define	MB_MBTYPES_DEC(mb_cnt, mb_type, mb_num)				\
+	if ((mb_type) != MT_NOTMBUF)					\
+	    (*((mb_cnt)->mb_cont.mc_types + (mb_type))) -= (mb_num)
+
+/*
+ * Ownership of buckets/containers is represented by integers.  The PCPU
+ * lists range from 0 to NCPU-1.  We need a free numerical id for the general
+ * list (we use NCPU).  We also need a non-conflicting free bit to indicate
+ * that the bucket is free and removed from a container, while not losing
+ * the bucket's originating container id.  We use the highest bit
+ * for the free marker.
+ */
+#define	MB_GENLIST_OWNER	(NCPU)
+#define	MB_BUCKET_FREE		(1 << (sizeof(int) * 8 - 1))
+
+/* Statistics structures for allocator (per-CPU and general). */
+static struct mbpstat mb_statpcpu[NCPU + 1];
+struct mbstat mbstat;
+
+/* Sleep time for wait code (in ticks). */
+static int mbuf_wait = 64;
+
+static u_int mbuf_limit = 512;	/* Upper limit on # of mbufs per CPU. */
+static u_int clust_limit = 128;	/* Upper limit on # of clusters per CPU. */
+
+/*
+ * Objects exported by sysctl(8).
+ */
+SYSCTL_DECL(_kern_ipc);
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, 
+    "Maximum number of mbuf clusters available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
+    "Maximum number of mbufs available"); 
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
+    "Number used to scale kmem_map to ensure sufficient space for counters");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RD, &nsfbufs, 0,
+    "Maximum number of sendfile(2) sf_bufs available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0,
+    "Sleep time of mbuf subsystem wait allocations during exhaustion");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_limit, CTLFLAG_RW, &mbuf_limit, 0,
+    "Upper limit of number of mbufs allowed on each PCPU list");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_limit, CTLFLAG_RW, &clust_limit, 0,
+    "Upper limit of number of mbuf clusters allowed on each PCPU list");
+SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
+    "Mbuf general information and statistics");
+SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu,
+    sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics");
+
+/*
+ * Prototypes of local allocator routines.
+ */
+static void		*mb_alloc_wait(struct mb_lstmngr *, short);
+static struct mb_bucket	*mb_pop_cont(struct mb_lstmngr *, int,
+			    struct mb_pcpu_list *);
+static void		 mb_reclaim(void);
+static void		 mbuf_init(void *);
+
+/*
+ * Initial allocation numbers.  Each parameter represents the number of buckets
+ * of each object that will be placed initially in each PCPU container for
+ * said object.
+ */
+#define	NMB_MBUF_INIT	4
+#define	NMB_CLUST_INIT	16
+
 /*
- * Write system time back to RTC
+ * Internal flags that allow for cache locks to remain "persistent" across
+ * allocation and free calls.  They may be used in combination.
+ */
+#define	MBP_PERSIST	0x1	/* Return with lock still held. */
+#define	MBP_PERSISTENT	0x2	/* Cache lock is already held coming in. */
+
+/*
+ * Initialize the mbuf subsystem.
+ *
+ * We sub-divide the kmem_map into several submaps; this way, we don't have
+ * to worry about artificially limiting the number of mbuf or mbuf cluster
+ * allocations, due to fear of one type of allocation "stealing" address
+ * space initially reserved for another.
+ *
+ * Set up both the general containers and all the PCPU containers.  Populate
+ * the PCPU containers with initial numbers.
  */
+MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures");
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL)
 void
-resettodr()
+mbuf_init(void *dummy)
 {
-	struct timespec ts;
-	int error;
-
-	if (disable_rtc_set || clock_dev == NULL)
-		return;
-
-	getnanotime(&ts);
-	ts.tv_sec -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-	if ((error = CLOCK_SETTIME(clock_dev, &ts)) != 0) {
-		printf("warning: clock_settime failed (%d), time-of-day clock "
-		    "not adjusted to system time\n", error);
-		return;
-	}
-}
-
-u_list *pcpu_cnt;
+	struct mb_pcpu_list *pcpu_cnt;
 	vm_size_t mb_map_size;
 	int i, j;
 

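The ownership comment in this hunk packs two facts into one integer: the
originating container id in the low bits and a "free" marker in the
highest bit, so a free bucket still remembers its home container.  A
stand-alone sketch of that encoding (NCPU is pinned to 4 here only so
the example compiles outside the kernel; the macros mirror the hunk):

#include <stdio.h>

#define	NCPU			4	/* illustrative stand-in */
#define	MB_GENLIST_OWNER	(NCPU)
#define	MB_BUCKET_FREE		(1 << (sizeof(int) * 8 - 1))

int
main(void)
{
	int mb_owner;

	mb_owner = 2;			/* bucket lives on CPU 2's container */
	mb_owner |= MB_BUCKET_FREE;	/* freed and removed from container */

	printf("free: %s, home container: %d\n",
	    (mb_owner & MB_BUCKET_FREE) ? "yes" : "no",
	    mb_owner & ~MB_BUCKET_FREE);
	return (0);
}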
==== //depot/projects/kse/sys/netinet/tcp_output.c#11 (text+ko) ====

@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_output.c	8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.67 2002/08/12 03:22:46 jennifer Exp $
+ * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.68 2002/08/17 18:26:01 dillon Exp $
  */
 
 #include "opt_inet6.h"
@@ -168,6 +168,7 @@
 	sendalot = 0;
 	off = tp->snd_nxt - tp->snd_una;
 	win = min(tp->snd_wnd, tp->snd_cwnd);
+	win = min(win, tp->snd_bwnd);
 
 	flags = tcp_outflags[tp->t_state];
 	/*
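The one-line addition in the hunk above puts a third clamp on the send
window.  snd_wnd and snd_cwnd are the peer-advertised and congestion
windows; snd_bwnd appears to be the bandwidth-limited window from the
bandwidth-delay-product work landing in this same revision.  A
stand-alone illustration with made-up values:

#include <stdio.h>

static long
lmin(long a, long b)
{
	return (a < b ? a : b);
}

int
main(void)
{
	long snd_wnd = 65535;	/* peer-advertised window (made up) */
	long snd_cwnd = 8760;	/* congestion window (made up) */
	long snd_bwnd = 5840;	/* bandwidth-delay-product clamp (made up) */
	long win;

	win = lmin(snd_wnd, snd_cwnd);
	win = lmin(win, snd_bwnd);	/* the new clamp from this hunk */
	printf("effective send window: %ld\n", win);
	return (0);
}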
@@ -275,149 +276,140 @@
 	win = sbspace(&so->so_rcv);
 
 	/*
-	 * Se, check if there's an initial rtt
-	 * or rttvar.  Convert from the route-table units
-	 * to scaled multiples of the slow timeout timer.
+	 * Sender silly window avoidance.   We transmit under the following
+	 * conditions when len is non-zero:
+	 *
+	 *	- We have a full segment
+	 *	- This is the last buffer in a write()/send() and we are
+	 *	  either idle or running NODELAY
+	 *	- we've timed out (e.g. persist timer)
+	 *	- we have more than 1/2 the maximum send window's worth of
+	 *	  data (the receiver may be limiting the window size)
+	 *	- we need to retransmit
 	 */
-	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+	if (len) {
+		if (len == tp->t_maxseg)
+			goto send;
 		/*
-		 * XXX the lock bit for RTT indicates that the value
-		 * is also a minimum value; this is subject to time.
+		 * NOTE! on localhost connections an 'ack' from the remote
+		 * end may occur synchronously with the output and cause
+		 * us to flush a buffer queued with moretocome.  XXX
+		 *
+		 * note: the len + off check is almost certainly unnecessary.
 		 */
-		if (rt->rt_rmx.rmx_locks & RTV_RTT)
-			tp->t_rttmin = rtt / (RTM_RTTUNIT / hz);
-		tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
-		tcpstat.tcps_usedrtt++;
-		if (rt->rt_rmx.rmx_rttvar) {
-			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
-			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
-			tcpstat.tcps_usedrttvar++;
-		} else {
-			/* default variation is +- 1 rtt */
-			tp->t_rttvar =
-			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		if (!(tp->t_flags & TF_MORETOCOME) &&	/* normal case */
+		    (idle || (tp->t_flags & TF_NODELAY)) &&
+		    len + off >= so->so_snd.sb_cc &&
+		    (tp->t_flags & TF_NOPUSH) == 0) {
+			goto send;
 		}
-		TCPT_RANGESET(tp->t_rxtcur,
-			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-			      tp->t_rttmin, TCPTV_REXMTMAX);
+		if (tp->t_force)			/* typ. timeout case */
+			goto send;
+		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
+			goto send;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_max))	/* retransmit case */
+			goto send;
 	}
+
 	/*
-	 * if there's an mtu associated with the route, use it
-	 * else, use the link mtu.
+	 * Compare available window to amount of window
+	 * known to peer (as advertised window less
+	 * next expected input).  If the difference is at least two
+	 * max size segments, or at least 50% of the maximum possible
+	 * window, then we want to send a window update to the peer.
 	 */
-	if (rt->rt_rmx.rmx_mtu)
-		mss = rt->rt_rmx.rmx_mtu - min_protoh;
-	else {
-		if (isipv6) {
-			mss = nd_ifinfo[rt->rt_ifp->if_index].linkmtu -
-				min_protoh;
-			if (!in6_localaddr(&inp->in6p_faddr))
-				mss = min(mss, tcp_v6mssdflt);
-		} else {
-			mss = ifp->if_mtu - min_protoh;
-			if (!in_localaddr(inp->inp_faddr))
-				mss = min(mss, tcp_mssdflt);
-		}
+	if (win > 0) {
+		/*
+		 * "adv" is the amount we can increase the window,
+		 * taking into account that we are limited by
+		 * TCP_MAXWIN << tp->rcv_scale.
+		 */
+		long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
+			(tp->rcv_adv - tp->rcv_nxt);
+
+		if (adv >= (long) (2 * tp->t_maxseg))
+			goto send;
+		if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+			goto send;
 	}
-	mss = min(mss, offer);
+
 	/*
-	 * maxopd stores the maximum length of data AND options
-	 * in a segment; maxseg is the amount of data in a normal
-	 * segment.  We need to store this value (maxopd) apart
-	 * from maxseg, because now every segment carries options
-	 * and thus we normally have somewhat less data in segments.
+	 * Send if we owe peer an ACK.
 	 */
-	tp->t_maxopd = mss;
-
+	if (tp->t_flags & TF_ACKNOW)
+		goto send;
+	if ((flags & TH_RST) ||
+	    ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
+		goto send;
+	if (SEQ_GT(tp->snd_up, tp->snd_una))
+		goto send;
 	/*
-	 * In case of T/TCP, origoffer==-1 indicates, that no segments
-	 * were received yet.  In this case we just guess, otherwise
-	 * we do the same as before T/TCP.
+	 * If our state indicates that FIN should be sent
+	 * and we have not yet done so, or we're retransmitting the FIN,
+	 * then we need to send.
 	 */
- 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
-	    (origoffer == -1 ||
-	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
-		mss -= TCPOLEN_TSTAMP_APPA;
- 	if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
-	    (origoffer == -1 ||
-	     (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
-		mss -= TCPOLEN_CC_APPA;
+	if (flags & TH_FIN &&
+	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+		goto send;
 
-#if	(MCLBYTES & (MCLBYTES - 1)) == 0
-		if (mss > MCLBYTES)
-			mss &= ~(MCLBYTES-1);
-#else
-		if (mss > MCLBYTES)
-			mss = mss / MCLBYTES * MCLBYTES;
-#endif
 	/*
-	 * If there's a pipesize, change the socket buffer
-	 * to that size.  Make the socket buffers an integral
-	 * number of mss units; if the mss is larger than
-	 * the socket buffer, decrease the mss.
+	 * TCP window updates are not reliable; rather, a polling protocol
+	 * using ``persist'' packets is used to ensure receipt of window
+	 * updates.  The three ``states'' for the output side are:
+	 *	idle			not doing retransmits or persists
+	 *	persisting		to move a small or zero window
+	 *	(re)transmitting	and thereby not persisting
+	 *
+	 * callout_active(tp->tt_persist)
+	 *	is true when we are in persist state.
+	 * tp->t_force
+	 *	is set when we are called to send a persist packet.
+	 * callout_active(tp->tt_rexmt)
+	 *	is set when we are retransmitting
+	 * The output side is idle when both timers are zero.
+	 *
+	 * If send window is too small, there is data to transmit, and no
+	 * retransmit or persist is pending, then go to persist state.
+	 * If nothing happens soon, send when timer expires:
+	 * if window is nonzero, transmit what we can,
+	 * otherwise force out a byte.
 	 */
-#ifdef RTV_SPIPE
-	if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
-#endif
-		bufsize = so->so_snd.sb_hiwat;
-	if (bufsize < mss)
-		mss = bufsize;
-	else {
-		bufsize = roundup(bufsize, mss);
-		if (bufsize > sb_max)
-			bufsize = sb_max;
-		if (bufsize > so->so_snd.sb_hiwat)
-			(void)sbreserve(&so->so_snd, bufsize, so, NULL);
+	if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) &&
+	    !callout_active(tp->tt_persist)) {
+		tp->t_rxtshift = 0;
+		tcp_setpersist(tp);
 	}
-	tp->t_maxseg = mss;
 
-#ifdef RTV_RPIPE
-	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
-#endif
-		bufsize = so->so_rcv.sb_hiwat;
-	if (bufsize > mss) {
-		bufsize = roundup(bufsize, mss);
-		if (bufsize > sb_max)
-			bufsize = sb_max;
-		if (bufsize > so->so_rcv.sb_hiwat)
-			(void)sbreserve(&so->so_rcv, bufsize, so, NULL);
-	}
+	/*
+	 * No reason to send a segment, just return.
+	 */
+	return (0);
 
+send:
 	/*
-	 * Set the slow-start flight size depending on whether this
-	 * is a local network or not.
+	 * Before ESTABLISHED, force sending of initial options
+	 * unless TCP set not to do any options.
+	 * NOTE: we assume that the IP/TCP header plus TCP options
+	 * always fit in a single mbuf, leaving room for a maximum
+	 * link header, i.e.
+	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
 	 */
-	if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
-	    (!isipv6 && in_localaddr(inp->inp_faddr)))
-		tp->snd_cwnd = mss * ss_fltsz_local;
-	else 
-		tp->snd_cwnd = mss * ss_fltsz;
+	optlen = 0;
+#ifdef INET6
+	if (isipv6)
+		hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+	else
+#endif
+	hdrlen = sizeof (struct tcpiphdr);
+	if (flags & TH_SYN) {
+		tp->snd_nxt = tp->iss;
+		if ((tp->t_flags & TF_NOOPT) == 0) {
+			u_short mss;
 
-	if (rt->rt_rmx.rmx_ssthresh) {
-		/*
-		 * There's some sort of gateway or interface
-		 * buffer limit on the path.  Use this to set
-		 * the slow start threshhold, but set the
-		 * threshold to no less than 2*mss.
-		 */
-		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
-		tcpstat.tcps_usedssthresh++;
-	}
-}
-
-/*
- * Determine the MSS option to send on an outgoing SYN.
- */
-int
-tcp_mssopt(tp)
-	struct tcpcb *tp;
-{
-	struct rtentry *rt;
-#ifdef INET6
-	int isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
-	size_t min_protoh = isipv6 ?
-			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
-			    sizeofof(mss));
+			opt[0] = TCPOPT_MAXSEG;
+			opt[1] = TCPOLEN_MAXSEG;
+			mss = htons((u_short) tcp_mssopt(tp));
+			(void)memcpy(opt + 2, &mss, sizeof(mss));
 			optlen = TCPOLEN_MAXSEG;
 
 			if ((tp->t_flags & TF_REQ_SCALE) &&

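The window-update test quoted in this file's big hunk sends an update
only when the window can open by at least two full segments or by half
of the maximum possible window.  A stand-alone check with illustrative
numbers (a 1460-byte MSS and a 57344-byte buffer make the thresholds
2920 and 28672 bytes; all values below are made up):

#include <stdio.h>

int
main(void)
{
	long t_maxseg = 1460;	/* typical Ethernet MSS */
	long sb_hiwat = 57344;	/* receive buffer high-water mark */
	long adv = 4096;	/* how much the advertised window could open */

	if (adv >= 2 * t_maxseg || 2 * adv >= sb_hiwat)
		printf("send a window update (adv=%ld)\n", adv);
	else
		printf("suppress the update (adv=%ld)\n", adv);
	return (0);
}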
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe p4-projects" in the body of the message



