Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 08 Feb 2001 22:27:25 +0000
From:      Tony Finch <dot@dotat.at>
To:        FreeBSD-gnats-submit@freebsd.org
Subject:   kern/24959: proper TCP_NOPUSH/TCP_CORK compatibility
Message-ID:  <E14QzXB-00024t-00@hand.dotat.at>

next in thread | raw e-mail | index | archive | help

>Number:         24959
>Category:       kern
>Synopsis:       proper TCP_NOPUSH/TCP_CORK compatibility
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Thu Feb 08 14:30:01 PST 2001
>Closed-Date:
>Last-Modified:
>Originator:     Tony Finch
>Release:        FreeBSD 4.2-STABLE i386
>Organization:
Covalent Technologies, Inc.
>Environment:

FreeBSD hand.dotat.at 4.2-STABLE FreeBSD 4.2-STABLE #2: Thu Feb  8 08:45:48 GMT 2001     fanf@hand.dotat.at:/FreeBSD/obj/FreeBSD/releng4/sys/DELL-Latitude-CSx  i386

>Description:

Jonathan Lemon recently committed a fix to the implementation of
TCP_NOPUSH that expedites the output of the last packet in a sequence
(rev. 1.53 of tcp_usrreq.c). However there are a number of problems
with that commit, as follows:

  * the change hsan't been documented

  * although the new functionality is similar to Linux's
    TCP_CORK, the Linuxulator hasn't been updated to implement
    that support.

  * TCP_NOPUSH has additional T/TCP-related semantics that may be
    undesirable in an application that only wants the TCP_CORK
    semantics. If the option is set on a listening socket then it
    enables TAO.

This patch introduces a new TCP option called TCP_CORK which is
intended to be exactly the same as the Linux version. The old
tcp_output part of the TCP_NOPUSH functionality is renamed to use
TCP_CORK. TCP_NOPUSH is now implemented as a superset of TCP_CORK
that also includes the T/TCP functionality in tcp_input.

I've included two patches below, one against -STABLE and one against
-CURRENT. They're the same apart from the change in tcp_usrreq.c in
-CURRENT.

>How-To-Repeat:

>Fix:

--------------------------------------------------------------------------------
This patch is against RELENG_4

Index: alpha/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/alpha/linux/linux.h,v
retrieving revision 1.46.2.2
diff -u -r1.46.2.2 linux.h
--- alpha/linux/linux.h	2000/11/22 15:22:43	1.46.2.2
+++ alpha/linux/linux.h	2001/02/05 23:35:51
@@ -377,6 +377,10 @@
 #define	LINUX_IP_ADD_MEMBERSHIP		35
 #define	LINUX_IP_DROP_MEMBERSHIP	36
 
+#define LINUX_TCP_NODELAY	1
+#define LINUX_TCP_MAXSEG	2
+#define LINUX_TCP_CORK		3
+
 struct linux_sockaddr {
 	u_short	sa_family;
 	char	sa_data[14];
Index: compat/linux/linux_socket.c
===================================================================
RCS file: /home/ncvs/src/sys/compat/linux/linux_socket.c,v
retrieving revision 1.19.2.3
diff -u -r1.19.2.3 linux_socket.c
--- compat/linux/linux_socket.c	2000/11/22 15:39:01	1.19.2.3
+++ compat/linux/linux_socket.c	2001/02/08 21:25:38
@@ -46,6 +46,7 @@
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
+#include <netinet/tcp.h>
 
 #include <machine/../linux/linux.h>
 #ifdef __alpha__
@@ -89,6 +90,21 @@
 }
 
 static int
+linux_to_bsd_tcp_sockopt(int opt)
+{
+
+	switch (opt) {
+	case LINUX_TCP_NODELAY:
+		return (TCP_NODELAY);
+	case LINUX_TCP_MAXSEG:
+		return (TCP_MAXSEG);
+	case LINUX_TCP_CORK:
+		return (TCP_CORK);
+	}
+	return (-1);
+}
+
+static int
 linux_to_bsd_ip_sockopt(int opt)
 {
 
@@ -768,8 +784,7 @@
 		name = linux_to_bsd_ip_sockopt(linux_args.optname);
 		break;
 	case IPPROTO_TCP:
-		/* Linux TCP option values match BSD's */
-		name = linux_args.optname;
+		name = linux_to_bsd_tcp_sockopt(linux_args.optname);
 		break;
 	default:
 		name = -1;
Index: i386/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/linux/linux.h,v
retrieving revision 1.41.2.1
diff -u -r1.41.2.1 linux.h
--- i386/linux/linux.h	2000/05/09 17:41:24	1.41.2.1
+++ i386/linux/linux.h	2001/02/05 23:35:24
@@ -375,6 +375,10 @@
 #define	LINUX_IP_ADD_MEMBERSHIP		35
 #define	LINUX_IP_DROP_MEMBERSHIP	36
 
+#define LINUX_TCP_NODELAY	1
+#define LINUX_TCP_MAXSEG	2
+#define LINUX_TCP_CORK		3
+
 struct linux_sockaddr {
 	u_short	sa_family;
 	char	sa_data[14];
Index: netinet/tcp.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp.h,v
retrieving revision 1.13.2.2
diff -u -r1.13.2.2 tcp.h
--- netinet/tcp.h	2001/01/09 18:25:18	1.13.2.2
+++ netinet/tcp.h	2001/02/08 21:10:37
@@ -129,7 +129,8 @@
  */
 #define	TCP_NODELAY	0x01	/* don't delay send to coalesce packets */
 #define	TCP_MAXSEG	0x02	/* set maximum segment size */
-#define TCP_NOPUSH	0x04	/* don't push last block of write */
+#define TCP_CORK	0x03	/* don't push last block of write */
+#define TCP_NOPUSH	0x04	/* ditto and also allow TAO on listen */
 #define TCP_NOOPT	0x08	/* don't use TCP options */
 
 #endif
Index: netinet/tcp_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.107.2.4
diff -u -r1.107.2.4 tcp_input.c
--- netinet/tcp_input.c	2000/08/16 06:14:23	1.107.2.4
+++ netinet/tcp_input.c	2001/02/08 21:23:29
@@ -817,7 +817,7 @@
 #endif
 			tp = intotcpcb(inp);
 			tp->t_state = TCPS_LISTEN;
-			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
+			tp->t_flags |= tp0->t_flags & (TF_CORK|TF_NOOPT);
 
 			/* Compute proper scaling value from buffer space */
 			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
Index: netinet/tcp_output.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.39.2.6
diff -u -r1.39.2.6 tcp_output.c
--- netinet/tcp_output.c	2000/09/13 04:27:06	1.39.2.6
+++ netinet/tcp_output.c	2001/02/08 21:23:07
@@ -280,7 +280,7 @@
 			goto send;
 		if (!(tp->t_flags & TF_MORETOCOME) &&
 		    (idle || tp->t_flags & TF_NODELAY) &&
-		    (tp->t_flags & TF_NOPUSH) == 0 &&
+		    (tp->t_flags & TF_CORK) == 0 &&
 		    len + off >= so->so_snd.sb_cc)
 			goto send;
 		if (tp->t_force)
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.51
diff -u -r1.51 tcp_usrreq.c
--- netinet/tcp_usrreq.c	2000/01/09 19:17:28	1.51
+++ netinet/tcp_usrreq.c	2001/02/08 21:22:35
@@ -896,6 +896,7 @@
 		switch (sopt->sopt_name) {
 		case TCP_NODELAY:
 		case TCP_NOOPT:
+		case TCP_CORK:
 		case TCP_NOPUSH:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
@@ -909,8 +910,11 @@
 			case TCP_NOOPT:
 				opt = TF_NOOPT;
 				break;
+			case TCP_CORK:
+				opt = TF_CORK;
+				break;
 			case TCP_NOPUSH:
-				opt = TF_NOPUSH;
+				opt = TF_NOPUSH | TF_CORK;
 				break;
 			default:
 				opt = 0; /* dead code to fool gcc */
@@ -919,8 +923,11 @@
 
 			if (optval)
 				tp->t_flags |= opt;
-			else
+			else {
 				tp->t_flags &= ~opt;
+				if (opt & TF_CORK)
+					error = tcp_output(tp);
+			}
 			break;
 
 		case TCP_MAXSEG:
Index: netinet/tcp_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.56.2.2
diff -u -r1.56.2.2 tcp_var.h
--- netinet/tcp_var.h	2000/08/16 06:14:23	1.56.2.2
+++ netinet/tcp_var.h	2001/02/08 21:25:09
@@ -89,12 +89,13 @@
 #define	TF_SACK_PERMIT	0x00200		/* other side said I could SACK */
 #define	TF_NEEDSYN	0x00400		/* send SYN (implicit state) */
 #define	TF_NEEDFIN	0x00800		/* send FIN (implicit state) */
-#define	TF_NOPUSH	0x01000		/* don't push */
+#define	TF_NOPUSH	0x01000		/* enable TAO on listen */
 #define	TF_REQ_CC	0x02000		/* have/will request CC */
 #define	TF_RCVD_CC	0x04000		/* a CC was received in SYN */
 #define	TF_SENDCCNEW	0x08000		/* send CCnew instead of CC in SYN */
 #define	TF_MORETOCOME	0x10000		/* More data to be appended to sock */
 #define	TF_LQ_OVERFLOW	0x20000		/* listen queue overflow */
+#define	TF_CORK		0x40000		/* don't push */
 	int	t_force;		/* 1 if forcing out a byte */
 
 	tcp_seq	snd_una;		/* send unacknowledged */

--------------------------------------------------------------------------------
This patch is against HEAD

Index: alpha/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/alpha/linux/linux.h,v
retrieving revision 1.49
diff -u -r1.49 linux.h
--- alpha/linux/linux.h	2000/12/03 01:30:30	1.49
+++ alpha/linux/linux.h	2001/02/08 21:59:43
@@ -379,6 +379,10 @@
 #define	LINUX_IP_ADD_MEMBERSHIP		35
 #define	LINUX_IP_DROP_MEMBERSHIP	36
 
+#define LINUX_TCP_NODELAY	1
+#define LINUX_TCP_MAXSEG	2
+#define LINUX_TCP_CORK		3
+
 struct linux_sockaddr {
 	u_short	sa_family;
 	char	sa_data[14];
Index: compat/linux/linux_socket.c
===================================================================
RCS file: /home/ncvs/src/sys/compat/linux/linux_socket.c,v
retrieving revision 1.25
diff -u -r1.25 linux_socket.c
--- compat/linux/linux_socket.c	2000/12/19 00:24:25	1.25
+++ compat/linux/linux_socket.c	2001/02/08 21:58:19
@@ -46,6 +46,7 @@
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
+#include <netinet/tcp.h>
 
 #include <machine/../linux/linux.h>
 #include <machine/../linux/linux_proto.h>
@@ -86,6 +87,21 @@
 }
 
 static int
+linux_to_bsd_tcp_sockopt(int opt)
+{
+
+	switch (opt) {
+	case LINUX_TCP_NODELAY:
+		return (TCP_NODELAY);
+	case LINUX_TCP_MAXSEG:
+		return (TCP_MAXSEG);
+	case LINUX_TCP_CORK:
+		return (TCP_CORK);
+	}
+	return (-1);
+}
+
+static int
 linux_to_bsd_ip_sockopt(int opt)
 {
 
@@ -831,8 +847,7 @@
 		name = linux_to_bsd_ip_sockopt(linux_args.optname);
 		break;
 	case IPPROTO_TCP:
-		/* Linux TCP option values match BSD's */
-		name = linux_args.optname;
+		name = linux_to_bsd_tcp_sockopt(linux_args.optname);
 		break;
 	default:
 		name = -1;
Index: i386/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/linux/linux.h,v
retrieving revision 1.51
diff -u -r1.51 linux.h
--- i386/linux/linux.h	2000/11/30 05:23:47	1.51
+++ i386/linux/linux.h	2001/02/08 22:00:07
@@ -503,6 +503,10 @@
 #define	LINUX_IP_ADD_MEMBERSHIP		35
 #define	LINUX_IP_DROP_MEMBERSHIP	36
 
+#define LINUX_TCP_NODELAY	1
+#define LINUX_TCP_MAXSEG	2
+#define LINUX_TCP_CORK		3
+
 struct linux_sockaddr {
 	u_short	sa_family;
 	char	sa_data[14];
Index: netinet/tcp.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp.h,v
retrieving revision 1.16
diff -u -r1.16 tcp.h
--- netinet/tcp.h	2001/01/09 18:26:17	1.16
+++ netinet/tcp.h	2001/02/08 21:51:36
@@ -131,7 +131,8 @@
  */
 #define	TCP_NODELAY	0x01	/* don't delay send to coalesce packets */
 #define	TCP_MAXSEG	0x02	/* set maximum segment size */
-#define TCP_NOPUSH	0x04	/* don't push last block of write */
+#define TCP_CORK	0x03	/* don't push last block of write */
+#define TCP_NOPUSH	0x04	/* ditto and also allow TAO on listen */
 #define TCP_NOOPT	0x08	/* don't use TCP options */
 
 #endif
Index: netinet/tcp_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.122
diff -u -r1.122 tcp_input.c
--- netinet/tcp_input.c	2001/01/24 16:25:36	1.122
+++ netinet/tcp_input.c	2001/02/08 21:56:24
@@ -822,7 +822,7 @@
 #endif
 			tp = intotcpcb(inp);
 			tp->t_state = TCPS_LISTEN;
-			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
+			tp->t_flags |= tp0->t_flags & (TF_CORK|TF_NOOPT);
 
 			/* Compute proper scaling value from buffer space */
 			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
Index: netinet/tcp_output.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.48
diff -u -r1.48 tcp_output.c
--- netinet/tcp_output.c	2000/10/27 11:45:41	1.48
+++ netinet/tcp_output.c	2001/02/08 21:56:04
@@ -282,7 +282,7 @@
 			goto send;
 		if (!(tp->t_flags & TF_MORETOCOME) &&
 		    (idle || tp->t_flags & TF_NODELAY) &&
-		    (tp->t_flags & TF_NOPUSH) == 0 &&
+		    (tp->t_flags & TF_CORK) == 0 &&
 		    len + off >= so->so_snd.sb_cc)
 			goto send;
 		if (tp->t_force)
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.53
diff -u -r1.53 tcp_usrreq.c
--- netinet/tcp_usrreq.c	2001/02/02 18:48:25	1.53
+++ netinet/tcp_usrreq.c	2001/02/08 21:54:54
@@ -895,6 +895,8 @@
 		switch (sopt->sopt_name) {
 		case TCP_NODELAY:
 		case TCP_NOOPT:
+		case TCP_CORK:
+		case TCP_NOPUSH:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
@@ -907,6 +909,12 @@
 			case TCP_NOOPT:
 				opt = TF_NOOPT;
 				break;
+			case TCP_CORK:
+				opt = TF_CORK;
+				break;
+			case TCP_NOPUSH:
+				opt = TF_NOPUSH | TF_CORK;
+				break;
 			default:
 				opt = 0; /* dead code to fool gcc */
 				break;
@@ -914,21 +922,10 @@
 
 			if (optval)
 				tp->t_flags |= opt;
-			else
-				tp->t_flags &= ~opt;
-			break;
-
-		case TCP_NOPUSH:
-			error = sooptcopyin(sopt, &optval, sizeof optval,
-					    sizeof optval);
-			if (error)
-				break;
-
-			if (optval)
-				tp->t_flags |= TF_NOPUSH;
 			else {
-				tp->t_flags &= ~TF_NOPUSH;
-				error = tcp_output(tp);
+				tp->t_flags &= ~opt;
+				if (opt & TF_CORK)
+					error = tcp_output(tp);
 			}
 			break;
 
Index: netinet/tcp_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.63
diff -u -r1.63 tcp_var.h
--- netinet/tcp_var.h	2000/12/24 10:57:21	1.63
+++ netinet/tcp_var.h	2001/02/08 21:54:10
@@ -89,12 +89,13 @@
 #define	TF_SACK_PERMIT	0x00200		/* other side said I could SACK */
 #define	TF_NEEDSYN	0x00400		/* send SYN (implicit state) */
 #define	TF_NEEDFIN	0x00800		/* send FIN (implicit state) */
-#define	TF_NOPUSH	0x01000		/* don't push */
+#define	TF_NOPUSH	0x01000		/* enable TAO on listen */
 #define	TF_REQ_CC	0x02000		/* have/will request CC */
 #define	TF_RCVD_CC	0x04000		/* a CC was received in SYN */
 #define	TF_SENDCCNEW	0x08000		/* send CCnew instead of CC in SYN */
 #define	TF_MORETOCOME	0x10000		/* More data to be appended to sock */
 #define	TF_LQ_OVERFLOW	0x20000		/* listen queue overflow */
+#define	TF_CORK		0x40000		/* don't push */
 	int	t_force;		/* 1 if forcing out a byte */
 
 	tcp_seq	snd_una;		/* send unacknowledged */
--------------------------------------------------------------------------------

>Release-Note:
>Audit-Trail:
>Unformatted:


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?E14QzXB-00024t-00>