Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 24 May 2019 08:42:34 +0000 (UTC)
From:      "Andrey V. Elsukov" <ae@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r348233 - in stable/12: sbin/ifconfig share/man/man4 sys/modules/if_gre sys/net sys/netinet sys/netinet6
Message-ID:  <201905240842.x4O8gYRa026791@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ae
Date: Fri May 24 08:42:34 2019
New Revision: 348233
URL: https://svnweb.freebsd.org/changeset/base/348233

Log:
  MFC r346630:
    Add GRE-in-UDP encapsulation support as defined in RFC8086.
  
    This GRE-in-UDP encapsulation allows the UDP source port field to be
    used as an entropy field for load-balancing of GRE traffic in transit
    networks. Also, most multiqueue network cards are able to distribute
    incoming UDP datagrams to different NIC queues, while very few are
    able to do this for GRE packets.
  
    When an administrator enables UDP encapsulation with the command
    `ifconfig gre0 udpencap`, the driver creates a kernel socket that binds
    to the tunnel source address and, after udp_set_kernel_tunneling(),
    starts receiving all UDP packets destined to port 4754. Each kernel
    socket maintains a list of tunnels with different destination addresses.
    Thus, when several tunnels use the same source address, they are all
    handled by a single socket.  The IP[V6]_BINDANY socket option is used to
    be able to bind the socket to the source address even if it is not yet
    available in the system. This may happen on system boot, when a gre(4)
    interface is created before the source address becomes available. The
    encapsulation and sending of packets is done directly from gre(4) into
    ip[6]_output() without using sockets.
  
    Reviewed by:	eugen
    Relnotes:	yes
    Differential Revision:	https://reviews.freebsd.org/D19921

Modified:
  stable/12/sbin/ifconfig/ifgre.c
  stable/12/share/man/man4/gre.4
  stable/12/sys/modules/if_gre/Makefile
  stable/12/sys/net/if_gre.c
  stable/12/sys/net/if_gre.h
  stable/12/sys/netinet/ip_gre.c
  stable/12/sys/netinet6/ip6_gre.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sbin/ifconfig/ifgre.c
==============================================================================
--- stable/12/sbin/ifconfig/ifgre.c	Fri May 24 08:40:37 2019	(r348232)
+++ stable/12/sbin/ifconfig/ifgre.c	Fri May 24 08:42:34 2019	(r348233)
@@ -44,15 +44,16 @@ __FBSDID("$FreeBSD$");
 
 #include "ifconfig.h"
 
-#define	GREBITS	"\020\01ENABLE_CSUM\02ENABLE_SEQ"
+#define	GREBITS	"\020\01ENABLE_CSUM\02ENABLE_SEQ\03UDPENCAP"
 
 static	void gre_status(int s);
 
 static void
 gre_status(int s)
 {
-	uint32_t opts = 0;
+	uint32_t opts, port;
 
+	opts = 0;
 	ifr.ifr_data = (caddr_t)&opts;
 	if (ioctl(s, GREGKEY, &ifr) == 0)
 		if (opts != 0)
@@ -60,6 +61,11 @@ gre_status(int s)
 	opts = 0;
 	if (ioctl(s, GREGOPTS, &ifr) != 0 || opts == 0)
 		return;
+
+	port = 0;
+	ifr.ifr_data = (caddr_t)&port;
+	if (ioctl(s, GREGPORT, &ifr) == 0 && port != 0)
+		printf("\tudpport: %u\n", port);
 	printb("\toptions", opts, GREBITS);
 	putchar('\n');
 }
@@ -77,6 +83,18 @@ setifgrekey(const char *val, int dummy __unused, int s
 }
 
 static void
+setifgreport(const char *val, int dummy __unused, int s,
+    const struct afswtch *afp)
+{
+	uint32_t udpport = strtol(val, NULL, 0);
+
+	strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
+	ifr.ifr_data = (caddr_t)&udpport;
+	if (ioctl(s, GRESPORT, (caddr_t)&ifr) < 0)
+		warn("ioctl (set udpport)");
+}
+
+static void
 setifgreopts(const char *val, int d, int s, const struct afswtch *afp)
 {
 	uint32_t opts;
@@ -101,10 +119,13 @@ setifgreopts(const char *val, int d, int s, const stru
 
 static struct cmd gre_cmds[] = {
 	DEF_CMD_ARG("grekey",			setifgrekey),
+	DEF_CMD_ARG("udpport",			setifgreport),
 	DEF_CMD("enable_csum", GRE_ENABLE_CSUM,	setifgreopts),
 	DEF_CMD("-enable_csum",-GRE_ENABLE_CSUM,setifgreopts),
 	DEF_CMD("enable_seq", GRE_ENABLE_SEQ,	setifgreopts),
 	DEF_CMD("-enable_seq",-GRE_ENABLE_SEQ,	setifgreopts),
+	DEF_CMD("udpencap", GRE_UDPENCAP,	setifgreopts),
+	DEF_CMD("-udpencap",-GRE_UDPENCAP,	setifgreopts),
 };
 static struct afswtch af_gre = {
 	.af_name	= "af_gre",

Modified: stable/12/share/man/man4/gre.4
==============================================================================
--- stable/12/share/man/man4/gre.4	Fri May 24 08:40:37 2019	(r348232)
+++ stable/12/share/man/man4/gre.4	Fri May 24 08:42:34 2019	(r348233)
@@ -29,7 +29,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 2, 2015
+.Dd April 24, 2019
 .Dt GRE 4
 .Os
 .Sh NAME
@@ -89,7 +89,45 @@ A value of 0 disables the key option.
 Enables checksum calculation for outgoing packets.
 .It Ar enable_seq
 Enables use of sequence number field in the GRE header for outgoing packets.
+.It Ar udpencap
+Enables GRE-in-UDP encapsulation (see the
+.Sx GRE-IN-UDP ENCAPSULATION
+Section below for details).
+.It Ar udpport
+Set the source UDP port for outgoing packets.
+A value of 0 disables the persistence of source UDP port for outgoing packets.
+See the
+.Sx GRE-IN-UDP ENCAPSULATION
+Section below for details.
 .El
+.Sh GRE-IN-UDP ENCAPSULATION
+The
+.Nm
+driver supports GRE in UDP encapsulation as defined in RFC 8086.
+A GRE in UDP tunnel offers the possibility of better performance for
+load-balancing GRE traffic in transit networks.
+Encapsulating GRE in UDP enables use of the UDP source port to provide
+entropy to ECMP hashing.
+.Pp
+The GRE in UDP tunnel uses the single value 4754 as the UDP destination port.
+The UDP source port contains a 14-bit entropy value that is generated
+by the encapsulator to identify a flow for the encapsulated packet.
+The
+.Ar udpport
+option can be used to disable this behaviour and use a single source UDP
+port value.
+The value of
+.Ar udpport
+should be within the ephemeral port range, i.e., 49152 to 65535 by default.
+.Pp
+Note that a GRE in UDP tunnel is unidirectional; the tunnel traffic is not
+expected to be returned back to the UDP source port values used to generate
+entropy.
+This may impact NAPT (Network Address Port Translator) middleboxes.
+If such tunnels are expected to be used on a path with a middlebox,
+the tunnel can be configured either to disable use of the UDP source port
+for entropy or to enable middleboxes to pass packets with UDP source port
+entropy.
 .Sh EXAMPLES
 .Bd -literal
 192.168.1.* --- Router A  -------tunnel-------- Router B --- 192.168.2.*

Modified: stable/12/sys/modules/if_gre/Makefile
==============================================================================
--- stable/12/sys/modules/if_gre/Makefile	Fri May 24 08:40:37 2019	(r348232)
+++ stable/12/sys/modules/if_gre/Makefile	Fri May 24 08:42:34 2019	(r348233)
@@ -5,7 +5,7 @@ SYSDIR?=${SRCTOP}/sys
 .include "${SYSDIR}/conf/kern.opts.mk"
 
 KMOD=	if_gre
-SRCS=	if_gre.c opt_inet.h opt_inet6.h
+SRCS=	if_gre.c opt_inet.h opt_inet6.h opt_rss.h
 SRCS.INET=	ip_gre.c
 SRCS.INET6=	ip6_gre.c
 

Modified: stable/12/sys/net/if_gre.c
==============================================================================
--- stable/12/sys/net/if_gre.c	Fri May 24 08:40:37 2019	(r348232)
+++ stable/12/sys/net/if_gre.c	Fri May 24 08:42:34 2019	(r348233)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
+#include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -49,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
@@ -65,19 +67,27 @@ __FBSDID("$FreeBSD$");
 #include <net/route.h>
 
 #include <netinet/in.h>
+#include <netinet/in_pcb.h>
 #ifdef INET
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
+#ifdef RSS
+#include <netinet/in_rss.h>
 #endif
+#endif
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
+#ifdef RSS
+#include <netinet6/in6_rss.h>
 #endif
+#endif
 
 #include <netinet/ip_encap.h>
+#include <netinet/udp.h>
 #include <net/bpf.h>
 #include <net/if_gre.h>
 
@@ -151,6 +161,7 @@ vnet_gre_uninit(const void *unused __unused)
 #ifdef INET6
 	in6_gre_uninit();
 #endif
+	/* XXX: epoch_call drain */
 }
 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gre_uninit, NULL);
@@ -266,6 +277,7 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		break;
 	case GRESKEY:
 	case GRESOPTS:
+	case GRESPORT:
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
@@ -281,23 +293,45 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			}
 			if (sc->gre_options == opt)
 				break;
+		} else if (cmd == GRESPORT) {
+			if (opt != 0 && (opt < V_ipport_hifirstauto ||
+			    opt > V_ipport_hilastauto)) {
+				error = EINVAL;
+				break;
+			}
+			if (sc->gre_port == opt)
+				break;
+			if ((sc->gre_options & GRE_UDPENCAP) == 0) {
+				/*
+				 * UDP encapsulation is not enabled, thus
+				 * there is no need to reattach softc.
+				 */
+				sc->gre_port = opt;
+				break;
+			}
 		}
 		switch (sc->gre_family) {
 #ifdef INET
 		case AF_INET:
-			in_gre_setopts(sc, cmd, opt);
+			error = in_gre_setopts(sc, cmd, opt);
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
-			in6_gre_setopts(sc, cmd, opt);
+			error = in6_gre_setopts(sc, cmd, opt);
 			break;
 #endif
 		default:
+			/*
+			 * Tunnel is not yet configured.
+			 * We can just change any parameters.
+			 */
 			if (cmd == GRESKEY)
 				sc->gre_key = opt;
-			else
+			if (cmd == GRESOPTS)
 				sc->gre_options = opt;
+			if (cmd == GRESPORT)
+				sc->gre_port = opt;
 			break;
 		}
 		/*
@@ -313,6 +347,10 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
 		    sizeof(sc->gre_options));
 		break;
+	case GREGPORT:
+		error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
+		    sizeof(sc->gre_port));
+		break;
 	default:
 		error = EINVAL;
 		break;
@@ -337,6 +375,7 @@ end:
 static void
 gre_delete_tunnel(struct gre_softc *sc)
 {
+	struct gre_socket *gs;
 
 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
 	if (sc->gre_family != 0) {
@@ -346,6 +385,16 @@ gre_delete_tunnel(struct gre_softc *sc)
 		free(sc->gre_hdr, M_GRE);
 		sc->gre_family = 0;
 	}
+	/*
+	 * If this Tunnel was the last one that could use UDP socket,
+	 * we should unlink socket from hash table and close it.
+	 */
+	if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
+		CK_LIST_REMOVE(gs, chain);
+		soclose(gs->so);
+		epoch_call(net_epoch_preempt, &gs->epoch_ctx, gre_sofree);
+		sc->gre_so = NULL;
+	}
 	GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 	if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
 }
@@ -372,8 +421,39 @@ gre_hashdestroy(struct gre_list *hash)
 }
 
 void
-gre_updatehdr(struct gre_softc *sc, struct grehdr *gh)
+gre_sofree(epoch_context_t ctx)
 {
+	struct gre_socket *gs;
+
+	gs = __containerof(ctx, struct gre_socket, epoch_ctx);
+	free(gs, M_GRE);
+}
+
+static __inline uint16_t
+gre_cksum_add(uint16_t sum, uint16_t a)
+{
+	uint16_t res;
+
+	res = sum + a;
+	return (res + (res < a));
+}
+
+void
+gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
+{
+
+	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+	MPASS(sc->gre_options & GRE_UDPENCAP);
+
+	udp->uh_dport = htons(GRE_UDPPORT);
+	udp->uh_sport = htons(sc->gre_port);
+	udp->uh_sum = csum;
+	udp->uh_ulen = 0;
+}
+
+void
+gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
+{
 	uint32_t *opts;
 	uint16_t flags;
 
@@ -539,6 +619,52 @@ gre_setseqn(struct grehdr *gh, uint32_t seq)
 	*opts = htonl(seq);
 }
 
+static uint32_t
+gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
+{
+	uint32_t flowid;
+
+	if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
+		return (0);
+#ifndef RSS
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		flowid = mtod(m, struct ip *)->ip_src.s_addr ^
+		    mtod(m, struct ip *)->ip_dst.s_addr;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
+		    mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
+		break;
+#endif
+	default:
+		flowid = 0;
+	}
+#else /* RSS */
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
+		    mtod(m, struct ip *)->ip_dst);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		flowid = rss_hash_ip6_2tuple(
+		    &mtod(m, struct ip6_hdr *)->ip6_src,
+		    &mtod(m, struct ip6_hdr *)->ip6_dst);
+		break;
+#endif
+	default:
+		flowid = 0;
+	}
+#endif
+	return (flowid);
+}
+
 #define	MTAG_GRE	1307983903
 static int
 gre_transmit(struct ifnet *ifp, struct mbuf *m)
@@ -546,7 +672,8 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
 	GRE_RLOCK_TRACKER;
 	struct gre_softc *sc;
 	struct grehdr *gh;
-	uint32_t af;
+	struct udphdr *uh;
+	uint32_t af, flowid;
 	int error, len;
 	uint16_t proto;
 
@@ -573,6 +700,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
 	af = m->m_pkthdr.csum_data;
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	m->m_flags &= ~(M_BCAST|M_MCAST);
+	flowid = gre_flowid(sc, m, af);
 	M_SETFIB(m, sc->gre_fibnum);
 	M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
 	if (m == NULL) {
@@ -614,6 +742,19 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
 		error = ENETDOWN;
 		goto drop;
 	}
+	if (sc->gre_options & GRE_UDPENCAP) {
+		uh = (struct udphdr *)mtodo(m, len);
+		uh->uh_sport |= htons(V_ipport_hifirstauto) |
+		    (flowid >> 16) | (flowid & 0xFFFF);
+		uh->uh_sport = htons(ntohs(uh->uh_sport) %
+		    V_ipport_hilastauto);
+		uh->uh_ulen = htons(m->m_pkthdr.len - len);
+		uh->uh_sum = gre_cksum_add(uh->uh_sum,
+		    htons(m->m_pkthdr.len - len + IPPROTO_UDP));
+		m->m_pkthdr.csum_flags = sc->gre_csumflags;
+		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+		len += sizeof(struct udphdr);
+	}
 	gh = (struct grehdr *)mtodo(m, len);
 	gh->gre_proto = proto;
 	if (sc->gre_options & GRE_ENABLE_SEQ)
@@ -631,7 +772,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
 #endif
 #ifdef INET6
 	case AF_INET6:
-		error = in6_gre_output(m, af, sc->gre_hlen);
+		error = in6_gre_output(m, af, sc->gre_hlen, flowid);
 		break;
 #endif
 	default:

Modified: stable/12/sys/net/if_gre.h
==============================================================================
--- stable/12/sys/net/if_gre.h	Fri May 24 08:40:37 2019	(r348232)
+++ stable/12/sys/net/if_gre.h	Fri May 24 08:42:34 2019	(r348233)
@@ -53,15 +53,36 @@ struct greip {
 	struct ip	gi_ip;
 	struct grehdr	gi_gre;
 } __packed;
-#endif
 
+struct greudp {
+	struct ip	gi_ip;
+	struct udphdr	gi_udp;
+	struct grehdr	gi_gre;
+} __packed;
+#endif /* INET */
+
 #ifdef INET6
 struct greip6 {
 	struct ip6_hdr	gi6_ip6;
 	struct grehdr	gi6_gre;
 } __packed;
-#endif
 
+struct greudp6 {
+	struct ip6_hdr	gi6_ip6;
+	struct udphdr	gi6_udp;
+	struct grehdr	gi6_gre;
+} __packed;
+#endif /* INET6 */
+
+CK_LIST_HEAD(gre_list, gre_softc);
+CK_LIST_HEAD(gre_sockets, gre_socket);
+struct gre_socket {
+	struct socket		*so;
+	struct gre_list		list;
+	CK_LIST_ENTRY(gre_socket) chain;
+	struct epoch_context	epoch_ctx;
+};
+
 struct gre_softc {
 	struct ifnet		*gre_ifp;
 	int			gre_family;	/* AF of delivery header */
@@ -69,22 +90,26 @@ struct gre_softc {
 	uint32_t		gre_oseq;
 	uint32_t		gre_key;
 	uint32_t		gre_options;
+	uint32_t		gre_csumflags;
+	uint32_t		gre_port;
 	u_int			gre_fibnum;
 	u_int			gre_hlen;	/* header size */
 	union {
 		void		*hdr;
 #ifdef INET
-		struct greip	*gihdr;
+		struct greip	*iphdr;
+		struct greudp	*udphdr;
 #endif
 #ifdef INET6
-		struct greip6	*gi6hdr;
+		struct greip6	*ip6hdr;
+		struct greudp6	*udp6hdr;
 #endif
 	} gre_uhdr;
+	struct gre_socket	*gre_so;
 
 	CK_LIST_ENTRY(gre_softc) chain;
 	CK_LIST_ENTRY(gre_softc) srchash;
 };
-CK_LIST_HEAD(gre_list, gre_softc);
 MALLOC_DECLARE(M_GRE);
 
 #ifndef GRE_HASH_SIZE
@@ -98,28 +123,35 @@ MALLOC_DECLARE(M_GRE);
 #define	GRE_WAIT()		epoch_wait_preempt(net_epoch_preempt)
 
 #define	gre_hdr			gre_uhdr.hdr
-#define	gre_gihdr		gre_uhdr.gihdr
-#define	gre_gi6hdr		gre_uhdr.gi6hdr
-#define	gre_oip			gre_gihdr->gi_ip
-#define	gre_oip6		gre_gi6hdr->gi6_ip6
+#define	gre_iphdr		gre_uhdr.iphdr
+#define	gre_ip6hdr		gre_uhdr.ip6hdr
+#define	gre_udphdr		gre_uhdr.udphdr
+#define	gre_udp6hdr		gre_uhdr.udp6hdr
 
+#define	gre_oip			gre_iphdr->gi_ip
+#define	gre_udp			gre_udphdr->gi_udp
+#define	gre_oip6		gre_ip6hdr->gi6_ip6
+#define	gre_udp6		gre_udp6hdr->gi6_udp
+
 struct gre_list *gre_hashinit(void);
 void gre_hashdestroy(struct gre_list *);
 
 int	gre_input(struct mbuf *, int, int, void *);
-void	gre_updatehdr(struct gre_softc *, struct grehdr *);
+void	gre_update_hdr(struct gre_softc *, struct grehdr *);
+void	gre_update_udphdr(struct gre_softc *, struct udphdr *, uint16_t);
+void	gre_sofree(epoch_context_t);
 
 void	in_gre_init(void);
 void	in_gre_uninit(void);
-void	in_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int	in_gre_setopts(struct gre_softc *, u_long, uint32_t);
 int	in_gre_ioctl(struct gre_softc *, u_long, caddr_t);
 int	in_gre_output(struct mbuf *, int, int);
 
 void	in6_gre_init(void);
 void	in6_gre_uninit(void);
-void	in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int	in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
 int	in6_gre_ioctl(struct gre_softc *, u_long, caddr_t);
-int	in6_gre_output(struct mbuf *, int, int);
+int	in6_gre_output(struct mbuf *, int, int, uint32_t);
 /*
  * CISCO uses special type for GRE tunnel created as part of WCCP
  * connection, while in fact those packets are just IPv4 encapsulated
@@ -139,9 +171,15 @@ int	in6_gre_output(struct mbuf *, int, int);
 #define	GRESKEY		_IOW('i', 108, struct ifreq)
 #define	GREGOPTS	_IOWR('i', 109, struct ifreq)
 #define	GRESOPTS	_IOW('i', 110, struct ifreq)
+#define	GREGPORT	_IOWR('i', 111, struct ifreq)
+#define	GRESPORT	_IOW('i', 112, struct ifreq)
 
+/* GRE-in-UDP encapsulation destination port as defined in RFC8086 */
+#define	GRE_UDPPORT		4754
+
 #define	GRE_ENABLE_CSUM		0x0001
 #define	GRE_ENABLE_SEQ		0x0002
-#define	GRE_OPTMASK		(GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
+#define	GRE_UDPENCAP		0x0004
+#define	GRE_OPTMASK		(GRE_ENABLE_CSUM|GRE_ENABLE_SEQ|GRE_UDPENCAP)
 
 #endif /* _NET_IF_GRE_H_ */

Modified: stable/12/sys/netinet/ip_gre.c
==============================================================================
--- stable/12/sys/netinet/ip_gre.c	Fri May 24 08:40:37 2019	(r348232)
+++ stable/12/sys/netinet/ip_gre.c	Fri May 24 08:42:34 2019	(r348233)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/jail.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
@@ -58,15 +59,19 @@ __FBSDID("$FreeBSD$");
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <net/if_gre.h>
+#include <machine/in_cksum.h>
 
 #define	GRE_TTL			30
 VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
@@ -74,14 +79,22 @@ VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets");
 
+struct in_gre_socket {
+	struct gre_socket		base;
+	in_addr_t			addr;
+};
+VNET_DEFINE_STATIC(struct gre_sockets *, ipv4_sockets) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv4_srchashtbl) = NULL;
+#define	V_ipv4_sockets		VNET(ipv4_sockets)
 #define	V_ipv4_hashtbl		VNET(ipv4_hashtbl)
 #define	V_ipv4_srchashtbl	VNET(ipv4_srchashtbl)
 #define	GRE_HASH(src, dst)	(V_ipv4_hashtbl[\
     in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
 #define	GRE_SRCHASH(src)	(V_ipv4_srchashtbl[\
     fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
+#define	GRE_SOCKHASH(src)	(V_ipv4_sockets[\
+    fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
 #define	GRE_HASH_SC(sc)		GRE_HASH((sc)->gre_oip.ip_src.s_addr,\
     (sc)->gre_oip.ip_dst.s_addr)
 
@@ -94,17 +107,43 @@ in_gre_hashval(in_addr_t src, in_addr_t dst)
 	return (fnv_32_buf(&dst, sizeof(dst), ret));
 }
 
+static struct gre_socket*
+in_gre_lookup_socket(in_addr_t addr)
+{
+	struct gre_socket *gs;
+	struct in_gre_socket *s;
+
+	CK_LIST_FOREACH(gs, &GRE_SOCKHASH(addr), chain) {
+		s = __containerof(gs, struct in_gre_socket, base);
+		if (s->addr == addr)
+			break;
+	}
+	return (gs);
+}
+
 static int
-in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst)
+in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst,
+    uint32_t opts)
 {
+	struct gre_list *head;
 	struct gre_softc *tmp;
+	struct gre_socket *gs;
 
 	if (sc->gre_family == AF_INET &&
 	    sc->gre_oip.ip_src.s_addr == src &&
-	    sc->gre_oip.ip_dst.s_addr == dst)
+	    sc->gre_oip.ip_dst.s_addr == dst &&
+	    (sc->gre_options & GRE_UDPENCAP) == (opts & GRE_UDPENCAP))
 		return (EEXIST);
 
-	CK_LIST_FOREACH(tmp, &GRE_HASH(src, dst), chain) {
+	if (opts & GRE_UDPENCAP) {
+		gs = in_gre_lookup_socket(src);
+		if (gs == NULL)
+			return (0);
+		head = &gs->list;
+	} else
+		head = &GRE_HASH(src, dst);
+
+	CK_LIST_FOREACH(tmp, head, chain) {
 		if (tmp == sc)
 			continue;
 		if (tmp->gre_oip.ip_src.s_addr == src &&
@@ -181,35 +220,228 @@ in_gre_srcaddr(void *arg __unused, const struct sockad
 }
 
 static void
+in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
+    const struct sockaddr *sa, void *ctx)
+{
+	struct epoch_tracker et;
+	struct gre_socket *gs;
+	struct gre_softc *sc;
+	in_addr_t dst;
+
+	NET_EPOCH_ENTER(et);
+	/*
+	 * udp_append() holds reference to inp, it is safe to check
+	 * inp_flags2 without INP_RLOCK().
+	 * If socket was closed before we have entered NET_EPOCH section,
+	 * INP_FREED flag should be set. Otherwise it should be safe to
+	 * make access to ctx data, because gre_so will be freed by
+	 * gre_sofree() via epoch_call().
+	 */
+	if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+		NET_EPOCH_EXIT(et);
+		m_freem(m);
+		return;
+	}
+
+	gs = (struct gre_socket *)ctx;
+	dst = ((const struct sockaddr_in *)sa)->sin_addr.s_addr;
+	CK_LIST_FOREACH(sc, &gs->list, chain) {
+		if (sc->gre_oip.ip_dst.s_addr == dst)
+			break;
+	}
+	if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
+		gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
+		NET_EPOCH_EXIT(et);
+		return;
+	}
+	m_freem(m);
+	NET_EPOCH_EXIT(et);
+}
+
+static int
+in_gre_setup_socket(struct gre_softc *sc)
+{
+	struct sockopt sopt;
+	struct sockaddr_in sin;
+	struct in_gre_socket *s;
+	struct gre_socket *gs;
+	in_addr_t addr;
+	int error, value;
+
+	/*
+	 * NOTE: we are protected with gre_ioctl_sx lock.
+	 *
+	 * First check whether the socket is already configured.
+	 * If so, check that the source address was not changed.
+	 * If the address is different, check that there are no other tunnels
+	 * and close the socket.
+	 */
+	addr = sc->gre_oip.ip_src.s_addr;
+	gs = sc->gre_so;
+	if (gs != NULL) {
+		s = __containerof(gs, struct in_gre_socket, base);
+		if (s->addr != addr) {
+			if (CK_LIST_EMPTY(&gs->list)) {
+				CK_LIST_REMOVE(gs, chain);
+				soclose(gs->so);
+				epoch_call(net_epoch_preempt, &gs->epoch_ctx,
+				    gre_sofree);
+			}
+			gs = sc->gre_so = NULL;
+		}
+	}
+
+	if (gs == NULL) {
+		/*
+		 * Check that socket for given address is already
+		 * configured.
+		 */
+		gs = in_gre_lookup_socket(addr);
+		if (gs == NULL) {
+			s = malloc(sizeof(*s), M_GRE, M_WAITOK | M_ZERO);
+			s->addr = addr;
+			gs = &s->base;
+
+			error = socreate(sc->gre_family, &gs->so,
+			    SOCK_DGRAM, IPPROTO_UDP, curthread->td_ucred,
+			    curthread);
+			if (error != 0) {
+				if_printf(GRE2IFP(sc),
+				    "cannot create socket: %d\n", error);
+				free(s, M_GRE);
+				return (error);
+			}
+
+			error = udp_set_kernel_tunneling(gs->so,
+			    in_gre_udp_input, NULL, gs);
+			if (error != 0) {
+				if_printf(GRE2IFP(sc),
+				    "cannot set UDP tunneling: %d\n", error);
+				goto fail;
+			}
+
+			memset(&sopt, 0, sizeof(sopt));
+			sopt.sopt_dir = SOPT_SET;
+			sopt.sopt_level = IPPROTO_IP;
+			sopt.sopt_name = IP_BINDANY;
+			sopt.sopt_val = &value;
+			sopt.sopt_valsize = sizeof(value);
+			value = 1;
+			error = sosetopt(gs->so, &sopt);
+			if (error != 0) {
+				if_printf(GRE2IFP(sc),
+				    "cannot set IP_BINDANY opt: %d\n", error);
+				goto fail;
+			}
+
+			memset(&sin, 0, sizeof(sin));
+			sin.sin_family = AF_INET;
+			sin.sin_len = sizeof(sin);
+			sin.sin_addr.s_addr = addr;
+			sin.sin_port = htons(GRE_UDPPORT);
+			error = sobind(gs->so, (struct sockaddr *)&sin,
+			    curthread);
+			if (error != 0) {
+				if_printf(GRE2IFP(sc),
+				    "cannot bind socket: %d\n", error);
+				goto fail;
+			}
+			/* Add socket to the chain */
+			CK_LIST_INSERT_HEAD(&GRE_SOCKHASH(addr), gs, chain);
+		}
+	}
+
+	/* Add softc to the socket's list */
+	CK_LIST_INSERT_HEAD(&gs->list, sc, chain);
+	sc->gre_so = gs;
+	return (0);
+fail:
+	soclose(gs->so);
+	free(s, M_GRE);
+	return (error);
+}
+
+static int
 in_gre_attach(struct gre_softc *sc)
 {
+	struct grehdr *gh;
+	int error;
 
-	sc->gre_hlen = sizeof(struct greip);
+	if (sc->gre_options & GRE_UDPENCAP) {
+		sc->gre_csumflags = CSUM_UDP;
+		sc->gre_hlen = sizeof(struct greudp);
+		sc->gre_oip.ip_p = IPPROTO_UDP;
+		gh = &sc->gre_udphdr->gi_gre;
+		gre_update_udphdr(sc, &sc->gre_udp,
+		    in_pseudo(sc->gre_oip.ip_src.s_addr,
+		    sc->gre_oip.ip_dst.s_addr, 0));
+	} else {
+		sc->gre_hlen = sizeof(struct greip);
+		sc->gre_oip.ip_p = IPPROTO_GRE;
+		gh = &sc->gre_iphdr->gi_gre;
+	}
 	sc->gre_oip.ip_v = IPVERSION;
 	sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
-	sc->gre_oip.ip_p = IPPROTO_GRE;
-	gre_updatehdr(sc, &sc->gre_gihdr->gi_gre);
-	CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
+	gre_update_hdr(sc, gh);
+
+	/*
+	 * If we return error, this means that sc is not linked,
+	 * and caller should reset gre_family and free(sc->gre_hdr).
+	 */
+	if (sc->gre_options & GRE_UDPENCAP) {
+		error = in_gre_setup_socket(sc);
+		if (error != 0)
+			return (error);
+	} else
+		CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
 	CK_LIST_INSERT_HEAD(&GRE_SRCHASH(sc->gre_oip.ip_src.s_addr),
 	    sc, srchash);
+
+	/* Set IFF_DRV_RUNNING if interface is ready */
+	in_gre_set_running(sc);
+	return (0);
 }
 
-void
+int
 in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value)
 {
+	int error;
 
-	MPASS(cmd == GRESKEY || cmd == GRESOPTS);
-
 	/* NOTE: we are protected with gre_ioctl_sx lock */
+	MPASS(cmd == GRESKEY || cmd == GRESOPTS || cmd == GRESPORT);
 	MPASS(sc->gre_family == AF_INET);
+
+	/*
+	 * If we are going to change the encapsulation protocol, check
+	 * for duplicate tunnels. Return EEXIST here so as not to confuse
+	 * the user.
+	 */
+	if (cmd == GRESOPTS &&
+	    (sc->gre_options & GRE_UDPENCAP) != (value & GRE_UDPENCAP) &&
+	    in_gre_checkdup(sc, sc->gre_oip.ip_src.s_addr,
+		sc->gre_oip.ip_dst.s_addr, value) == EADDRNOTAVAIL)
+		return (EEXIST);
+
 	CK_LIST_REMOVE(sc, chain);
 	CK_LIST_REMOVE(sc, srchash);
 	GRE_WAIT();
-	if (cmd == GRESKEY)
+	switch (cmd) {
+	case GRESKEY:
 		sc->gre_key = value;
-	else
+		break;
+	case GRESOPTS:
 		sc->gre_options = value;
-	in_gre_attach(sc);
+		break;
+	case GRESPORT:
+		sc->gre_port = value;
+		break;
+	}
+	error = in_gre_attach(sc);
+	if (error != 0) {
+		sc->gre_family = 0;
+		free(sc->gre_hdr, M_GRE);
+	}
+	return (error);
 }
 
 int
@@ -241,9 +473,10 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t
 		if (V_ipv4_hashtbl == NULL) {
 			V_ipv4_hashtbl = gre_hashinit();
 			V_ipv4_srchashtbl = gre_hashinit();
+			V_ipv4_sockets = (struct gre_sockets *)gre_hashinit();
 		}
 		error = in_gre_checkdup(sc, src->sin_addr.s_addr,
-		    dst->sin_addr.s_addr);
+		    dst->sin_addr.s_addr, sc->gre_options);
 		if (error == EADDRNOTAVAIL)
 			break;
 		if (error == EEXIST) {
@@ -251,7 +484,7 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t
 			error = 0;
 			break;
 		}
-		ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t),
+		ip = malloc(sizeof(struct greudp) + 3 * sizeof(uint32_t),
 		    M_GRE, M_WAITOK | M_ZERO);
 		ip->ip_src.s_addr = src->sin_addr.s_addr;
 		ip->ip_dst.s_addr = dst->sin_addr.s_addr;
@@ -267,8 +500,11 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t
 		sc->gre_hdr = ip;
 		sc->gre_oseq = 0;
 		sc->gre_iseq = UINT32_MAX;
-		in_gre_attach(sc);
-		in_gre_set_running(sc);
+		error = in_gre_attach(sc);
+		if (error != 0) {
+			sc->gre_family = 0;
+			free(sc->gre_hdr, M_GRE);
+		}
 		break;
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
@@ -354,5 +590,6 @@ in_gre_uninit(void)
 		V_ipv4_hashtbl = NULL;
 		GRE_WAIT();
 		gre_hashdestroy(V_ipv4_srchashtbl);
+		gre_hashdestroy((struct gre_list *)V_ipv4_sockets);
 	}
 }

Modified: stable/12/sys/netinet6/ip6_gre.c
==============================================================================
--- stable/12/sys/netinet6/ip6_gre.c	Fri May 24 08:40:37 2019	(r348232)
+++ stable/12/sys/netinet6/ip6_gre.c	Fri May 24 08:42:34 2019	(r348233)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/jail.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
@@ -51,8 +52,12 @@ __FBSDID("$FreeBSD$");
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #endif
+#include <netinet/in_pcb.h>
 #include <netinet/ip_encap.h>
+#include <netinet/ip_var.h>
 #include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/scope6_var.h>
@@ -65,14 +70,22 @@ SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, grehlim, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip6_gre_hlim), 0, "Default hop limit for encapsulated packets");
 
+struct in6_gre_socket {
+	struct gre_socket	base;
+	struct in6_addr		addr; /* scope zone id is embedded */
+};
+VNET_DEFINE_STATIC(struct gre_sockets *, ipv6_sockets) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv6_hashtbl) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv6_srchashtbl) = NULL;
+#define	V_ipv6_sockets		VNET(ipv6_sockets)
 #define	V_ipv6_hashtbl		VNET(ipv6_hashtbl)
 #define	V_ipv6_srchashtbl	VNET(ipv6_srchashtbl)
 #define	GRE_HASH(src, dst)	(V_ipv6_hashtbl[\
     in6_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
 #define	GRE_SRCHASH(src)	(V_ipv6_srchashtbl[\
     fnv_32_buf((src), sizeof(*src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
+#define	GRE_SOCKHASH(src)	(V_ipv6_sockets[\
+    fnv_32_buf((src), sizeof(*src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
 #define	GRE_HASH_SC(sc)		GRE_HASH(&(sc)->gre_oip6.ip6_src,\
     &(sc)->gre_oip6.ip6_dst)
 
@@ -85,18 +98,43 @@ in6_gre_hashval(const struct in6_addr *src, const stru
 	return (fnv_32_buf(dst, sizeof(*dst), ret));
 }
 
+static struct gre_socket*
+in6_gre_lookup_socket(const struct in6_addr *addr)
+{
+	struct gre_socket *gs;
+	struct in6_gre_socket *s;
+
+	CK_LIST_FOREACH(gs, &GRE_SOCKHASH(addr), chain) {
+		s = __containerof(gs, struct in6_gre_socket, base);
+		if (IN6_ARE_ADDR_EQUAL(&s->addr, addr))
+			break;
+	}
+	return (gs);
+}
+
 static int

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905240842.x4O8gYRa026791>