Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 12 Jun 2009 15:44:35 +0000 (UTC)
From:      VANHULLEBUS Yvan <vanhu@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r194062 - in head/sys: conf net netinet netipsec sys
Message-ID:  <200906121544.n5CFiZTa085763@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: vanhu
Date: Fri Jun 12 15:44:35 2009
New Revision: 194062
URL: http://svn.freebsd.org/changeset/base/194062

Log:
  Added support for NAT-Traversal (RFC 3948) in IPsec stack.
  
  Thanks to (no special order) Emmanuel Dreyfus (manu@netbsd.org), Larry
  Baird (lab@gta.com), gnn, bz, and other FreeBSD devs, Julien Vanherzeele
  (julien.vanherzeele@netasq.com, for years of bug reporting), the PFSense
  team, and all people who used / tried the NAT-T patch for years and
  reported bugs, patches, etc...
  
  X-MFC: never
  
  Reviewed by:	bz
  Approved by:	gnn(mentor)
  Obtained from:	NETASQ

Modified:
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/net/pfkeyv2.h
  head/sys/netinet/in_proto.c
  head/sys/netinet/udp.h
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet/udp_var.h
  head/sys/netipsec/ipsec_input.c
  head/sys/netipsec/ipsec_output.c
  head/sys/netipsec/key.c
  head/sys/netipsec/key.h
  head/sys/netipsec/keydb.h
  head/sys/sys/mbuf.h

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/conf/NOTES	Fri Jun 12 15:44:35 2009	(r194062)
@@ -534,6 +534,11 @@ options 	IPSEC			#IP security (requires 
 # using ipfw(8)'s 'ipsec' keyword, when this option is enabled.
 #
 #options 	IPSEC_FILTERTUNNEL	#filter ipsec packets from a tunnel
+#
+# Set IPSEC_NAT_T to enable NAT-Traversal support.  This enables
+# optional UDP encapsulation of ESP packets.
+#
+options		IPSEC_NAT_T		#NAT-T support, UDP encap of ESP
 
 options 	IPX			#IPX/SPX communications protocols
 

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/conf/options	Fri Jun 12 15:44:35 2009	(r194062)
@@ -401,6 +401,7 @@ IPFIREWALL_VERBOSE_LIMIT	opt_ipfw.h
 IPSEC			opt_ipsec.h
 IPSEC_DEBUG		opt_ipsec.h
 IPSEC_FILTERTUNNEL	opt_ipsec.h
+IPSEC_NAT_T		opt_ipsec.h
 IPSTEALTH
 IPX
 KRPC

Modified: head/sys/net/pfkeyv2.h
==============================================================================
--- head/sys/net/pfkeyv2.h	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/net/pfkeyv2.h	Fri Jun 12 15:44:35 2009	(r194062)
@@ -255,6 +255,34 @@ struct sadb_x_ipsecrequest {
    */
 };
 
+/* NAT-Traversal type, see RFC 3948 (and drafts). */
+/* sizeof(struct sadb_x_nat_t_type) == 8 */
+struct sadb_x_nat_t_type {
+  u_int16_t sadb_x_nat_t_type_len;
+  u_int16_t sadb_x_nat_t_type_exttype;
+  u_int8_t sadb_x_nat_t_type_type;
+  u_int8_t sadb_x_nat_t_type_reserved[3];
+};
+
+/* NAT-Traversal source or destination port. */
+/* sizeof(struct sadb_x_nat_t_port) == 8 */
+struct sadb_x_nat_t_port { 
+  u_int16_t sadb_x_nat_t_port_len;
+  u_int16_t sadb_x_nat_t_port_exttype;
+  u_int16_t sadb_x_nat_t_port_port;
+  u_int16_t sadb_x_nat_t_port_reserved;
+};
+
+/* ESP fragmentation size. */
+/* sizeof(struct sadb_x_nat_t_frag) == 8 */
+struct sadb_x_nat_t_frag {
+  u_int16_t sadb_x_nat_t_frag_len;
+  u_int16_t sadb_x_nat_t_frag_exttype;
+  u_int16_t sadb_x_nat_t_frag_fraglen;
+  u_int16_t sadb_x_nat_t_frag_reserved;
+};
+
+
 #define SADB_EXT_RESERVED             0
 #define SADB_EXT_SA                   1
 #define SADB_EXT_LIFETIME_CURRENT     2
@@ -275,7 +303,14 @@ struct sadb_x_ipsecrequest {
 #define SADB_X_EXT_KMPRIVATE          17
 #define SADB_X_EXT_POLICY             18
 #define SADB_X_EXT_SA2                19
-#define SADB_EXT_MAX                  19
+#define SADB_X_EXT_NAT_T_TYPE         20
+#define SADB_X_EXT_NAT_T_SPORT        21
+#define SADB_X_EXT_NAT_T_DPORT        22
+#define SADB_X_EXT_NAT_T_OA           23	/* Deprecated. */
+#define SADB_X_EXT_NAT_T_OAI          23	/* Peer's NAT_OA for src of SA. */
+#define SADB_X_EXT_NAT_T_OAR          24	/* Peer's NAT_OA for dst of SA. */
+#define SADB_X_EXT_NAT_T_FRAG         25	/* Manual MTU override. */
+#define SADB_EXT_MAX                  25
 
 #define SADB_SATYPE_UNSPEC	0
 #define SADB_SATYPE_AH		2

Modified: head/sys/netinet/in_proto.c
==============================================================================
--- head/sys/netinet/in_proto.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/in_proto.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -124,7 +124,7 @@ struct protosw inetsw[] = {
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		udp_input,
 	.pr_ctlinput =		udp_ctlinput,
-	.pr_ctloutput =		ip_ctloutput,
+	.pr_ctloutput =		udp_ctloutput,
 	.pr_init =		udp_init,
 #ifdef VIMAGE
 	.pr_destroy =		udp_destroy,

Modified: head/sys/netinet/udp.h
==============================================================================
--- head/sys/netinet/udp.h	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/udp.h	Fri Jun 12 15:44:35 2009	(r194062)
@@ -45,4 +45,23 @@ struct udphdr {
 	u_short	uh_sum;			/* udp checksum */
 };
 
+/* 
+ * User-settable options (used with setsockopt).
+ */
+#define	UDP_ENCAP			0x01
+
+
+/*
+ * UDP Encapsulation of IPsec Packets options.
+ */
+/* Encapsulation types. */
+#define	UDP_ENCAP_ESPINUDP_NON_IKE 	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
+#define	UDP_ENCAP_ESPINUDP		2 /* draft-ietf-ipsec-udp-encaps-02+ */
+
+/* Default ESP in UDP encapsulation port. */
+#define	UDP_ENCAP_ESPINUDP_PORT		500
+
+/* Maximum UDP fragment size for ESP over UDP. */
+#define	UDP_ENCAP_ESPINUDP_MAXFRAGLEN	552
+
 #endif

Modified: head/sys/netinet/udp_usrreq.c
==============================================================================
--- head/sys/netinet/udp_usrreq.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/udp_usrreq.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
+#include <netipsec/esp.h>
 #endif
 
 #include <machine/in_cksum.h>
@@ -151,6 +152,14 @@ SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_i
 static void	udp_detach(struct socket *so);
 static int	udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
 		    struct mbuf *, struct thread *);
+#ifdef IPSEC
+#ifdef IPSEC_NAT_T
+#define	UF_ESPINUDP_ALL	(UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
+#ifdef INET
+static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
+#endif
+#endif /* IPSEC_NAT_T */
+#endif /* IPSEC */
 
 static void
 udp_zone_change(void *tag)
@@ -252,6 +261,13 @@ udp_append(struct inpcb *inp, struct ip 
 #ifdef INET6
 	struct sockaddr_in6 udp_in6;
 #endif
+#ifdef IPSEC
+#ifdef IPSEC_NAT_T
+#ifdef INET
+	struct udpcb *up;
+#endif
+#endif
+#endif
 
 	INP_RLOCK_ASSERT(inp);
 
@@ -263,6 +279,17 @@ udp_append(struct inpcb *inp, struct ip 
 		V_ipsec4stat.in_polvio++;
 		return;
 	}
+#ifdef IPSEC_NAT_T
+#ifdef INET
+	up = intoudpcb(inp);
+	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
+	if (up->u_flags & UF_ESPINUDP_ALL) {	/* IPSec UDP encaps. */
+		n = udp4_espdecap(inp, n, off);
+		if (n == NULL)				/* Consumed. */
+			return;
+	}
+#endif /* INET */
+#endif /* IPSEC_NAT_T */
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
@@ -825,6 +852,99 @@ SYSCTL_PROC(_net_inet_udp, OID_AUTO, get
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
 
+int
+udp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+	int error = 0, optval;
+	struct inpcb *inp;
+#ifdef IPSEC_NAT_T
+	struct udpcb *up;
+#endif
+
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+	INP_WLOCK(inp);
+	if (sopt->sopt_level != IPPROTO_UDP) {
+#ifdef INET6
+		if (INP_CHECK_SOCKAF(so, AF_INET6)) {
+			INP_WUNLOCK(inp);
+			error = ip6_ctloutput(so, sopt);
+		} else {
+#endif
+			INP_WUNLOCK(inp);
+			error = ip_ctloutput(so, sopt);
+#ifdef INET6
+		}
+#endif
+		return (error);
+	}
+
+	switch (sopt->sopt_dir) {
+	case SOPT_SET:
+		switch (sopt->sopt_name) {
+		case UDP_ENCAP:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+			inp = sotoinpcb(so);
+			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+			INP_WLOCK(inp);
+#ifdef IPSEC_NAT_T
+			up = intoudpcb(inp);
+			KASSERT(up != NULL, ("%s: up == NULL", __func__));
+#endif
+			switch (optval) {
+			case 0:
+				/* Clear all UDP encap. */
+#ifdef IPSEC_NAT_T
+				up->u_flags &= ~UF_ESPINUDP_ALL;
+#endif
+				break;
+#ifdef IPSEC_NAT_T
+			case UDP_ENCAP_ESPINUDP:
+			case UDP_ENCAP_ESPINUDP_NON_IKE:
+				up->u_flags &= ~UF_ESPINUDP_ALL;
+				if (optval == UDP_ENCAP_ESPINUDP)
+					up->u_flags |= UF_ESPINUDP;
+				else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
+					up->u_flags |= UF_ESPINUDP_NON_IKE;
+				break;
+#endif
+			default:
+				error = EINVAL;
+				break;
+			}
+			INP_WUNLOCK(inp);
+			break;
+		default:
+			INP_WUNLOCK(inp);
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	case SOPT_GET:
+		switch (sopt->sopt_name) {
+#ifdef IPSEC_NAT_T
+		case UDP_ENCAP:
+			up = intoudpcb(inp);
+			KASSERT(up != NULL, ("%s: up == NULL", __func__));
+			optval = up->u_flags & UF_ESPINUDP_ALL;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+#endif
+		default:
+			INP_WUNLOCK(inp);
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	}	
+	return (error);
+}
+
 static int
 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *td)
@@ -1136,6 +1256,144 @@ release:
 	return (error);
 }
 
+
+#if defined(IPSEC) && defined(IPSEC_NAT_T)
+#ifdef INET
+/*
+ * Potentially decap ESP in UDP frame.  Check for an ESP header
+ * and optional marker; if present, strip the UDP header and
+ * push the result through IPSec.
+ *
+ * Returns mbuf to be processed (potentially re-allocated) or
+ * NULL if consumed and/or processed.
+ */
+static struct mbuf *
+udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
+{
+	INIT_VNET_IPSEC(curvnet);
+	size_t minlen, payload, skip, iphlen;
+	caddr_t data;
+	struct udpcb *up;
+	struct m_tag *tag;
+	struct udphdr *udphdr;
+	struct ip *ip;
+
+	INP_RLOCK_ASSERT(inp);
+
+	/* 
+	 * Pull up data so the longest case is contiguous:
+	 *    IP/UDP hdr + non-IKE marker + ESP hdr.
+	 */
+	minlen = off + sizeof(uint64_t) + sizeof(struct esp);
+	if (minlen > m->m_pkthdr.len)
+		minlen = m->m_pkthdr.len;
+	if ((m = m_pullup(m, minlen)) == NULL) {
+		V_ipsec4stat.in_inval++;
+		return (NULL);		/* Bypass caller processing. */
+	}
+	data = mtod(m, caddr_t);	/* Points to ip header. */
+	payload = m->m_len - off;	/* Size of payload. */
+
+	if (payload == 1 && data[off] == '\xff')
+		return (m);		/* NB: keepalive packet, no decap. */
+
+	up = intoudpcb(inp);
+	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
+	KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
+	    ("u_flags 0x%x", up->u_flags));
+
+	/* 
+	 * Check that the payload is large enough to hold an
+	 * ESP header and compute the amount of data to remove.
+	 *
+	 * NB: the m_pullup() above already made this data contiguous.
+	 * XXX can we assume alignment and eliminate bcopys?
+	 */
+	if (up->u_flags & UF_ESPINUDP_NON_IKE) {
+		/*
+		 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
+		 * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
+		 * possible AH mode non-IKE marker+non-ESP marker
+		 * from draft-ietf-ipsec-udp-encaps-00.txt.
+		 */
+		uint64_t marker;
+
+		if (payload <= sizeof(uint64_t) + sizeof(struct esp))
+			return (m);	/* NB: no decap. */
+		bcopy(data + off, &marker, sizeof(uint64_t));
+		if (marker != 0)	/* Non-IKE marker. */
+			return (m);	/* NB: no decap. */
+		skip = sizeof(uint64_t) + sizeof(struct udphdr);
+	} else {
+		uint32_t spi;
+
+		if (payload <= sizeof(struct esp)) {
+			V_ipsec4stat.in_inval++;
+			m_freem(m);
+			return (NULL);	/* Discard. */
+		}
+		bcopy(data + off, &spi, sizeof(uint32_t));
+		if (spi == 0)		/* Non-ESP marker. */
+			return (m);	/* NB: no decap. */
+		skip = sizeof(struct udphdr);
+	}
+
+	/*
+	 * Set up a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember
+	 * the UDP ports. This is required if we want to select
+	 * the right SPD for multiple hosts behind same NAT.
+	 *
+	 * NB: ports are maintained in network byte order everywhere
+	 *     in the NAT-T code.
+	 */
+	tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
+		2 * sizeof(uint16_t), M_NOWAIT);
+	if (tag == NULL) {
+		V_ipsec4stat.in_nomem++;
+		m_freem(m);
+		return (NULL);		/* Discard. */
+	}
+	iphlen = off - sizeof(struct udphdr);
+	udphdr = (struct udphdr *)(data + iphlen);
+	((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
+	((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
+	m_tag_prepend(m, tag);
+
+	/*
+	 * Remove the UDP header (and possibly the non-IKE marker).
+	 * IP header length is iphlen.
+	 * Before:
+	 *   <--- off --->
+	 *   +----+------+-----+
+	 *   | IP |  UDP | ESP |
+	 *   +----+------+-----+
+	 *        <-skip->
+	 * After:
+	 *          +----+-----+
+	 *          | IP | ESP |
+	 *          +----+-----+
+	 *   <-skip->
+	 */
+	ovbcopy(data, data + skip, iphlen);
+	m_adj(m, skip);
+
+	ip = mtod(m, struct ip *);
+	ip->ip_len -= skip;
+	ip->ip_p = IPPROTO_ESP;
+
+	/*
+	 * We cannot yet update the cksums so clear any
+	 * h/w cksum flags as they are no longer valid.
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
+		m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+
+	(void) ipsec4_common_input(m, iphlen, ip->ip_p);
+	return (NULL);			/* NB: consumed, bypass processing. */
+}
+#endif /* INET */
+#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
+
 static void
 udp_abort(struct socket *so)
 {

Modified: head/sys/netinet/udp_var.h
==============================================================================
--- head/sys/netinet/udp_var.h	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/udp_var.h	Fri Jun 12 15:44:35 2009	(r194062)
@@ -64,6 +64,12 @@ struct udpcb {
 #define	intoudpcb(ip)	((struct udpcb *)(ip)->inp_ppcb)
 #define	sotoudpcb(so)	(intoudpcb(sotoinpcb(so)))
 
+				/* IPsec: ESP in UDP tunneling: */
+#define	UF_ESPINUDP_NON_IKE	0x00000001	/* w/ non-IKE marker .. */
+	/* .. per draft-ietf-ipsec-nat-t-ike-0[01],
+	 * and draft-ietf-ipsec-udp-encaps-(00/)01.txt */
+#define	UF_ESPINUDP		0x00000002	/* w/ non-ESP marker. */
+
 struct udpstat {
 				/* input statistics: */
 	u_long	udps_ipackets;		/* total input packets */
@@ -127,6 +133,7 @@ int		 udp_newudpcb(struct inpcb *);
 void		 udp_discardcb(struct udpcb *);
 
 void		 udp_ctlinput(int, struct sockaddr *, void *);
+int	 	 udp_ctloutput(struct socket *, struct sockopt *);
 void		 udp_init(void);
 #ifdef VIMAGE
 void		 udp_destroy(void);

Modified: head/sys/netipsec/ipsec_input.c
==============================================================================
--- head/sys/netipsec/ipsec_input.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netipsec/ipsec_input.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -121,6 +121,9 @@ ipsec_common_input(struct mbuf *m, int s
 	struct secasvar *sav;
 	u_int32_t spi;
 	int error;
+#ifdef IPSEC_NAT_T
+	struct m_tag *tag;
+#endif
 
 	IPSEC_ISTAT(sproto, V_espstat.esps_input, V_ahstat.ahs_input,
 		V_ipcompstat.ipcomps_input);
@@ -175,6 +178,12 @@ ipsec_common_input(struct mbuf *m, int s
 		m_copydata(m, offsetof(struct ip, ip_dst),
 		    sizeof(struct in_addr),
 		    (caddr_t) &dst_address.sin.sin_addr);
+#ifdef IPSEC_NAT_T
+		/* Find the destination port for NAT-T; see udp*_espdecap. */
+		tag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
+		if (tag != NULL)
+			dst_address.sin.sin_port = ((u_int16_t *)(tag + 1))[1];
+#endif /* IPSEC_NAT_T */
 		break;
 #endif /* INET */
 #ifdef INET6

Modified: head/sys/netipsec/ipsec_output.c
==============================================================================
--- head/sys/netipsec/ipsec_output.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netipsec/ipsec_output.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -84,6 +84,10 @@
 
 #include <machine/in_cksum.h>
 
+#ifdef IPSEC_NAT_T
+#include <netinet/udp.h>
+#endif
+
 #ifdef DEV_ENC
 #include <net/if_enc.h>
 #endif
@@ -180,6 +184,57 @@ ipsec_process_done(struct mbuf *m, struc
 		ip->ip_len = ntohs(ip->ip_len);
 		ip->ip_off = ntohs(ip->ip_off);
 
+#ifdef IPSEC_NAT_T
+		/*
+		 * If NAT-T is enabled, now that all IPsec processing is done
+		 * insert UDP encapsulation header after IP header.
+		 */
+		if (sav->natt_type) {
+#ifdef _IP_VHL
+			const int hlen = IP_VHL_HL(ip->ip_vhl);
+#else
+			const int hlen = (ip->ip_hl << 2);
+#endif
+			int size, off;
+			struct mbuf *mi;
+			struct udphdr *udp;
+
+			size = sizeof(struct udphdr);
+			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
+				/*
+				 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
+				 * draft-ietf-ipsec-udp-encaps-(00/)01.txt,
+				 * ignoring possible AH mode
+				 * non-IKE marker + non-ESP marker
+				 * from draft-ietf-ipsec-udp-encaps-00.txt.
+				 */
+				size += sizeof(u_int64_t);
+			}
+			mi = m_makespace(m, hlen, size, &off);
+			if (mi == NULL) {
+				DPRINTF(("%s: m_makespace for udphdr failed\n",
+				    __func__));
+				error = ENOBUFS;
+				goto bad;
+			}
+
+			udp = (struct udphdr *)(mtod(mi, caddr_t) + off);
+			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE)
+				udp->uh_sport = htons(UDP_ENCAP_ESPINUDP_PORT);
+			else
+				udp->uh_sport =
+					KEY_PORTFROMSADDR(&sav->sah->saidx.src);
+			udp->uh_dport = KEY_PORTFROMSADDR(&sav->sah->saidx.dst);
+			udp->uh_sum = 0;
+			udp->uh_ulen = htons(m->m_pkthdr.len - hlen);
+			ip->ip_len = m->m_pkthdr.len;
+			ip->ip_p = IPPROTO_UDP;
+
+			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE)
+				*(u_int64_t *)(udp + 1) = 0;
+		}
+#endif /* IPSEC_NAT_T */
+
 		return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL);
 #endif /* INET */
 #ifdef INET6

Modified: head/sys/netipsec/key.c
==============================================================================
--- head/sys/netipsec/key.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netipsec/key.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -221,6 +221,12 @@ static const int minsize[] = {
 	0,				/* SADB_X_EXT_KMPRIVATE */
 	sizeof(struct sadb_x_policy),	/* SADB_X_EXT_POLICY */
 	sizeof(struct sadb_x_sa2),	/* SADB_X_SA2 */
+	sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */
+	sizeof(struct sadb_address),	/* SADB_X_EXT_NAT_T_OAI */
+	sizeof(struct sadb_address),	/* SADB_X_EXT_NAT_T_OAR */
+	sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */
 };
 static const int maxsize[] = {
 	sizeof(struct sadb_msg),	/* SADB_EXT_RESERVED */
@@ -243,6 +249,12 @@ static const int maxsize[] = {
 	0,				/* SADB_X_EXT_KMPRIVATE */
 	0,				/* SADB_X_EXT_POLICY */
 	sizeof(struct sadb_x_sa2),	/* SADB_X_SA2 */
+	sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */
+	0,				/* SADB_X_EXT_NAT_T_OAI */
+	0,				/* SADB_X_EXT_NAT_T_OAR */
+	sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */
 };
 
 #ifdef SYSCTL_DECL
@@ -425,6 +437,13 @@ static struct mbuf *key_setsadbmsg __P((
 static struct mbuf *key_setsadbsa __P((struct secasvar *));
 static struct mbuf *key_setsadbaddr __P((u_int16_t,
 	const struct sockaddr *, u_int8_t, u_int16_t));
+#ifdef IPSEC_NAT_T
+static struct mbuf *key_setsadbxport(u_int16_t, u_int16_t);
+static struct mbuf *key_setsadbxtype(u_int16_t);
+#endif
+static void key_porttosaddr(struct sockaddr *, u_int16_t);
+#define	KEY_PORTTOSADDR(saddr, port)				\
+	key_porttosaddr((struct sockaddr *)(saddr), (port))
 static struct mbuf *key_setsadbxsa2 __P((u_int8_t, u_int32_t, u_int32_t));
 static struct mbuf *key_setsadbxpolicy __P((u_int16_t, u_int8_t,
 	u_int32_t));
@@ -1067,12 +1086,21 @@ key_allocsa(
 	struct secasvar *sav;
 	u_int stateidx, arraysize, state;
 	const u_int *saorder_state_valid;
+	int chkport;
 
 	IPSEC_ASSERT(dst != NULL, ("null dst address"));
 
 	KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
 		printf("DP %s from %s:%u\n", __func__, where, tag));
 
+#ifdef IPSEC_NAT_T
+        chkport = (dst->sa.sa_family == AF_INET &&
+	    dst->sa.sa_len == sizeof(struct sockaddr_in) &&
+	    dst->sin.sin_port != 0);
+#else
+	chkport = 0;
+#endif
+
 	/*
 	 * searching SAD.
 	 * XXX: to be checked internal IP header somewhere.  Also when
@@ -1104,11 +1132,11 @@ key_allocsa(
 					continue;
 #if 0	/* don't check src */
 				/* check src address */
-				if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, 0) != 0)
+				if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, chkport) != 0)
 					continue;
 #endif
 				/* check dst address */
-				if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, 0) != 0)
+				if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, chkport) != 0)
 					continue;
 				sa_addref(sav);
 				goto done;
@@ -1798,6 +1826,11 @@ key_spdadd(so, m, mhp)
 	dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
 	xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY];
 
+	/* 
+	 * Note: do not parse SADB_X_EXT_NAT_T_* here:
+	 * we are processing traffic endpoints.
+	 */
+
 	/* make secindex */
 	/* XXX boundary check against sa_len */
 	KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir,
@@ -1931,6 +1964,11 @@ key_spdadd(so, m, mhp)
 	struct sadb_msg *newmsg;
 	int off;
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
+
 	/* create new sadb_msg to reply. */
 	if (lft) {
 		n = key_gather_mbuf(m, mhp, 2, 5, SADB_EXT_RESERVED,
@@ -2053,6 +2091,11 @@ key_spddelete(so, m, mhp)
 	dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
 	xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY];
 
+	/*
+	 * Note: do not parse SADB_X_EXT_NAT_T_* here:
+	 * we are processing traffic endpoints.
+	 */
+
 	/* make secindex */
 	/* XXX boundary check against sa_len */
 	KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir,
@@ -2089,6 +2132,11 @@ key_spddelete(so, m, mhp)
 	struct mbuf *n;
 	struct sadb_msg *newmsg;
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
+
 	/* create new sadb_msg to reply. */
 	n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED,
 	    SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST);
@@ -2435,6 +2483,10 @@ key_setdumpsp(struct secpolicy *sp, u_in
 		goto fail;
 	result = m;
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
 	m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC,
 	    &sp->spidx.src.sa, sp->spidx.prefs,
 	    sp->spidx.ul_proto);
@@ -2581,6 +2633,11 @@ key_spdexpire(sp)
 	lt->sadb_lifetime_usetime = sp->validtime;
 	m_cat(result, m);
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
+
 	/* set sadb_address for source */
 	m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC,
 	    &sp->spidx.src.sa,
@@ -3034,6 +3091,9 @@ key_setsaval(sav, m, mhp)
 	sav->tdb_encalgxform = NULL;	/* encoding algorithm */
 	sav->tdb_authalgxform = NULL;	/* authentication algorithm */
 	sav->tdb_compalgxform = NULL;	/* compression algorithm */
+	/*  Initialize even if NAT-T not compiled in: */
+	sav->natt_type = 0;
+	sav->natt_esp_frag_len = 0;
 
 	/* SA */
 	if (mhp->ext[SADB_EXT_SA] != NULL) {
@@ -3343,6 +3403,12 @@ key_setdumpsa(struct secasvar *sav, u_in
 		SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, SADB_EXT_KEY_AUTH,
 		SADB_EXT_KEY_ENCRYPT, SADB_EXT_IDENTITY_SRC,
 		SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY,
+#ifdef IPSEC_NAT_T
+		SADB_X_EXT_NAT_T_TYPE,
+		SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT,
+		SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR,
+		SADB_X_EXT_NAT_T_FRAG,
+#endif
 	};
 
 	m = key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt);
@@ -3427,6 +3493,36 @@ key_setdumpsa(struct secasvar *sav, u_in
 				goto fail;
 			break;
 
+#ifdef IPSEC_NAT_T
+		case SADB_X_EXT_NAT_T_TYPE:
+			m = key_setsadbxtype(sav->natt_type);
+			if (!m)
+				goto fail;
+			break;
+		
+		case SADB_X_EXT_NAT_T_DPORT:
+			m = key_setsadbxport(
+			    KEY_PORTFROMSADDR(&sav->sah->saidx.dst),
+			    SADB_X_EXT_NAT_T_DPORT);
+			if (!m)
+				goto fail;
+			break;
+
+		case SADB_X_EXT_NAT_T_SPORT:
+			m = key_setsadbxport(
+			    KEY_PORTFROMSADDR(&sav->sah->saidx.src),
+			    SADB_X_EXT_NAT_T_SPORT);
+			if (!m)
+				goto fail;
+			break;
+
+		case SADB_X_EXT_NAT_T_OAI:
+		case SADB_X_EXT_NAT_T_OAR:
+		case SADB_X_EXT_NAT_T_FRAG:
+			/* We do not (yet) support those. */
+			continue;
+#endif
+
 		case SADB_EXT_ADDRESS_PROXY:
 		case SADB_EXT_IDENTITY_SRC:
 		case SADB_EXT_IDENTITY_DST:
@@ -3621,6 +3717,116 @@ key_setsadbxsa2(u_int8_t mode, u_int32_t
 	return m;
 }
 
+#ifdef IPSEC_NAT_T
+/*
+ * Set a type in sadb_x_nat_t_type.
+ */
+static struct mbuf *
+key_setsadbxtype(u_int16_t type)
+{
+	struct mbuf *m;
+	size_t len;
+	struct sadb_x_nat_t_type *p;
+
+	len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_type));
+
+	m = key_alloc_mbuf(len);
+	if (!m || m->m_next) {	/*XXX*/
+		if (m)
+			m_freem(m);
+		return (NULL);
+	}
+
+	p = mtod(m, struct sadb_x_nat_t_type *);
+
+	bzero(p, len);
+	p->sadb_x_nat_t_type_len = PFKEY_UNIT64(len);
+	p->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE;
+	p->sadb_x_nat_t_type_type = type;
+
+	return (m);
+}
+/*
+ * Set a port in sadb_x_nat_t_port.
+ * In contrast to default RFC 2367 behaviour, port is in network byte order.
+ */
+static struct mbuf *
+key_setsadbxport(u_int16_t port, u_int16_t type)
+{
+	struct mbuf *m;
+	size_t len;
+	struct sadb_x_nat_t_port *p;
+
+	len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_port));
+
+	m = key_alloc_mbuf(len);
+	if (!m || m->m_next) {	/*XXX*/
+		if (m)
+			m_freem(m);
+		return (NULL);
+	}
+
+	p = mtod(m, struct sadb_x_nat_t_port *);
+
+	bzero(p, len);
+	p->sadb_x_nat_t_port_len = PFKEY_UNIT64(len);
+	p->sadb_x_nat_t_port_exttype = type;
+	p->sadb_x_nat_t_port_port = port;
+
+	return (m);
+}
+
+/* 
+ * Get port from sockaddr. Port is in network byte order.
+ */
+u_int16_t 
+key_portfromsaddr(struct sockaddr *sa)
+{
+	INIT_VNET_IPSEC(curvnet);
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		return ((struct sockaddr_in *)sa)->sin_port;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		return ((struct sockaddr_in6 *)sa)->sin6_port;
+#endif
+	}
+	KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
+		printf("DP %s unexpected address family %d\n",
+			__func__, sa->sa_family));
+	return (0);
+}
+#endif /* IPSEC_NAT_T */
+
+/*
+ * Set port in struct sockaddr. Port is in network byte order.
+ */
+static void
+key_porttosaddr(struct sockaddr *sa, u_int16_t port)
+{
+	INIT_VNET_IPSEC(curvnet);
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		((struct sockaddr_in *)sa)->sin_port = port;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		((struct sockaddr_in6 *)sa)->sin6_port = port;
+		break;
+#endif
+	default:
+		ipseclog((LOG_DEBUG, "%s: unexpected address family %d.\n",
+			__func__, sa->sa_family));
+		break;
+	}
+}
+
 /*
  * set data into sadb_x_policy
  */
@@ -3818,6 +4024,8 @@ key_cmpsaidx(
 	const struct secasindex *saidx1,
 	int flag)
 {
+	int chkport = 0;
+
 	/* sanity */
 	if (saidx0 == NULL && saidx1 == NULL)
 		return 1;
@@ -3855,10 +4063,25 @@ key_cmpsaidx(
 				return 0;
 		}
 
-		if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, 0) != 0) {
+#ifdef IPSEC_NAT_T
+		/*
+		 * If NAT-T is enabled, check ports for tunnel mode.
+		 * Do not check ports if they are set to zero in the SPD.
+		 * Also do not do it for transport mode, as there is no
+		 * port information available in the SP.
+		 */
+		if (saidx1->mode == IPSEC_MODE_TUNNEL &&
+		    saidx1->src.sa.sa_family == AF_INET &&
+		    saidx1->dst.sa.sa_family == AF_INET &&
+		    ((const struct sockaddr_in *)(&saidx1->src))->sin_port &&
+		    ((const struct sockaddr_in *)(&saidx1->dst))->sin_port)
+			chkport = 1;
+#endif /* IPSEC_NAT_T */
+
+		if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, chkport) != 0) {
 			return 0;
 		}
-		if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, 0) != 0) {
+		if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, chkport) != 0) {
 			return 0;
 		}
 	}
@@ -4492,7 +4715,10 @@ key_getspi(so, m, mhp)
 		return key_senderror(so, m, EINVAL);
 	}
 
-	/* make sure if port number is zero. */
+	/*
+	 * Make sure the port numbers are zero.
+	 * In case of NAT-T we will update them later if needed.
+	 */
 	switch (((struct sockaddr *)(src0 + 1))->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr *)(src0 + 1))->sa_len !=
@@ -4529,6 +4755,43 @@ key_getspi(so, m, mhp)
 	/* XXX boundary check against sa_len */
 	KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx);
 
+#ifdef IPSEC_NAT_T
+	/*
+	 * Handle NAT-T info if present.
+	 * We made sure the port numbers are zero above, so we do
+	 * not have to worry in case we do not update them.
+	 */
+	if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL)
+		ipseclog((LOG_DEBUG, "%s: NAT-T OAi present\n", __func__));
+	if (mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL)
+		ipseclog((LOG_DEBUG, "%s: NAT-T OAr present\n", __func__));
+
+	if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) {
+		struct sadb_x_nat_t_type *type;
+		struct sadb_x_nat_t_port *sport, *dport;
+
+		if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) {
+			ipseclog((LOG_DEBUG, "%s: invalid nat-t message "
+			    "passed.\n", __func__));
+			return key_senderror(so, m, EINVAL);
+		}
+
+		sport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_SPORT];
+		dport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_DPORT];
+
+		if (sport)
+			KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port);
+		if (dport)
+			KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port);
+	}
+#endif
+
 	/* SPI allocation */
 	spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE],
 	                       &saidx);
@@ -4726,6 +4989,11 @@ key_update(so, m, mhp)
 	INIT_VNET_IPSEC(curvnet);
 	struct sadb_sa *sa0;
 	struct sadb_address *src0, *dst0;
+#ifdef IPSEC_NAT_T
+	struct sadb_x_nat_t_type *type;
+	struct sadb_address *iaddr, *raddr;
+	struct sadb_x_nat_t_frag *frag;
+#endif
 	struct secasindex saidx;
 	struct secashead *sah;
 	struct secasvar *sav;
@@ -4784,6 +5052,73 @@ key_update(so, m, mhp)
 	/* XXX boundary check against sa_len */
 	KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx);
 
+	/*
+	 * Make sure the port numbers are zero.
+	 * In case of NAT-T we will update them later if needed.
+	 */
+	KEY_PORTTOSADDR(&saidx.src, 0);
+	KEY_PORTTOSADDR(&saidx.dst, 0);
+
+#ifdef IPSEC_NAT_T
+	/*
+	 * Handle NAT-T info if present.
+	 */
+	if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) {
+		struct sadb_x_nat_t_port *sport, *dport;
+
+		if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) {
+			ipseclog((LOG_DEBUG, "%s: invalid message.\n",
+			    __func__));
+			return key_senderror(so, m, EINVAL);
+		}
+
+		type = (struct sadb_x_nat_t_type *)
+		    mhp->ext[SADB_X_EXT_NAT_T_TYPE];
+		sport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_SPORT];
+		dport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_DPORT];

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200906121544.n5CFiZTa085763>