From owner-p4-projects Mon Aug 12 19:27:12 2002 Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 84DEC37B401; Mon, 12 Aug 2002 19:26:28 -0700 (PDT) Delivered-To: perforce@freebsd.org Received: from mx1.FreeBSD.org (mx1.FreeBSD.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 20B3A37B400 for ; Mon, 12 Aug 2002 19:26:28 -0700 (PDT) Received: from freefall.freebsd.org (freefall.FreeBSD.org [216.136.204.21]) by mx1.FreeBSD.org (Postfix) with ESMTP id 91D9943E4A for ; Mon, 12 Aug 2002 19:26:27 -0700 (PDT) (envelope-from peter@freebsd.org) Received: from freefall.freebsd.org (perforce@localhost [127.0.0.1]) by freefall.freebsd.org (8.12.4/8.12.4) with ESMTP id g7D2QRJU067428 for ; Mon, 12 Aug 2002 19:26:27 -0700 (PDT) (envelope-from peter@freebsd.org) Received: (from perforce@localhost) by freefall.freebsd.org (8.12.4/8.12.4/Submit) id g7D2QRpH067425 for perforce@freebsd.org; Mon, 12 Aug 2002 19:26:27 -0700 (PDT) Date: Mon, 12 Aug 2002 19:26:27 -0700 (PDT) Message-Id: <200208130226.g7D2QRpH067425@freefall.freebsd.org> X-Authentication-Warning: freefall.freebsd.org: perforce set sender to peter@freebsd.org using -f From: Peter Wemm Subject: PERFORCE change 15891 for review To: Perforce Change Reviews Sender: owner-p4-projects@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG http://people.freebsd.org/~peter/p4db/chv.cgi?CH=15891 Change 15891 by peter@peter_daintree on 2002/08/12 19:26:09 un-fubar Affected files ... .. //depot/projects/ia64/sys/netinet/tcp_subr.c#18 integrate Differences ... 
==== //depot/projects/ia64/sys/netinet/tcp_subr.c#18 (text+ko) ==== @@ -743,157 +743,178 @@ } inp->inp_ppcb = NULL; soisdisconnected(so); -#c = IPV6_VERSION; - ip6->ip6_nxt = IPPROTO_TCP; - ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + - tlen)); - tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); - } else -#endif - { - tlen += sizeof (struct tcpiphdr); - ip->ip_len = tlen; - ip->ip_ttl = ip_defttl; - } - m->m_len = tlen; - m->m_pkthdr.len = tlen; - m->m_pkthdr.rcvif = (struct ifnet *) 0; - nth->th_seq = htonl(seq); - nth->th_ack = htonl(ack); - nth->th_x2 = 0; - nth->th_off = sizeof (struct tcphdr) >> 2; - nth->th_flags = flags; - if (tp) - nth->th_win = htons((u_short) (win >> tp->rcv_scale)); +#ifdef INET6 + if (INP_CHECK_SOCKAF(so, AF_INET6)) + in6_pcbdetach(inp); else - nth->th_win = htons((u_short)win); - nth->th_urp = 0; -#ifdef INET6 - if (isipv6) { - nth->th_sum = 0; - nth->th_sum = in6_cksum(m, IPPROTO_TCP, - sizeof(struct ip6_hdr), - tlen - sizeof(struct ip6_hdr)); - ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, - ro6 && ro6->ro_rt ? - ro6->ro_rt->rt_ifp : - NULL); - } else #endif /* INET6 */ - { - nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); - m->m_pkthdr.csum_flags = CSUM_TCP; - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); - } -#ifdef TCPDEBUG - if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) - tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); -#endif -#ifdef IPSEC - if (ipsec_setsocket(m, tp ? 
tp->t_inpcb->inp_socket : NULL) != 0) { - m_freem(m); - return; - } -#endif -#ifdef INET6 - if (isipv6) { - (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL); - if (ro6 == &sro6 && ro6->ro_rt) { - RTFREE(ro6->ro_rt); - ro6->ro_rt = NULL; + in_pcbdetach(inp); + tcpstat.tcps_closed++; + return ((struct tcpcb *)0); +} + +void +tcp_drain() +{ + if (do_tcpdrain) + { + struct inpcb *inpb; + struct tcpcb *tcpb; + struct tseg_qent *te; + + /* + * Walk the tcpbs, if existing, and flush the reassembly queue, + * if there is one... + * XXX: The "Net/3" implementation doesn't imply that the TCP + * reassembly queue should be flushed, but in a situation + * where we're really low on mbufs, this is potentially + * useful. + */ + INP_INFO_RLOCK(&tcbinfo); + LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) { + INP_LOCK(inpb); + if ((tcpb = intotcpcb(inpb))) { + while ((te = LIST_FIRST(&tcpb->t_segq)) + != NULL) { + LIST_REMOVE(te, tqe_q); + m_freem(te->tqe_m); + FREE(te, M_TSEGQ); + } + } + INP_UNLOCK(inpb); } - } else -#endif /* INET6 */ - { - (void) ip_output(m, NULL, ro, ipflags, NULL); - if (ro == &sro && ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; + INP_INFO_RUNLOCK(&tcbinfo); } - } } /* - * Create a new TCP control block, making an - * empty reassembly queue and hooking it to the argument - * protocol control block. The `inp' parameter must have - * come from the zone allocator set up in tcp_init(). + * Notify a tcp user of an asynchronous error; + * store error as soft error, but wake up user + * (for now, won't do anything until can select for soft error). + * + * Do not wake up user since there currently is no mechanism for + * reporting soft errors (yet - a kqueue filter may be added). 
*/ -struct tcpcb * -tcp_newtcpcb(inp) +static struct inpcb * +tcp_notify(inp, error) struct inpcb *inp; + int error; { - struct inp_tp *it; - register struct tcpcb *tp; -#ifdef INET6 - int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; -#endif /* INET6 */ + struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; - it = (struct inp_tp *)inp; - tp = &it->tcb; - bzero((char *) tp, sizeof(struct tcpcb)); - LIST_INIT(&tp->t_segq); - tp->t_maxseg = tp->t_maxopd = -#ifdef INET6 - isipv6 ? tcp_v6mssdflt : -#endif /* INET6 */ - tcp_mssdflt; + /* + * Ignore some errors if we are hooked up. + * If connection hasn't completed, has retransmitted several times, + * and receives a second error, give up now. This is better + * than waiting a long time to establish a connection that + * can never complete. + */ + if (tp->t_state == TCPS_ESTABLISHED && + (error == EHOSTUNREACH || error == ENETUNREACH || + error == EHOSTDOWN)) { + return inp; + } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && + tp->t_softerror) { + tcp_drop(tp, error); + return (struct inpcb *)0; + } else { + tp->t_softerror = error; + return inp; + } +#if 0 + wakeup((caddr_t) &so->so_timeo); + sorwakeup(so); + sowwakeup(so); +#endif +} - /* Set up our timeouts. */ - callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0); - callout_init(tp->tt_persist = &it->inp_tp_persist, 0); - callout_init(tp->tt_keep = &it->inp_tp_keep, 0); - callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0); - callout_init(tp->tt_delack = &it->inp_tp_delack, 0); +static int +tcp_pcblist(SYSCTL_HANDLER_ARGS) +{ + int error, i, n, s; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; - if (tcp_do_rfc1323) - tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); - if (tcp_do_rfc1644) - tp->t_flags |= TF_REQ_CC; - tp->t_inpcb = inp; /* XXX */ /* - * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no - * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives - * reasonable initial retransmit time. 
+ * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. */ - tp->t_srtt = TCPTV_SRTTBASE; - tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; - tp->t_rttmin = tcp_rexmit_min; - tp->t_rxtcur = TCPTV_RTOBASE; - tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; - tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; - tp->t_rcvtime = ticks; - /* - * IPv4 TTL initialization is necessary for an IPv6 socket as well, - * because the socket may be bound to an IPv6 wildcard address, - * which may match an IPv4-mapped IPv6 address. + if (req->oldptr == 0) { + n = tcbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xtcpcb); + return 0; + } + + if (req->newptr != 0) + return EPERM; + + /* + * OK, now we're committed to doing something. */ - inp->inp_ip_ttl = ip_defttl; - inp->inp_ppcb = (caddr_t)tp; - return (tp); /* XXX */ -} + s = splnet(); + INP_INFO_RLOCK(&tcbinfo); + gencnt = tcbinfo.ipi_gencnt; + n = tcbinfo.ipi_count; + INP_INFO_RUNLOCK(&tcbinfo); + splx(s); + + sysctl_wire_old_buffer(req, 2 * (sizeof xig) + + n * sizeof(struct xtcpcb)); + + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) + return error; -/* - * Drop a TCP connection, reporting - * the specified error. If connection is synchronized, - * then send a RST to peer. 
- */ -struct tcpcb * -tcp_drop(tp, errno) - register struct tcpcb *tp; - int errno; -{ - struct socket *so = tp->t_inpcb->inp_socket; + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) + return ENOMEM; + + s = splnet(); + INP_INFO_RLOCK(&tcbinfo); + for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n; + inp = LIST_NEXT(inp, inp_list)) { + INP_LOCK(inp); + if (inp->inp_gencnt <= gencnt && + cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) + inp_list[i++] = inp; + INP_UNLOCK(inp); + } + INP_INFO_RUNLOCK(&tcbinfo); + splx(s); + n = i; - if (TCPS_HAVERCVDSYN(tp->t_state)) { - tp->t_state = TCPS_CLOSED; - (void) tcp_output(tp); - tcpstat.tcps_drops++; - } else - tcpstat.tcps_conndrops++; - if (errno == ETIMEDOUT && tp->t_softerror) - errno = nows that something happened + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + INP_LOCK(inp); + if (inp->inp_gencnt <= gencnt) { + struct xtcpcb xt; + caddr_t inp_ppcb; + xt.xt_len = sizeof xt; + /* XXX should avoid extra copy */ + bcopy(inp, &xt.xt_inp, sizeof *inp); + inp_ppcb = inp->inp_ppcb; + if (inp_ppcb != NULL) + bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); + else + bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xt.xt_socket); + error = SYSCTL_OUT(req, &xt, sizeof xt); + } + INP_UNLOCK(inp); + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ @@ -1369,127 +1390,144 @@ tcpstat.tcps_mturesent++; tp->t_rtttime = 0; - tof(mss)); - optlen = TCPOLEN_MAXSEG; + tp->snd_nxt = tp->snd_una; + tcp_output(tp); + } + return inp; +} + +/* + * Look-up the routing entry to the peer of this inpcb. If no route + * is found and it cannot be allocated then return NULL. 
This routine + * is called by TCP routines that access the rmx structure and by tcp_mss + * to get the interface MTU. + */ +struct rtentry * +tcp_rtlookup(inc) + struct in_conninfo *inc; +{ + struct route *ro; + struct rtentry *rt; - if ((tp->t_flags & TF_REQ_SCALE) && - ((flags & TH_ACK) == 0 || - (tp->t_flags & TF_RCVD_SCALE))) { - *((u_int32_t *)(opt + optlen)) = htonl( - TCPOPT_NOP << 24 | - TCPOPT_WINDOW << 16 | - TCPOLEN_WINDOW << 8 | - tp->request_r_scale); - optlen += 4; - } + ro = &inc->inc_route; + rt = ro->ro_rt; + if (rt == NULL || !(rt->rt_flags & RTF_UP)) { + /* No route yet, so try to acquire one */ + if (inc->inc_faddr.s_addr != INADDR_ANY) { + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(struct sockaddr_in); + ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = + inc->inc_faddr; + rtalloc(ro); + rt = ro->ro_rt; } - } + } + return rt; +} - /* - * Send a timestamp and echo-reply if this is a SYN and our side - * wants to use timestamps (TF_REQ_TSTMP is set) or both our side - * and our peer have sent timestamps in our SYN's. - */ - if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && - (flags & TH_RST) == 0 && - ((flags & TH_ACK) == 0 || - (tp->t_flags & TF_RCVD_TSTMP))) { - u_int32_t *lp = (u_int32_t *)(opt + optlen); +#ifdef INET6 +struct rtentry * +tcp_rtlookup6(inc) + struct in_conninfo *inc; +{ + struct route_in6 *ro6; + struct rtentry *rt; - /* Form timestamp option as shown in appendix A of RFC 1323. 
*/ - *lp++ = htonl(TCPOPT_TSTAMP_HDR); - *lp++ = htonl(ticks); - *lp = htonl(tp->ts_recent); - optlen += TCPOLEN_TSTAMP_APPA; - } + ro6 = &inc->inc6_route; + rt = ro6->ro_rt; + if (rt == NULL || !(rt->rt_flags & RTF_UP)) { + /* No route yet, so try to acquire one */ + if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { + ro6->ro_dst.sin6_family = AF_INET6; + ro6->ro_dst.sin6_len = sizeof(struct sockaddr_in6); + ro6->ro_dst.sin6_addr = inc->inc6_faddr; + rtalloc((struct route *)ro6); + rt = ro6->ro_rt; + } + } + return rt; +} +#endif /* INET6 */ - /* - * Send `CC-family' options if our side wants to use them (TF_REQ_CC), - * options are allowed (!TF_NOOPT) and it's not a RST. - */ - if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && - (flags & TH_RST) == 0) { - switch (flags & (TH_SYN|TH_ACK)) { - /* - * This is a normal ACK, send CC if we received CC before - * from our peer. - */ - case TH_ACK: - if (!(tp->t_flags & TF_RCVD_CC)) - break; - /*FALLTHROUGH*/ +#ifdef IPSEC +/* compute ESP/AH header size for TCP, including outer IP header. */ +size_t +ipsec_hdrsiz_tcp(tp) + struct tcpcb *tp; +{ + struct inpcb *inp; + struct mbuf *m; + size_t hdrsiz; + struct ip *ip; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif /* INET6 */ + struct tcphdr *th; - /* - * We can only get here in T/TCP's SYN_SENT* state, when - * we're a sending a non-SYN segment without waiting for - * the ACK of our SYN. A check above assures that we only - * do this if our peer understands T/TCP. 
- */ - case 0: - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = TCPOPT_CC; - opt[optlen++] = TCPOLEN_CC; - *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); + if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL)) + return 0; + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (!m) + return 0; - optlen += 4; - break; +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0) { + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)(ip6 + 1); + m->m_pkthdr.len = m->m_len = + sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + tcp_fillheaders(tp, ip6, th); + hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); + } else +#endif /* INET6 */ + { + ip = mtod(m, struct ip *); + th = (struct tcphdr *)(ip + 1); + m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); + tcp_fillheaders(tp, ip, th); + hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); + } - /* - * This is our initial SYN, check whether we have to use - * CC or CC.new. - */ - case TH_SYN: - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = tp->t_flags & TF_SENDCCNEW ? - TCPOPT_CCNEW : TCPOPT_CC; - opt[optlen++] = TCPOLEN_CC; - *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); - optlen += 4; - break; + m_free(m); + return hdrsiz; +} +#endif /*IPSEC*/ - /* - * This is a SYN,ACK; send CC and CC.echo if we received - * CC from our peer. - */ - case (TH_SYN|TH_ACK): - if (tp->t_flags & TF_RCVD_CC) { - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = TCPOPT_CC; - opt[optlen++] = TCPOLEN_CC; - *(u_int32_t *)&opt[optlen] = - htonl(tp->cc_send); - optlen += 4; - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = TCPOPT_NOP; - opt[optlen++] = TCPOPT_CCECHO; - opt[optlen++] = TCPOLEN_CC; - *(u_int32_t *)&opt[optlen] = - htonl(tp->cc_recv); - optlen += 4; - } - break; - } - } +/* + * Return a pointer to the cached information about the remote host. + * The cached information is stored in the protocol specific part of + * the route metrics. 
+ */ +struct rmxp_tao * +tcp_gettaocache(inc) + struct in_conninfo *inc; +{ + struct rtentry *rt; - hdrlen += optlen; - #ifdef INET6 - if (isipv6) - ipoptlen = ip6_optlen(tp->t_inpcb); + if (inc->inc_isipv6) + rt = tcp_rtlookup6(inc); else -#endif - if (tp->t_inpcb->inp_options) - ipoptlen = tp->t_inpcb->inp_options->m_len - - offsetof(struct ipoption, ipopt_list); - else - ipoptlen = 0; -#ifdef IPSEC - ipoptlen += ipsec_hdrsiz_tcp(tp); -#endif +#endif /* INET6 */ + rt = tcp_rtlookup(inc); + + /* Make sure this is a host route and is up. */ + if (rt == NULL || + (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) + return NULL; + + return rmx_taop(rt->rt_rmx); +} - /* - * Adjust data length if insertion of options will - * bump the packet length beyond the t_maxo+/* + * Clear all the TAO cache entries, called from tcp_init. + * + * XXX + * This routine is just an empty one, because we assume that the + * routing tables are initialized at the same time as TCP, so there is + * nothing in the cache left over. + */ +static void +tcp_cleartaocache() +{ +} To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe p4-projects" in the body of the message