From owner-svn-src-user@FreeBSD.ORG Mon Nov 5 09:23:22 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 3B02AD88; Mon, 5 Nov 2012 09:23:22 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 20CC38FC0A; Mon, 5 Nov 2012 09:23:22 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA59NLrT068771; Mon, 5 Nov 2012 09:23:21 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA59NLSe068769; Mon, 5 Nov 2012 09:23:21 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211050923.qA59NLSe068769@svn.freebsd.org> From: Andre Oppermann Date: Mon, 5 Nov 2012 09:23:21 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242602 - user/andre/tcp_workqueue/sys/netinet X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 05 Nov 2012 09:23:22 -0000 Author: andre Date: Mon Nov 5 09:23:21 2012 New Revision: 242602 URL: http://svnweb.freebsd.org/changeset/base/242602 Log: Partial fix for window update problems. 
Modified: user/andre/tcp_workqueue/sys/netinet/tcp_output.c Modified: user/andre/tcp_workqueue/sys/netinet/tcp_output.c ============================================================================== --- user/andre/tcp_workqueue/sys/netinet/tcp_output.c Mon Nov 5 09:13:06 2012 (r242601) +++ user/andre/tcp_workqueue/sys/netinet/tcp_output.c Mon Nov 5 09:23:21 2012 (r242602) @@ -228,7 +228,7 @@ again: tso = 0; mtu = 0; off = tp->snd_nxt - tp->snd_una; - sendwin = min(tp->snd_wnd, tp->snd_cwnd); + sendwin = ulmax(ulmin(tp->snd_wnd - off, tp->snd_cwnd), 0); flags = tcp_outflags[tp->t_state]; /* @@ -249,7 +249,7 @@ again: (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { long cwin; - cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; + cwin = ulmin(tp->snd_wnd - off, tp->snd_cwnd) - sack_bytes_rxmt; if (cwin < 0) cwin = 0; /* Do not retransmit SACK segments beyond snd_recover */ @@ -355,7 +355,7 @@ after_sack_rexmit: * sending new data, having retransmitted all the * data possible in the scoreboard. */ - len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd) + len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd - off) - off); /* * Don't remove this (len > 0) check ! 
From owner-svn-src-user@FreeBSD.ORG Mon Nov 5 09:26:28 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 49349F9D; Mon, 5 Nov 2012 09:26:28 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 16C578FC14; Mon, 5 Nov 2012 09:26:28 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA59QRwN069288; Mon, 5 Nov 2012 09:26:27 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA59QR14069284; Mon, 5 Nov 2012 09:26:27 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211050926.qA59QR14069284@svn.freebsd.org> From: Andre Oppermann Date: Mon, 5 Nov 2012 09:26:27 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242603 - in user/andre/tcp_workqueue/sys: kern netinet X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 05 Nov 2012 09:26:28 -0000 Author: andre Date: Mon Nov 5 09:26:27 2012 New Revision: 242603 URL: http://svnweb.freebsd.org/changeset/base/242603 Log: Annotate possible socket locking issues for investigation. 
Modified: user/andre/tcp_workqueue/sys/kern/uipc_socket.c user/andre/tcp_workqueue/sys/netinet/tcp_input.c user/andre/tcp_workqueue/sys/netinet/tcp_syncache.c Modified: user/andre/tcp_workqueue/sys/kern/uipc_socket.c ============================================================================== --- user/andre/tcp_workqueue/sys/kern/uipc_socket.c Mon Nov 5 09:23:21 2012 (r242602) +++ user/andre/tcp_workqueue/sys/kern/uipc_socket.c Mon Nov 5 09:26:27 2012 (r242603) @@ -519,6 +519,7 @@ sonewconn(struct socket *head, int conns __func__, head->so_pcb); return (NULL); } + /* XXXAO: "so" is not locked. */ if ((head->so_options & SO_ACCEPTFILTER) != 0) connstatus = 0; so->so_head = head; Modified: user/andre/tcp_workqueue/sys/netinet/tcp_input.c ============================================================================== --- user/andre/tcp_workqueue/sys/netinet/tcp_input.c Mon Nov 5 09:23:21 2012 (r242602) +++ user/andre/tcp_workqueue/sys/netinet/tcp_input.c Mon Nov 5 09:26:27 2012 (r242603) @@ -1034,6 +1034,8 @@ relocked: * attempt or the completion of a previous one. Because listen * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be * held in this case. + * + * XXXAO: "so" is not locked! */ if (so->so_options & SO_ACCEPTCONN) { struct in_conninfo inc; Modified: user/andre/tcp_workqueue/sys/netinet/tcp_syncache.c ============================================================================== --- user/andre/tcp_workqueue/sys/netinet/tcp_syncache.c Mon Nov 5 09:23:21 2012 (r242602) +++ user/andre/tcp_workqueue/sys/netinet/tcp_syncache.c Mon Nov 5 09:26:27 2012 (r242603) @@ -642,6 +642,8 @@ done: /* * Build a new TCP socket structure from a syncache entry. + * + * XXXAO: Neither "lso" nor "so" is locked! 
*/ static struct socket * syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) @@ -907,6 +909,8 @@ abort2: * in the syncache, and if its there, we pull it out of * the cache and turn it into a full-blown connection in * the SYN-RECEIVED state. + * + * XXXAO: Neither "lso" nor "lsop" is locked! */ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 00:03:54 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 6E1776FB; Tue, 6 Nov 2012 00:03:54 +0000 (UTC) (envelope-from alfred@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 3AD8B8FC08; Tue, 6 Nov 2012 00:03:54 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA603sxP024052; Tue, 6 Nov 2012 00:03:54 GMT (envelope-from alfred@svn.freebsd.org) Received: (from alfred@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA603sFM024051; Tue, 6 Nov 2012 00:03:54 GMT (envelope-from alfred@svn.freebsd.org) Message-Id: <201211060003.qA603sFM024051@svn.freebsd.org> From: Alfred Perlstein Date: Tue, 6 Nov 2012 00:03:54 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242636 - user/alfred/so_discard X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 00:03:54 -0000 Author: alfred Date: Tue Nov 6 00:03:53 2012 New Revision: 242636 URL: http://svnweb.freebsd.org/changeset/base/242636 Log: branch for discarding 
socketbuffers. Added: - copied from r242635, head/ Directory Properties: user/alfred/so_discard/ (props changed) From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 00:08:09 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id D746B876; Tue, 6 Nov 2012 00:08:09 +0000 (UTC) (envelope-from alfred@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id BCB5D8FC0C; Tue, 6 Nov 2012 00:08:09 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA6089GX024993; Tue, 6 Nov 2012 00:08:09 GMT (envelope-from alfred@svn.freebsd.org) Received: (from alfred@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA6089AQ024990; Tue, 6 Nov 2012 00:08:09 GMT (envelope-from alfred@svn.freebsd.org) Message-Id: <201211060008.qA6089AQ024990@svn.freebsd.org> From: Alfred Perlstein Date: Tue, 6 Nov 2012 00:08:09 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242637 - in user/alfred/so_discard/sys: kern sys X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 00:08:09 -0000 Author: alfred Date: Tue Nov 6 00:08:09 2012 New Revision: 242637 URL: http://svnweb.freebsd.org/changeset/base/242637 Log: Implement a socket option SO_DISCARD_RECV, this will discard any data that arrives. I've found this very useful for testing streaming services and want to share it with community as a whole as well as stash it someplace I can pull up if needed for benching. 
Modified: user/alfred/so_discard/sys/kern/uipc_socket.c user/alfred/so_discard/sys/sys/socket.h Modified: user/alfred/so_discard/sys/kern/uipc_socket.c ============================================================================== --- user/alfred/so_discard/sys/kern/uipc_socket.c Tue Nov 6 00:03:53 2012 (r242636) +++ user/alfred/so_discard/sys/kern/uipc_socket.c Tue Nov 6 00:08:09 2012 (r242637) @@ -2479,6 +2479,26 @@ so_setsockopt(struct socket *so, int lev return (sosetopt(so, &sopt)); } +static int so_discard_rcv_calls; + +SYSCTL_INT(_kern_ipc, OID_AUTO, so_discard_rcv_calls, CTLFLAG_RD, + &so_discard_rcv_calls, 0, "Number of open sockets"); + + + + +static int +so_discard_rcv(struct socket *so, void *arg, int waitflag) +{ + struct sockbuf *sb; + + so_discard_rcv_calls++; + sb = &so->so_rcv; + SOCKBUF_LOCK_ASSERT(sb); + sbflush_locked(sb); + return (SU_OK); +} + int sosetopt(struct socket *so, struct sockopt *sopt) { @@ -2681,7 +2701,31 @@ sosetopt(struct socket *so, struct socko error = EOPNOTSUPP; #endif break; - + case SO_DISCARD_RECV: { + struct sockbuf *sb = &so->so_rcv; + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + goto bad; + SOCKBUF_LOCK(&so->so_rcv); + if (optval == 1) { + if (sb->sb_upcall != NULL) { + error = EBUSY; + } else { + soupcall_set(so, SO_RCV, + &so_discard_rcv, NULL); + } + } else if (optval == 0) { + if (sb->sb_upcall == so_discard_rcv) + soupcall_clear(so, SO_RCV); + else + error = EINVAL; + } else { + error = ENOPROTOOPT; + } + SOCKBUF_UNLOCK(&so->so_rcv); + } + break; default: error = ENOPROTOOPT; break; @@ -2869,6 +2913,10 @@ integer: optval = so->so_incqlen; goto integer; + case SO_DISCARD_RECV: + optval = (so->so_rcv.sb_upcall == so_discard_rcv) ? 
1 : 0; + goto integer; + default: error = ENOPROTOOPT; break; Modified: user/alfred/so_discard/sys/sys/socket.h ============================================================================== --- user/alfred/so_discard/sys/sys/socket.h Tue Nov 6 00:03:53 2012 (r242636) +++ user/alfred/so_discard/sys/sys/socket.h Tue Nov 6 00:08:09 2012 (r242637) @@ -140,6 +140,7 @@ typedef __uid_t uid_t; #define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */ #define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */ #define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */ +#define SO_DISCARD_RECV 0x1017 /* discard recieved data */ #endif /* From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 07:57:13 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 09AF4D21 for ; Tue, 6 Nov 2012 07:57:13 +0000 (UTC) (envelope-from andrey@zonov.org) Received: from mail-la0-f54.google.com (mail-la0-f54.google.com [209.85.215.54]) by mx1.freebsd.org (Postfix) with ESMTP id 6FAA28FC16 for ; Tue, 6 Nov 2012 07:57:11 +0000 (UTC) Received: by mail-la0-f54.google.com with SMTP id e12so134473lag.13 for ; Mon, 05 Nov 2012 23:57:10 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20120113; h=sender:message-id:date:from:user-agent:mime-version:to:cc:subject :references:in-reply-to:x-enigmail-version:content-type :x-gm-message-state; bh=oAG94dO31k9Dte2jYTvJLf5xCAEBth4gVk6QL39jGR8=; b=dHF7B+9b3HN6r8GKAYui0BR+QoLoTLOo7iXoLK2ifvmnm7l5d2pBLswPnZE216hvDW +GRnHHzGP+2vi+ik2d1Hssmtt7pe+KUzCZZ8BmlsBtgyGQRZtEKeHEBpvsXbbFWxCFNf MZrx9gwGmleGhsgRgetfkz6eHfMuy83aV5Pvil/xWadA+UF2DkvBWscGEmLhfJceJoVz LxZhfkJp910PWcvwnSJO4H9uVDmBr65C39prHmLQJnA09530X9d8qul+od4GJQ4+gL/B rbcGXaxDYcMWcKmcRvr1vMcmdOWDDkwT8hOcHMkrk2PUfbJPVVVAZywNiI7HEN1yfWOD BElQ== Received: by 10.112.26.131 with SMTP id l3mr164428lbg.26.1352188630787; Mon, 05 Nov 2012 23:57:10 -0800 
(PST) Received: from zont-osx.local (ppp95-165-147-157.pppoe.spdop.ru. [95.165.147.157]) by mx.google.com with ESMTPS id fp7sm6478589lab.4.2012.11.05.23.57.09 (version=SSLv3 cipher=OTHER); Mon, 05 Nov 2012 23:57:10 -0800 (PST) Sender: Andrey Zonov Message-ID: <5098C2D2.5060002@FreeBSD.org> Date: Tue, 06 Nov 2012 11:57:06 +0400 From: Andrey Zonov User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:16.0) Gecko/20121026 Thunderbird/16.0.2 MIME-Version: 1.0 To: Alfred Perlstein Subject: Re: svn commit: r242637 - in user/alfred/so_discard/sys: kern sys References: <201211060008.qA6089AQ024990@svn.freebsd.org> In-Reply-To: <201211060008.qA6089AQ024990@svn.freebsd.org> X-Enigmail-Version: 1.4.5 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="------------enig9F46194DB7AF2C0FA2CBD060" X-Gm-Message-State: ALoCoQk84luERYFdIgFkyPCTVu51D3WNNIJ/ACHSN0YY9kdJLxtkQEy1+C041u+c2/DqL5ukElXy Cc: src-committers@freebsd.org, svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 07:57:13 -0000 This is an OpenPGP/MIME signed message (RFC 2440 and 3156) --------------enig9F46194DB7AF2C0FA2CBD060 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable On 11/6/12 4:08 AM, Alfred Perlstein wrote: > Author: alfred > Date: Tue Nov 6 00:08:09 2012 > New Revision: 242637 > URL: http://svnweb.freebsd.org/changeset/base/242637 >=20 > Log: > Implement a socket option SO_DISCARD_RECV, this will discard any > data that arrives. I've found this very useful for testing streaming= > services and want to share it with community as a whole as well as > stash it someplace I can pull up if needed for benching. 
>=20 > Modified: > user/alfred/so_discard/sys/kern/uipc_socket.c > user/alfred/so_discard/sys/sys/socket.h >=20 > Modified: user/alfred/so_discard/sys/kern/uipc_socket.c > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D > --- user/alfred/so_discard/sys/kern/uipc_socket.c Tue Nov 6 00:03:53 2= 012 (r242636) > +++ user/alfred/so_discard/sys/kern/uipc_socket.c Tue Nov 6 00:08:09 2= 012 (r242637) > @@ -2479,6 +2479,26 @@ so_setsockopt(struct socket *so, int lev > return (sosetopt(so, &sopt)); > } > =20 > +static int so_discard_rcv_calls; > + > +SYSCTL_INT(_kern_ipc, OID_AUTO, so_discard_rcv_calls, CTLFLAG_RD, > + &so_discard_rcv_calls, 0, "Number of open sockets"); ^^^^^^^^^^^^^^^^^^^^^^ Is the comment correct? > + > + > + > + > +static int > +so_discard_rcv(struct socket *so, void *arg, int waitflag) > +{ > + struct sockbuf *sb; > + > + so_discard_rcv_calls++; It seems this incrementing is not safe in multi-threaded world. 
> + sb =3D &so->so_rcv; > + SOCKBUF_LOCK_ASSERT(sb); > + sbflush_locked(sb); > + return (SU_OK); > +} > + > int > sosetopt(struct socket *so, struct sockopt *sopt) > { > @@ -2681,7 +2701,31 @@ sosetopt(struct socket *so, struct socko > error =3D EOPNOTSUPP; > #endif > break; > - > + case SO_DISCARD_RECV: { > + struct sockbuf *sb =3D &so->so_rcv; > + error =3D sooptcopyin(sopt, &optval, sizeof optval, > + sizeof optval); > + if (error) > + goto bad; > + SOCKBUF_LOCK(&so->so_rcv); > + if (optval =3D=3D 1) { > + if (sb->sb_upcall !=3D NULL) { > + error =3D EBUSY; > + } else { > + soupcall_set(so, SO_RCV, > + &so_discard_rcv, NULL); > + } > + } else if (optval =3D=3D 0) { > + if (sb->sb_upcall =3D=3D so_discard_rcv) > + soupcall_clear(so, SO_RCV); > + else > + error =3D EINVAL; > + } else { > + error =3D ENOPROTOOPT; > + } > + SOCKBUF_UNLOCK(&so->so_rcv); > + } > + break; > default: > error =3D ENOPROTOOPT; > break; > @@ -2869,6 +2913,10 @@ integer: > optval =3D so->so_incqlen; > goto integer; > =20 > + case SO_DISCARD_RECV: > + optval =3D (so->so_rcv.sb_upcall =3D=3D so_discard_rcv) ? 1 : 0; > + goto integer; > + > default: > error =3D ENOPROTOOPT; > break; >=20 > Modified: user/alfred/so_discard/sys/sys/socket.h > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D > --- user/alfred/so_discard/sys/sys/socket.h Tue Nov 6 00:03:53 2012 (r= 242636) > +++ user/alfred/so_discard/sys/sys/socket.h Tue Nov 6 00:08:09 2012 (r= 242637) > @@ -140,6 +140,7 @@ typedef __uid_t uid_t; > #define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) 
*/ > #define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */ > #define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name)= */ > +#define SO_DISCARD_RECV 0x1017 /* discard recieved data */ > #endif > =20 > /* >=20 --=20 Andrey Zonov --------------enig9F46194DB7AF2C0FA2CBD060 Content-Type: application/pgp-signature; name="signature.asc" Content-Description: OpenPGP digital signature Content-Disposition: attachment; filename="signature.asc" -----BEGIN PGP SIGNATURE----- Version: GnuPG/MacGPG2 v2.0.18 (Darwin) Comment: GPGTools - http://gpgtools.org iQEcBAEBAgAGBQJQmMLWAAoJEBWLemxX/CvTAp0H/jMsQUZNZoDmkI3p4eYUZCH6 w0cP2jRZfzx4jfgqujr2uOlIQNkhaF8LxnMYKWVLqyjA9W9anSILhO7xTQIcNzuU qFQy+27s5p3J0oeTxnQfj9OYE9jwCuSzKmnvTkM50AQ1o4Ay34DCHv/YOR27m99A 7DcDlJbfVn3377rLZ0wWXURTOrnu3nJCCit+XugK1UoJAHK6eJGoH0SMzDNZhhds 7RKIIZO+QfkXm1qSjLrMubLMMd7W+sCYlTmoYNSuPLzgJHvB+fGns9psa/7H0V3G RR8+ECxvJ9LSVESHghIIoWItPMwJrWvC+hoJys8RiCyS1DW/5zXbH9VYSq+pCS4= =3aaM -----END PGP SIGNATURE----- --------------enig9F46194DB7AF2C0FA2CBD060-- From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 18:07:49 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 57452902; Tue, 6 Nov 2012 18:07:49 +0000 (UTC) (envelope-from bright@mu.org) Received: from elvis.mu.org (elvis.mu.org [192.203.228.196]) by mx1.freebsd.org (Postfix) with ESMTP id 245198FC14; Tue, 6 Nov 2012 18:07:49 +0000 (UTC) Received: from Alfreds-MacBook-Pro-5.local (c-67-180-208-218.hsd1.ca.comcast.net [67.180.208.218]) by elvis.mu.org (Postfix) with ESMTPSA id AFFE21A3C22; Tue, 6 Nov 2012 10:07:43 -0800 (PST) Message-ID: <509951EF.1020602@mu.org> Date: Tue, 06 Nov 2012 10:07:43 -0800 From: Alfred Perlstein User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:16.0) Gecko/20121026 Thunderbird/16.0.2 MIME-Version: 1.0 To: Andrey Zonov Subject: Re: svn commit: r242637 - in user/alfred/so_discard/sys: kern sys 
References: <201211060008.qA6089AQ024990@svn.freebsd.org> <5098C2D2.5060002@FreeBSD.org> In-Reply-To: <5098C2D2.5060002@FreeBSD.org> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Cc: Alfred Perlstein , src-committers@freebsd.org, svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 18:07:49 -0000 Thank you. As this is just a user project, I am not too concerned about SMP safety for the stats; I will update the comment, though. Changes/suggestions are welcome! -Alfred On 11/5/12 11:57 PM, Andrey Zonov wrote: > On 11/6/12 4:08 AM, Alfred Perlstein wrote: >> Author: alfred >> Date: Tue Nov 6 00:08:09 2012 >> New Revision: 242637 >> URL: http://svnweb.freebsd.org/changeset/base/242637 >> >> Log: >> Implement a socket option SO_DISCARD_RECV, this will discard any >> data that arrives. I've found this very useful for testing streaming >> services and want to share it with community as a whole as well as >> stash it someplace I can pull up if needed for benching. 
>> >> Modified: >> user/alfred/so_discard/sys/kern/uipc_socket.c >> user/alfred/so_discard/sys/sys/socket.h >> >> Modified: user/alfred/so_discard/sys/kern/uipc_socket.c >> ============================================================================== >> --- user/alfred/so_discard/sys/kern/uipc_socket.c Tue Nov 6 00:03:53 2012 (r242636) >> +++ user/alfred/so_discard/sys/kern/uipc_socket.c Tue Nov 6 00:08:09 2012 (r242637) >> @@ -2479,6 +2479,26 @@ so_setsockopt(struct socket *so, int lev >> return (sosetopt(so, &sopt)); >> } >> >> +static int so_discard_rcv_calls; >> + >> +SYSCTL_INT(_kern_ipc, OID_AUTO, so_discard_rcv_calls, CTLFLAG_RD, >> + &so_discard_rcv_calls, 0, "Number of open sockets"); > ^^^^^^^^^^^^^^^^^^^^^^ > Is the comment correct? > >> + >> + >> + >> + >> +static int >> +so_discard_rcv(struct socket *so, void *arg, int waitflag) >> +{ >> + struct sockbuf *sb; >> + >> + so_discard_rcv_calls++; > It seems this incrementing is not safe in multi-threaded world. > >> + sb = &so->so_rcv; >> + SOCKBUF_LOCK_ASSERT(sb); >> + sbflush_locked(sb); >> + return (SU_OK); >> +} >> + >> int >> sosetopt(struct socket *so, struct sockopt *sopt) >> { >> @@ -2681,7 +2701,31 @@ sosetopt(struct socket *so, struct socko >> error = EOPNOTSUPP; >> #endif >> break; >> - >> + case SO_DISCARD_RECV: { >> + struct sockbuf *sb = &so->so_rcv; >> + error = sooptcopyin(sopt, &optval, sizeof optval, >> + sizeof optval); >> + if (error) >> + goto bad; >> + SOCKBUF_LOCK(&so->so_rcv); >> + if (optval == 1) { >> + if (sb->sb_upcall != NULL) { >> + error = EBUSY; >> + } else { >> + soupcall_set(so, SO_RCV, >> + &so_discard_rcv, NULL); >> + } >> + } else if (optval == 0) { >> + if (sb->sb_upcall == so_discard_rcv) >> + soupcall_clear(so, SO_RCV); >> + else >> + error = EINVAL; >> + } else { >> + error = ENOPROTOOPT; >> + } >> + SOCKBUF_UNLOCK(&so->so_rcv); >> + } >> + break; >> default: >> error = ENOPROTOOPT; >> break; >> @@ -2869,6 +2913,10 @@ integer: >> optval = so->so_incqlen; >> goto 
integer; >> >> + case SO_DISCARD_RECV: >> + optval = (so->so_rcv.sb_upcall == so_discard_rcv) ? 1 : 0; >> + goto integer; >> + >> default: >> error = ENOPROTOOPT; >> break; >> >> Modified: user/alfred/so_discard/sys/sys/socket.h >> ============================================================================== >> --- user/alfred/so_discard/sys/sys/socket.h Tue Nov 6 00:03:53 2012 (r242636) >> +++ user/alfred/so_discard/sys/sys/socket.h Tue Nov 6 00:08:09 2012 (r242637) >> @@ -140,6 +140,7 @@ typedef __uid_t uid_t; >> #define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */ >> #define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */ >> #define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */ >> +#define SO_DISCARD_RECV 0x1017 /* discard recieved data */ >> #endif >> >> /* >> > From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 19:54:24 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 9C6C9CE0; Tue, 6 Nov 2012 19:54:24 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 823988FC14; Tue, 6 Nov 2012 19:54:24 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA6JsOHu038452; Tue, 6 Nov 2012 19:54:24 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA6JsOP3038450; Tue, 6 Nov 2012 19:54:24 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211061954.qA6JsOP3038450@svn.freebsd.org> From: Andre Oppermann Date: Tue, 6 Nov 2012 19:54:24 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242670 - user/andre/tcp_workqueue/sys/dev/e1000 X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 19:54:24 -0000 Author: andre Date: Tue Nov 6 19:54:24 2012 New Revision: 242670 URL: http://svnweb.freebsd.org/changeset/base/242670 Log: Remove polling support from em in preparation to try a different approach. Modified: user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Modified: user/andre/tcp_workqueue/sys/dev/e1000/if_em.c ============================================================================== --- user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 19:51:54 2012 (r242669) +++ user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 19:54:24 2012 (r242670) @@ -33,7 +33,6 @@ /*$FreeBSD$*/ #ifdef HAVE_KERNEL_OPTION_HEADERS -#include "opt_device_polling.h" #include "opt_inet.h" #include "opt_inet6.h" #endif @@ -293,10 +292,6 @@ static int em_sysctl_eee(SYSCTL_HANDLER_ static __inline void em_rx_discard(struct rx_ring *, int); -#ifdef DEVICE_POLLING -static poll_handler_t em_poll; -#endif /* POLLING */ - /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ @@ -772,11 +767,6 @@ em_detach(device_t dev) return (EBUSY); } -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) - ether_poll_deregister(ifp); -#endif - if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); @@ -1162,10 +1152,7 @@ em_ioctl(struct ifnet *ifp, u_long comma EM_CORE_LOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); -#ifdef DEVICE_POLLING - if (!(ifp->if_capenable & IFCAP_POLLING)) -#endif - em_enable_intr(adapter); + em_enable_intr(adapter); EM_CORE_UNLOCK(adapter); } break; @@ -1192,26 +1179,7 @@ em_ioctl(struct ifnet *ifp, u_long comma IOCTL_DEBUGOUT("ioctl rcv'd: 
SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ ifp->if_capenable; -#ifdef DEVICE_POLLING - if (mask & IFCAP_POLLING) { - if (ifr->ifr_reqcap & IFCAP_POLLING) { - error = ether_poll_register(em_poll, ifp); - if (error) - return (error); - EM_CORE_LOCK(adapter); - em_disable_intr(adapter); - ifp->if_capenable |= IFCAP_POLLING; - EM_CORE_UNLOCK(adapter); - } else { - error = ether_poll_deregister(ifp); - /* Enable interrupt even in error case */ - EM_CORE_LOCK(adapter); - em_enable_intr(adapter); - ifp->if_capenable &= ~IFCAP_POLLING; - EM_CORE_UNLOCK(adapter); - } - } -#endif + if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; @@ -1373,16 +1341,7 @@ em_init_locked(struct adapter *adapter) E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } -#ifdef DEVICE_POLLING - /* - * Only enable interrupts if we are not polling, make sure - * they are off otherwise. - */ - if (ifp->if_capenable & IFCAP_POLLING) - em_disable_intr(adapter); - else -#endif /* DEVICE_POLLING */ - em_enable_intr(adapter); + em_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) @@ -1399,58 +1358,6 @@ em_init(void *arg) EM_CORE_UNLOCK(adapter); } - -#ifdef DEVICE_POLLING -/********************************************************************* - * - * Legacy polling routine: note this only works with single queue - * - *********************************************************************/ -static int -em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - u32 reg_icr; - int rx_done; - - EM_CORE_LOCK(adapter); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - EM_CORE_UNLOCK(adapter); - return (0); - } - - if (cmd == POLL_AND_CHECK_STATUS) { - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (reg_icr & (E1000_ICR_RXSEQ | 
E1000_ICR_LSC)) { - callout_stop(&adapter->timer); - adapter->hw.mac.get_link_status = 1; - em_update_link_status(adapter); - callout_reset(&adapter->timer, hz, - em_local_timer, adapter); - } - } - EM_CORE_UNLOCK(adapter); - - em_rxeof(rxr, count, &rx_done); - - EM_TX_LOCK(txr); - em_txeof(txr); -#ifdef EM_MULTIQUEUE - if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr, NULL); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - em_start_locked(ifp, txr); -#endif - EM_TX_UNLOCK(txr); - - return (rx_done); -} -#endif /* DEVICE_POLLING */ - - /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine @@ -2256,10 +2163,8 @@ em_local_timer(void *arg) adapter->pause_frames = 0; callout_reset(&adapter->timer, hz, em_local_timer, adapter); -#ifndef DEVICE_POLLING /* Trigger an RX interrupt to guarantee mbuf refresh */ E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); -#endif return; hung: /* Looks like we're hung */ @@ -2980,10 +2885,6 @@ em_setup_interface(device_t dev, struct */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; -#ifdef DEVICE_POLLING - ifp->if_capabilities |= IFCAP_POLLING; -#endif - /* Enable only WOL MAGIC by default */ if (adapter->wol) { ifp->if_capabilities |= IFCAP_WOL; @@ -4388,7 +4289,6 @@ em_initialize_receive_unit(struct adapte * We loop at most count times if count is > 0, or until done if * count < 0. 
* - * For polling we also now return the number of cleaned packets *********************************************************************/ static bool em_rxeof(struct rx_ring *rxr, int count, int *done) From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 20:30:24 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 2CB06AC6; Tue, 6 Nov 2012 20:30:24 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id EE09C8FC0C; Tue, 6 Nov 2012 20:30:23 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA6KUNMQ044195; Tue, 6 Nov 2012 20:30:23 GMT (envelope-from np@svn.freebsd.org) Received: (from np@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA6KUNCd044192; Tue, 6 Nov 2012 20:30:23 GMT (envelope-from np@svn.freebsd.org) Message-Id: <201211062030.qA6KUNCd044192@svn.freebsd.org> From: Navdeep Parhar Date: Tue, 6 Nov 2012 20:30:23 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242672 - user/np/stable_9_toe/sys/dev/cxgbe/tom X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 20:30:24 -0000 Author: np Date: Tue Nov 6 20:30:23 2012 New Revision: 242672 URL: http://svnweb.freebsd.org/changeset/base/242672 Log: Pull in a couple of fixes from head (r242666 and r242671) Modified: user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_cpl_io.c user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_listen.c Directory Properties: user/np/stable_9_toe/sys/ (props 
changed) user/np/stable_9_toe/sys/dev/ (props changed) Modified: user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_cpl_io.c ============================================================================== --- user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_cpl_io.c Tue Nov 6 20:22:39 2012 (r242671) +++ user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_cpl_io.c Tue Nov 6 20:30:23 2012 (r242672) @@ -982,7 +982,6 @@ do_abort_req(struct sge_iq *iq, const st struct sge_wrq *ofld_txq = toep->ofld_txq; struct inpcb *inp; struct tcpcb *tp; - struct socket *so; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif @@ -1008,7 +1007,6 @@ do_abort_req(struct sge_iq *iq, const st INP_WLOCK(inp); tp = intotcpcb(inp); - so = inp->inp_socket; CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", @@ -1026,10 +1024,16 @@ do_abort_req(struct sge_iq *iq, const st } toep->flags |= TPF_ABORT_SHUTDOWN; - so_error_set(so, abort_status_to_errno(tp, cpl->status)); - tp = tcp_close(tp); - if (tp == NULL) - INP_WLOCK(inp); /* re-acquire */ + if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { + struct socket *so = inp->inp_socket; + + if (so != NULL) + so_error_set(so, abort_status_to_errno(tp, + cpl->status)); + tp = tcp_close(tp); + if (tp == NULL) + INP_WLOCK(inp); /* re-acquire */ + } final_cpl_received(toep); done: Modified: user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_listen.c ============================================================================== --- user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_listen.c Tue Nov 6 20:22:39 2012 (r242671) +++ user/np/stable_9_toe/sys/dev/cxgbe/tom/t4_listen.c Tue Nov 6 20:30:23 2012 (r242672) @@ -1202,6 +1202,7 @@ do_pass_accept_req(struct sge_iq *iq, co if (m) m->m_pkthdr.rcvif = ifp; + remove_tid(sc, synqe->tid); release_synqe(synqe); /* about to exit function */ free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 21:48:59 2012 Return-Path: Delivered-To: 
svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id B8FB3683; Tue, 6 Nov 2012 21:48:59 +0000 (UTC) (envelope-from luigi@onelab2.iet.unipi.it) Received: from onelab2.iet.unipi.it (onelab2.iet.unipi.it [131.114.59.238]) by mx1.freebsd.org (Postfix) with ESMTP id 1F8B98FC17; Tue, 6 Nov 2012 21:48:58 +0000 (UTC) Received: by onelab2.iet.unipi.it (Postfix, from userid 275) id 848167300A; Tue, 6 Nov 2012 23:09:52 +0100 (CET) Date: Tue, 6 Nov 2012 23:09:52 +0100 From: Luigi Rizzo To: Andre Oppermann Subject: Re: svn commit: r242670 - user/andre/tcp_workqueue/sys/dev/e1000 Message-ID: <20121106220952.GA32652@onelab2.iet.unipi.it> References: <201211061954.qA6JsOP3038450@svn.freebsd.org> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <201211061954.qA6JsOP3038450@svn.freebsd.org> User-Agent: Mutt/1.4.2.3i Cc: src-committers@freebsd.org, svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 21:48:59 -0000 One thing: while i believe device polling should go, removing the conditional blocks from device drivers is both useless and a mistake as it gratuitously introduces disalignment between device drivers. I suggest to revert this (and similar) commits. The code is already conditional, and it suffices to remove the option from files/options and if you want to remove the DEVICE_POLLING from the main code. cheers luigi On Tue, Nov 06, 2012 at 07:54:24PM +0000, Andre Oppermann wrote: > Author: andre > Date: Tue Nov 6 19:54:24 2012 > New Revision: 242670 > URL: http://svnweb.freebsd.org/changeset/base/242670 > > Log: > Remove polling support from em in preparation to try a different > approach. 
> > Modified: > user/andre/tcp_workqueue/sys/dev/e1000/if_em.c > > Modified: user/andre/tcp_workqueue/sys/dev/e1000/if_em.c > ============================================================================== > --- user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 19:51:54 2012 (r242669) > +++ user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 19:54:24 2012 (r242670) > @@ -33,7 +33,6 @@ > /*$FreeBSD$*/ > > #ifdef HAVE_KERNEL_OPTION_HEADERS > -#include "opt_device_polling.h" > #include "opt_inet.h" > #include "opt_inet6.h" > #endif > @@ -293,10 +292,6 @@ static int em_sysctl_eee(SYSCTL_HANDLER_ > > static __inline void em_rx_discard(struct rx_ring *, int); > > -#ifdef DEVICE_POLLING > -static poll_handler_t em_poll; > -#endif /* POLLING */ > - > /********************************************************************* > * FreeBSD Device Interface Entry Points > *********************************************************************/ > @@ -772,11 +767,6 @@ em_detach(device_t dev) > return (EBUSY); > } > > -#ifdef DEVICE_POLLING > - if (ifp->if_capenable & IFCAP_POLLING) > - ether_poll_deregister(ifp); > -#endif > - > if (adapter->led_dev != NULL) > led_destroy(adapter->led_dev); > > @@ -1162,10 +1152,7 @@ em_ioctl(struct ifnet *ifp, u_long comma > EM_CORE_LOCK(adapter); > em_disable_intr(adapter); > em_set_multi(adapter); > -#ifdef DEVICE_POLLING > - if (!(ifp->if_capenable & IFCAP_POLLING)) > -#endif > - em_enable_intr(adapter); > + em_enable_intr(adapter); > EM_CORE_UNLOCK(adapter); > } > break; > @@ -1192,26 +1179,7 @@ em_ioctl(struct ifnet *ifp, u_long comma > IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); > reinit = 0; > mask = ifr->ifr_reqcap ^ ifp->if_capenable; > -#ifdef DEVICE_POLLING > - if (mask & IFCAP_POLLING) { > - if (ifr->ifr_reqcap & IFCAP_POLLING) { > - error = ether_poll_register(em_poll, ifp); > - if (error) > - return (error); > - EM_CORE_LOCK(adapter); > - em_disable_intr(adapter); > - ifp->if_capenable |= IFCAP_POLLING; > - 
EM_CORE_UNLOCK(adapter); > - } else { > - error = ether_poll_deregister(ifp); > - /* Enable interrupt even in error case */ > - EM_CORE_LOCK(adapter); > - em_enable_intr(adapter); > - ifp->if_capenable &= ~IFCAP_POLLING; > - EM_CORE_UNLOCK(adapter); > - } > - } > -#endif > + > if (mask & IFCAP_HWCSUM) { > ifp->if_capenable ^= IFCAP_HWCSUM; > reinit = 1; > @@ -1373,16 +1341,7 @@ em_init_locked(struct adapter *adapter) > E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); > } > > -#ifdef DEVICE_POLLING > - /* > - * Only enable interrupts if we are not polling, make sure > - * they are off otherwise. > - */ > - if (ifp->if_capenable & IFCAP_POLLING) > - em_disable_intr(adapter); > - else > -#endif /* DEVICE_POLLING */ > - em_enable_intr(adapter); > + em_enable_intr(adapter); > > /* AMT based hardware can now take control from firmware */ > if (adapter->has_manage && adapter->has_amt) > @@ -1399,58 +1358,6 @@ em_init(void *arg) > EM_CORE_UNLOCK(adapter); > } > > - > -#ifdef DEVICE_POLLING > -/********************************************************************* > - * > - * Legacy polling routine: note this only works with single queue > - * > - *********************************************************************/ > -static int > -em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) > -{ > - struct adapter *adapter = ifp->if_softc; > - struct tx_ring *txr = adapter->tx_rings; > - struct rx_ring *rxr = adapter->rx_rings; > - u32 reg_icr; > - int rx_done; > - > - EM_CORE_LOCK(adapter); > - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { > - EM_CORE_UNLOCK(adapter); > - return (0); > - } > - > - if (cmd == POLL_AND_CHECK_STATUS) { > - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); > - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { > - callout_stop(&adapter->timer); > - adapter->hw.mac.get_link_status = 1; > - em_update_link_status(adapter); > - callout_reset(&adapter->timer, hz, > - em_local_timer, adapter); > - } > - } > - EM_CORE_UNLOCK(adapter); > 
- > - em_rxeof(rxr, count, &rx_done); > - > - EM_TX_LOCK(txr); > - em_txeof(txr); > -#ifdef EM_MULTIQUEUE > - if (!drbr_empty(ifp, txr->br)) > - em_mq_start_locked(ifp, txr, NULL); > -#else > - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) > - em_start_locked(ifp, txr); > -#endif > - EM_TX_UNLOCK(txr); > - > - return (rx_done); > -} > -#endif /* DEVICE_POLLING */ > - > - > /********************************************************************* > * > * Fast Legacy/MSI Combined Interrupt Service routine > @@ -2256,10 +2163,8 @@ em_local_timer(void *arg) > > adapter->pause_frames = 0; > callout_reset(&adapter->timer, hz, em_local_timer, adapter); > -#ifndef DEVICE_POLLING > /* Trigger an RX interrupt to guarantee mbuf refresh */ > E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); > -#endif > return; > hung: > /* Looks like we're hung */ > @@ -2980,10 +2885,6 @@ em_setup_interface(device_t dev, struct > */ > ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; > > -#ifdef DEVICE_POLLING > - ifp->if_capabilities |= IFCAP_POLLING; > -#endif > - > /* Enable only WOL MAGIC by default */ > if (adapter->wol) { > ifp->if_capabilities |= IFCAP_WOL; > @@ -4388,7 +4289,6 @@ em_initialize_receive_unit(struct adapte > * We loop at most count times if count is > 0, or until done if > * count < 0. 
> * > - * For polling we also now return the number of cleaned packets > *********************************************************************/ > static bool > em_rxeof(struct rx_ring *rxr, int count, int *done) From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 22:07:40 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id B0FBF210; Tue, 6 Nov 2012 22:07:40 +0000 (UTC) (envelope-from jfvogel@gmail.com) Received: from mail-vb0-f54.google.com (mail-vb0-f54.google.com [209.85.212.54]) by mx1.freebsd.org (Postfix) with ESMTP id 39C5D8FC18; Tue, 6 Nov 2012 22:07:39 +0000 (UTC) Received: by mail-vb0-f54.google.com with SMTP id l1so1183043vba.13 for ; Tue, 06 Nov 2012 14:07:39 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:in-reply-to:references:date:message-id:subject:from:to :cc:content-type; bh=924A7yxp5BydW0m2S9LABSZNbZDo+2xbvsw5/QNgQ6A=; b=VCs9gjBHuEYJ7y1jl6M8roDodcF/Op3i0QBHa5BOfxD2iuCgoJHP1RdSF557p5p6LW t/ix4PXVjIDwOd3x6E6WpY2f3u6UUcgdJqUkihIZZkVpVo2tLnr+fj1s2yBChpTRhkfa i5Bsj6xkt/DTsDetyqhQn2RrotxinSjL6RASf1rROcMjx0iAwpgY2VVH5Ll6Oqz1HlJM 5RCg6M709vewUM0CrilhH/5Ewih7cnKCVofZFQm6MJmEB8Y6ZSN87ZfdZpc3/UMR1kK9 IPmrtn95Bt9FXrYLk+YRqMyj9ZmvnAHg/kPtmo/dOG7kze7COwlQ7Eu9yw6KHo8XbziN DblQ== MIME-Version: 1.0 Received: by 10.52.180.40 with SMTP id dl8mr1976020vdc.51.1352239659168; Tue, 06 Nov 2012 14:07:39 -0800 (PST) Received: by 10.59.3.165 with HTTP; Tue, 6 Nov 2012 14:07:39 -0800 (PST) In-Reply-To: <20121106220952.GA32652@onelab2.iet.unipi.it> References: <201211061954.qA6JsOP3038450@svn.freebsd.org> <20121106220952.GA32652@onelab2.iet.unipi.it> Date: Tue, 6 Nov 2012 14:07:39 -0800 Message-ID: Subject: Re: svn commit: r242670 - user/andre/tcp_workqueue/sys/dev/e1000 From: Jack Vogel To: Luigi Rizzo Content-Type: text/plain; charset=ISO-8859-1 X-Content-Filtered-By: Mailman/MimeDel 2.1.14 Cc: 
src-committers@freebsd.org, Andre Oppermann , svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 22:07:40 -0000 Its his own branch :) Jack On Tue, Nov 6, 2012 at 2:09 PM, Luigi Rizzo wrote: > One thing: > > while i believe device polling should go, > removing the conditional blocks from device drivers is both useless > and a mistake as it gratuitously introduces disalignment between > device drivers. > > I suggest to revert this (and similar) commits. > > The code is already conditional, and it suffices to > remove the option from files/options and if you want > to remove the DEVICE_POLLING from the main code. > > cheers > luigi > > > On Tue, Nov 06, 2012 at 07:54:24PM +0000, Andre Oppermann wrote: > > Author: andre > > Date: Tue Nov 6 19:54:24 2012 > > New Revision: 242670 > > URL: http://svnweb.freebsd.org/changeset/base/242670 > > > > Log: > > Remove polling support from em in preparation to try a different > > approach. 
> > > > Modified: > > user/andre/tcp_workqueue/sys/dev/e1000/if_em.c > > > > Modified: user/andre/tcp_workqueue/sys/dev/e1000/if_em.c > > > ============================================================================== > > --- user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 > 19:51:54 2012 (r242669) > > +++ user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 > 19:54:24 2012 (r242670) > > @@ -33,7 +33,6 @@ > > /*$FreeBSD$*/ > > > > #ifdef HAVE_KERNEL_OPTION_HEADERS > > -#include "opt_device_polling.h" > > #include "opt_inet.h" > > #include "opt_inet6.h" > > #endif > > @@ -293,10 +292,6 @@ static int em_sysctl_eee(SYSCTL_HANDLER_ > > > > static __inline void em_rx_discard(struct rx_ring *, int); > > > > -#ifdef DEVICE_POLLING > > -static poll_handler_t em_poll; > > -#endif /* POLLING */ > > - > > /********************************************************************* > > * FreeBSD Device Interface Entry Points > > *********************************************************************/ > > @@ -772,11 +767,6 @@ em_detach(device_t dev) > > return (EBUSY); > > } > > > > -#ifdef DEVICE_POLLING > > - if (ifp->if_capenable & IFCAP_POLLING) > > - ether_poll_deregister(ifp); > > -#endif > > - > > if (adapter->led_dev != NULL) > > led_destroy(adapter->led_dev); > > > > @@ -1162,10 +1152,7 @@ em_ioctl(struct ifnet *ifp, u_long comma > > EM_CORE_LOCK(adapter); > > em_disable_intr(adapter); > > em_set_multi(adapter); > > -#ifdef DEVICE_POLLING > > - if (!(ifp->if_capenable & IFCAP_POLLING)) > > -#endif > > - em_enable_intr(adapter); > > + em_enable_intr(adapter); > > EM_CORE_UNLOCK(adapter); > > } > > break; > > @@ -1192,26 +1179,7 @@ em_ioctl(struct ifnet *ifp, u_long comma > > IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set > Capabilities)"); > > reinit = 0; > > mask = ifr->ifr_reqcap ^ ifp->if_capenable; > > -#ifdef DEVICE_POLLING > > - if (mask & IFCAP_POLLING) { > > - if (ifr->ifr_reqcap & IFCAP_POLLING) { > > - error = ether_poll_register(em_poll, ifp); > > - if 
(error) > > - return (error); > > - EM_CORE_LOCK(adapter); > > - em_disable_intr(adapter); > > - ifp->if_capenable |= IFCAP_POLLING; > > - EM_CORE_UNLOCK(adapter); > > - } else { > > - error = ether_poll_deregister(ifp); > > - /* Enable interrupt even in error case */ > > - EM_CORE_LOCK(adapter); > > - em_enable_intr(adapter); > > - ifp->if_capenable &= ~IFCAP_POLLING; > > - EM_CORE_UNLOCK(adapter); > > - } > > - } > > -#endif > > + > > if (mask & IFCAP_HWCSUM) { > > ifp->if_capenable ^= IFCAP_HWCSUM; > > reinit = 1; > > @@ -1373,16 +1341,7 @@ em_init_locked(struct adapter *adapter) > > E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); > > } > > > > -#ifdef DEVICE_POLLING > > - /* > > - * Only enable interrupts if we are not polling, make sure > > - * they are off otherwise. > > - */ > > - if (ifp->if_capenable & IFCAP_POLLING) > > - em_disable_intr(adapter); > > - else > > -#endif /* DEVICE_POLLING */ > > - em_enable_intr(adapter); > > + em_enable_intr(adapter); > > > > /* AMT based hardware can now take control from firmware */ > > if (adapter->has_manage && adapter->has_amt) > > @@ -1399,58 +1358,6 @@ em_init(void *arg) > > EM_CORE_UNLOCK(adapter); > > } > > > > - > > -#ifdef DEVICE_POLLING > > -/********************************************************************* > > - * > > - * Legacy polling routine: note this only works with single queue > > - * > > - *********************************************************************/ > > -static int > > -em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) > > -{ > > - struct adapter *adapter = ifp->if_softc; > > - struct tx_ring *txr = adapter->tx_rings; > > - struct rx_ring *rxr = adapter->rx_rings; > > - u32 reg_icr; > > - int rx_done; > > - > > - EM_CORE_LOCK(adapter); > > - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { > > - EM_CORE_UNLOCK(adapter); > > - return (0); > > - } > > - > > - if (cmd == POLL_AND_CHECK_STATUS) { > > - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); > > - if (reg_icr & 
(E1000_ICR_RXSEQ | E1000_ICR_LSC)) { > > - callout_stop(&adapter->timer); > > - adapter->hw.mac.get_link_status = 1; > > - em_update_link_status(adapter); > > - callout_reset(&adapter->timer, hz, > > - em_local_timer, adapter); > > - } > > - } > > - EM_CORE_UNLOCK(adapter); > > - > > - em_rxeof(rxr, count, &rx_done); > > - > > - EM_TX_LOCK(txr); > > - em_txeof(txr); > > -#ifdef EM_MULTIQUEUE > > - if (!drbr_empty(ifp, txr->br)) > > - em_mq_start_locked(ifp, txr, NULL); > > -#else > > - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) > > - em_start_locked(ifp, txr); > > -#endif > > - EM_TX_UNLOCK(txr); > > - > > - return (rx_done); > > -} > > -#endif /* DEVICE_POLLING */ > > - > > - > > /********************************************************************* > > * > > * Fast Legacy/MSI Combined Interrupt Service routine > > @@ -2256,10 +2163,8 @@ em_local_timer(void *arg) > > > > adapter->pause_frames = 0; > > callout_reset(&adapter->timer, hz, em_local_timer, adapter); > > -#ifndef DEVICE_POLLING > > /* Trigger an RX interrupt to guarantee mbuf refresh */ > > E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); > > -#endif > > return; > > hung: > > /* Looks like we're hung */ > > @@ -2980,10 +2885,6 @@ em_setup_interface(device_t dev, struct > > */ > > ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; > > > > -#ifdef DEVICE_POLLING > > - ifp->if_capabilities |= IFCAP_POLLING; > > -#endif > > - > > /* Enable only WOL MAGIC by default */ > > if (adapter->wol) { > > ifp->if_capabilities |= IFCAP_WOL; > > @@ -4388,7 +4289,6 @@ em_initialize_receive_unit(struct adapte > > * We loop at most count times if count is > 0, or until done if > > * count < 0. 
> > * > > - * For polling we also now return the number of cleaned packets > > *********************************************************************/ > > static bool > > em_rxeof(struct rx_ring *rxr, int count, int *done) > From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 23:42:54 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 8A8BCCCA; Tue, 6 Nov 2012 23:42:54 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 700A48FC0A; Tue, 6 Nov 2012 23:42:54 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA6Ngslk089542; Tue, 6 Nov 2012 23:42:54 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA6NgsIa089539; Tue, 6 Nov 2012 23:42:54 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211062342.qA6NgsIa089539@svn.freebsd.org> From: Andre Oppermann Date: Tue, 6 Nov 2012 23:42:54 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242682 - user/andre/tcp_workqueue/sys/dev/bge X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 23:42:54 -0000 Author: andre Date: Tue Nov 6 23:42:54 2012 New Revision: 242682 URL: http://svnweb.freebsd.org/changeset/base/242682 Log: Change the bge(4) driver to use an interrupt filter and an ithread to handle RX and TX packets. Taskqueue is completely removed. 
The interrupt filter runs in interrupt context and only masks the NIC interrupt. Or for bge(4) the interrupt is only one-shot anyway so nothing has to be done. The step is left in place for reference. When the filter returns FILTER_SCHEDULE_THREAD the corresponding ithread is run and does the heavy packet lifting and DMA descriptor refilling. The entire setup of the interrupt filter and ithread is done with bus_setup_intr(). To prevent live-lock the ithread tries to yield after an arbitrary number of packets, 10 in this case. The function maybe_yield() takes a look at the number of consumed cycles/ticks and decides whether the ithread still has quantum left or not. If not it gets put onto the run queue and continues after other threads had their fair share. This work isn't complete yet and bge_ithr[_msix] and bge_rxeof need better coordination to be able to run in polling mode under load. Locking may no longer be necessary as there is only ever one ithread that services the DMA queues at least for RX. Depending on how TX is triggered locking may still be required. Theory of operation: intr_filter() disables the interrupt and lets the ithread get scheduled ithr_rxeof() does: while (new packets available in DMA ring) { dequeue and process packets; after a couple packets call maybe_yield(); after a couple packets re-sync DMA ring with HW; /* continue as long as new packets are available. */ } re-enable interrupt; return; This gives us polling efficiency under load while not going into live-lock and at the same time interrupt fast low latency when not under load. This change is not tested yet and committed as checkpoint. 
Discussed with and explained by: attilio Modified: user/andre/tcp_workqueue/sys/dev/bge/if_bge.c user/andre/tcp_workqueue/sys/dev/bge/if_bgereg.h Modified: user/andre/tcp_workqueue/sys/dev/bge/if_bge.c ============================================================================== --- user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Tue Nov 6 23:25:06 2012 (r242681) +++ user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Tue Nov 6 23:42:54 2012 (r242682) @@ -78,9 +78,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include -#include #include #include @@ -403,9 +403,9 @@ static struct mbuf *bge_setup_tso(struct uint16_t *, uint16_t *); static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *); -static void bge_intr(void *); -static int bge_msi_intr(void *); -static void bge_intr_task(void *, int); +static int bge_intr_filter(void *); +static void bge_ithr_msix(void *); +static void bge_ithr(void *); static void bge_start_locked(struct ifnet *); static void bge_start(struct ifnet *); static int bge_ioctl(struct ifnet *, u_long, caddr_t); @@ -3221,7 +3221,6 @@ bge_attach(device_t dev) sc->bge_dev = dev; BGE_LOCK_INIT(sc, device_get_nameunit(dev)); - TASK_INIT(&sc->bge_intr_task, 0, bge_intr_task, sc); callout_init_mtx(&sc->bge_stat_ch, &sc->bge_mtx, 0); /* @@ -3837,23 +3836,13 @@ again: /* Take advantage of single-shot MSI. 
*/ CSR_WRITE_4(sc, BGE_MSI_MODE, CSR_READ_4(sc, BGE_MSI_MODE) & ~BGE_MSIMODE_ONE_SHOT_DISABLE); - sc->bge_tq = taskqueue_create_fast("bge_taskq", M_WAITOK, - taskqueue_thread_enqueue, &sc->bge_tq); - if (sc->bge_tq == NULL) { - device_printf(dev, "could not create taskqueue.\n"); - ether_ifdetach(ifp); - error = ENOMEM; - goto fail; - } - taskqueue_start_threads(&sc->bge_tq, 1, PI_NET, "%s taskq", - device_get_nameunit(sc->bge_dev)); error = bus_setup_intr(dev, sc->bge_irq, - INTR_TYPE_NET | INTR_MPSAFE, bge_msi_intr, NULL, sc, - &sc->bge_intrhand); + INTR_TYPE_NET | INTR_MPSAFE, bge_intr_filter, + bge_ithr_msix, sc, &sc->bge_intrhand); } else error = bus_setup_intr(dev, sc->bge_irq, - INTR_TYPE_NET | INTR_MPSAFE, NULL, bge_intr, sc, - &sc->bge_intrhand); + INTR_TYPE_NET | INTR_MPSAFE, bge_intr_filter, + bge_ithr, sc, &sc->bge_intrhand); if (error) { ether_ifdetach(ifp); @@ -3888,9 +3877,6 @@ bge_detach(device_t dev) callout_drain(&sc->bge_stat_ch); } - if (sc->bge_tq) - taskqueue_drain(sc->bge_tq, &sc->bge_intr_task); - if (sc->bge_flags & BGE_FLAG_TBI) { ifmedia_removeall(&sc->bge_ifmedia); } else { @@ -3910,9 +3896,6 @@ bge_release_resources(struct bge_softc * dev = sc->bge_dev; - if (sc->bge_tq != NULL) - taskqueue_free(sc->bge_tq); - if (sc->bge_intrhand != NULL) bus_teardown_intr(dev, sc->bge_irq, sc->bge_intrhand); @@ -4221,6 +4204,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t { struct ifnet *ifp; int rx_npkts = 0, stdcnt = 0, jumbocnt = 0; + int pkts = 0; uint16_t rx_cons; rx_cons = sc->bge_rx_saved_considx; @@ -4325,6 +4309,10 @@ bge_rxeof(struct bge_softc *sc, uint16_t if (holdlck != 0) { BGE_UNLOCK(sc); (*ifp->if_input)(ifp, m); + if (++pkts > 10) { + maybe_yield(); + pkts = 0; + } BGE_LOCK(sc); } else (*ifp->if_input)(ifp, m); @@ -4499,7 +4487,7 @@ bge_poll(struct ifnet *ifp, enum poll_cm #endif /* DEVICE_POLLING */ static int -bge_msi_intr(void *arg) +bge_intr_filter(void *arg) { struct bge_softc *sc; @@ -4508,12 +4496,11 @@ bge_msi_intr(void *arg) * 
This interrupt is not shared and controller already * disabled further interrupt. */ - taskqueue_enqueue(sc->bge_tq, &sc->bge_intr_task); - return (FILTER_HANDLED); + return (FILTER_SCHEDULE_THREAD); } static void -bge_intr_task(void *arg, int pending) +bge_ithr_msix(void *arg) { struct bge_softc *sc; struct ifnet *ifp; @@ -4567,10 +4554,11 @@ bge_intr_task(void *arg, int pending) bge_start_locked(ifp); } BGE_UNLOCK(sc); + return; } static void -bge_intr(void *xsc) +bge_ithr(void *xsc) { struct bge_softc *sc; struct ifnet *ifp; @@ -4648,6 +4636,7 @@ bge_intr(void *xsc) bge_start_locked(ifp); BGE_UNLOCK(sc); + return; } static void Modified: user/andre/tcp_workqueue/sys/dev/bge/if_bgereg.h ============================================================================== --- user/andre/tcp_workqueue/sys/dev/bge/if_bgereg.h Tue Nov 6 23:25:06 2012 (r242681) +++ user/andre/tcp_workqueue/sys/dev/bge/if_bgereg.h Tue Nov 6 23:42:54 2012 (r242682) @@ -3024,8 +3024,6 @@ struct bge_softc { int rxcycles; #endif /* DEVICE_POLLING */ struct bge_mac_stats bge_mac_stats; - struct task bge_intr_task; - struct taskqueue *bge_tq; }; #define BGE_LOCK_INIT(_sc, _name) \ From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 23:46:09 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 06DF4F35; Tue, 6 Nov 2012 23:46:09 +0000 (UTC) (envelope-from luigi@onelab2.iet.unipi.it) Received: from onelab2.iet.unipi.it (onelab2.iet.unipi.it [131.114.59.238]) by mx1.freebsd.org (Postfix) with ESMTP id 60C5E8FC0C; Tue, 6 Nov 2012 23:46:07 +0000 (UTC) Received: by onelab2.iet.unipi.it (Postfix, from userid 275) id 980347300A; Wed, 7 Nov 2012 01:07:07 +0100 (CET) Date: Wed, 7 Nov 2012 01:07:07 +0100 From: Luigi Rizzo To: Jack Vogel Subject: Re: svn commit: r242670 - user/andre/tcp_workqueue/sys/dev/e1000 Message-ID: <20121107000707.GB32652@onelab2.iet.unipi.it> References: 
<201211061954.qA6JsOP3038450@svn.freebsd.org> <20121106220952.GA32652@onelab2.iet.unipi.it> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: User-Agent: Mutt/1.4.2.3i Cc: src-committers@freebsd.org, Andre Oppermann , svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 23:46:09 -0000 On Tue, Nov 06, 2012 at 02:07:39PM -0800, Jack Vogel wrote: > Its his own branch :) true, and i totally missed this important detail :) But i do hope the removal lands into HEAD so i'd prefer that device drivers be left unchanged when this happens. cheers luigi > Jack > > > On Tue, Nov 6, 2012 at 2:09 PM, Luigi Rizzo wrote: > > > One thing: > > > > while i believe device polling should go, > > removing the conditional blocks from device drivers is both useless > > and a mistake as it gratuitously introduces disalignment between > > device drivers. > > > > I suggest to revert this (and similar) commits. > > > > The code is already conditional, and it suffices to > > remove the option from files/options and if you want > > to remove the DEVICE_POLLING from the main code. > > > > cheers > > luigi > > > > > > On Tue, Nov 06, 2012 at 07:54:24PM +0000, Andre Oppermann wrote: > > > Author: andre > > > Date: Tue Nov 6 19:54:24 2012 > > > New Revision: 242670 > > > URL: http://svnweb.freebsd.org/changeset/base/242670 > > > > > > Log: > > > Remove polling support from em in preparation to try a different > > > approach. 
> > > > > > Modified: > > > user/andre/tcp_workqueue/sys/dev/e1000/if_em.c > > > > > > Modified: user/andre/tcp_workqueue/sys/dev/e1000/if_em.c > > > > > ============================================================================== > > > --- user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 > > 19:51:54 2012 (r242669) > > > +++ user/andre/tcp_workqueue/sys/dev/e1000/if_em.c Tue Nov 6 > > 19:54:24 2012 (r242670) > > > @@ -33,7 +33,6 @@ > > > /*$FreeBSD$*/ > > > > > > #ifdef HAVE_KERNEL_OPTION_HEADERS > > > -#include "opt_device_polling.h" > > > #include "opt_inet.h" > > > #include "opt_inet6.h" > > > #endif > > > @@ -293,10 +292,6 @@ static int em_sysctl_eee(SYSCTL_HANDLER_ > > > > > > static __inline void em_rx_discard(struct rx_ring *, int); > > > > > > -#ifdef DEVICE_POLLING > > > -static poll_handler_t em_poll; > > > -#endif /* POLLING */ > > > - > > > /********************************************************************* > > > * FreeBSD Device Interface Entry Points > > > *********************************************************************/ > > > @@ -772,11 +767,6 @@ em_detach(device_t dev) > > > return (EBUSY); > > > } > > > > > > -#ifdef DEVICE_POLLING > > > - if (ifp->if_capenable & IFCAP_POLLING) > > > - ether_poll_deregister(ifp); > > > -#endif > > > - > > > if (adapter->led_dev != NULL) > > > led_destroy(adapter->led_dev); > > > > > > @@ -1162,10 +1152,7 @@ em_ioctl(struct ifnet *ifp, u_long comma > > > EM_CORE_LOCK(adapter); > > > em_disable_intr(adapter); > > > em_set_multi(adapter); > > > -#ifdef DEVICE_POLLING > > > - if (!(ifp->if_capenable & IFCAP_POLLING)) > > > -#endif > > > - em_enable_intr(adapter); > > > + em_enable_intr(adapter); > > > EM_CORE_UNLOCK(adapter); > > > } > > > break; > > > @@ -1192,26 +1179,7 @@ em_ioctl(struct ifnet *ifp, u_long comma > > > IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set > > Capabilities)"); > > > reinit = 0; > > > mask = ifr->ifr_reqcap ^ ifp->if_capenable; > > > -#ifdef DEVICE_POLLING > > > - if (mask & 
IFCAP_POLLING) { > > > - if (ifr->ifr_reqcap & IFCAP_POLLING) { > > > - error = ether_poll_register(em_poll, ifp); > > > - if (error) > > > - return (error); > > > - EM_CORE_LOCK(adapter); > > > - em_disable_intr(adapter); > > > - ifp->if_capenable |= IFCAP_POLLING; > > > - EM_CORE_UNLOCK(adapter); > > > - } else { > > > - error = ether_poll_deregister(ifp); > > > - /* Enable interrupt even in error case */ > > > - EM_CORE_LOCK(adapter); > > > - em_enable_intr(adapter); > > > - ifp->if_capenable &= ~IFCAP_POLLING; > > > - EM_CORE_UNLOCK(adapter); > > > - } > > > - } > > > -#endif > > > + > > > if (mask & IFCAP_HWCSUM) { > > > ifp->if_capenable ^= IFCAP_HWCSUM; > > > reinit = 1; > > > @@ -1373,16 +1341,7 @@ em_init_locked(struct adapter *adapter) > > > E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); > > > } > > > > > > -#ifdef DEVICE_POLLING > > > - /* > > > - * Only enable interrupts if we are not polling, make sure > > > - * they are off otherwise. > > > - */ > > > - if (ifp->if_capenable & IFCAP_POLLING) > > > - em_disable_intr(adapter); > > > - else > > > -#endif /* DEVICE_POLLING */ > > > - em_enable_intr(adapter); > > > + em_enable_intr(adapter); > > > > > > /* AMT based hardware can now take control from firmware */ > > > if (adapter->has_manage && adapter->has_amt) > > > @@ -1399,58 +1358,6 @@ em_init(void *arg) > > > EM_CORE_UNLOCK(adapter); > > > } > > > > > > - > > > -#ifdef DEVICE_POLLING > > > -/********************************************************************* > > > - * > > > - * Legacy polling routine: note this only works with single queue > > > - * > > > - *********************************************************************/ > > > -static int > > > -em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) > > > -{ > > > - struct adapter *adapter = ifp->if_softc; > > > - struct tx_ring *txr = adapter->tx_rings; > > > - struct rx_ring *rxr = adapter->rx_rings; > > > - u32 reg_icr; > > > - int rx_done; > > > - > > > - 
EM_CORE_LOCK(adapter); > > > - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { > > > - EM_CORE_UNLOCK(adapter); > > > - return (0); > > > - } > > > - > > > - if (cmd == POLL_AND_CHECK_STATUS) { > > > - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); > > > - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { > > > - callout_stop(&adapter->timer); > > > - adapter->hw.mac.get_link_status = 1; > > > - em_update_link_status(adapter); > > > - callout_reset(&adapter->timer, hz, > > > - em_local_timer, adapter); > > > - } > > > - } > > > - EM_CORE_UNLOCK(adapter); > > > - > > > - em_rxeof(rxr, count, &rx_done); > > > - > > > - EM_TX_LOCK(txr); > > > - em_txeof(txr); > > > -#ifdef EM_MULTIQUEUE > > > - if (!drbr_empty(ifp, txr->br)) > > > - em_mq_start_locked(ifp, txr, NULL); > > > -#else > > > - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) > > > - em_start_locked(ifp, txr); > > > -#endif > > > - EM_TX_UNLOCK(txr); > > > - > > > - return (rx_done); > > > -} > > > -#endif /* DEVICE_POLLING */ > > > - > > > - > > > /********************************************************************* > > > * > > > * Fast Legacy/MSI Combined Interrupt Service routine > > > @@ -2256,10 +2163,8 @@ em_local_timer(void *arg) > > > > > > adapter->pause_frames = 0; > > > callout_reset(&adapter->timer, hz, em_local_timer, adapter); > > > -#ifndef DEVICE_POLLING > > > /* Trigger an RX interrupt to guarantee mbuf refresh */ > > > E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); > > > -#endif > > > return; > > > hung: > > > /* Looks like we're hung */ > > > @@ -2980,10 +2885,6 @@ em_setup_interface(device_t dev, struct > > > */ > > > ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; > > > > > > -#ifdef DEVICE_POLLING > > > - ifp->if_capabilities |= IFCAP_POLLING; > > > -#endif > > > - > > > /* Enable only WOL MAGIC by default */ > > > if (adapter->wol) { > > > ifp->if_capabilities |= IFCAP_WOL; > > > @@ -4388,7 +4289,6 @@ em_initialize_receive_unit(struct adapte > > > * We loop at most count times if count 
is > 0, or until done if > > > * count < 0. > > > * > > > - * For polling we also now return the number of cleaned packets > > > *********************************************************************/ > > > static bool > > > em_rxeof(struct rx_ring *rxr, int count, int *done) > > From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 23:49:28 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 4A0E1348 for ; Tue, 6 Nov 2012 23:49:28 +0000 (UTC) (envelope-from andre@freebsd.org) Received: from c00l3r.networx.ch (c00l3r.networx.ch [62.48.2.2]) by mx1.freebsd.org (Postfix) with ESMTP id A56668FC14 for ; Tue, 6 Nov 2012 23:49:27 +0000 (UTC) Received: (qmail 55386 invoked from network); 7 Nov 2012 01:24:49 -0000 Received: from c00l3r.networx.ch (HELO [127.0.0.1]) ([62.48.2.2]) (envelope-sender ) by c00l3r.networx.ch (qmail-ldap-1.03) with SMTP for ; 7 Nov 2012 01:24:49 -0000 Message-ID: <5099A201.8060103@freebsd.org> Date: Wed, 07 Nov 2012 00:49:21 +0100 From: Andre Oppermann User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:16.0) Gecko/20121010 Thunderbird/16.0.1 MIME-Version: 1.0 To: Luigi Rizzo Subject: Re: svn commit: r242670 - user/andre/tcp_workqueue/sys/dev/e1000 References: <201211061954.qA6JsOP3038450@svn.freebsd.org> <20121106220952.GA32652@onelab2.iet.unipi.it> <20121107000707.GB32652@onelab2.iet.unipi.it> In-Reply-To: <20121107000707.GB32652@onelab2.iet.unipi.it> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: src-committers@freebsd.org, Jack Vogel , svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 23:49:28 -0000 On 07.11.2012 01:07, Luigi Rizzo wrote: > On Tue, Nov 06, 2012 
at 02:07:39PM -0800, Jack Vogel wrote: >> Its his own branch :) > > true, and i totally missed this important detail :) It's my playground and I'm trying out the right approach. > But i do hope the removal lands into HEAD so i'd prefer that > device drivers be left unchanged when this happens. Yes. I'll commit the change as you suggested. We can discuss actual removal of code when we have a firm, working and proven alternative. -- Andre From owner-svn-src-user@FreeBSD.ORG Tue Nov 6 23:52:11 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 5B0B3511; Tue, 6 Nov 2012 23:52:11 +0000 (UTC) (envelope-from luigi@onelab2.iet.unipi.it) Received: from onelab2.iet.unipi.it (onelab2.iet.unipi.it [131.114.59.238]) by mx1.freebsd.org (Postfix) with ESMTP id 113D88FC0A; Tue, 6 Nov 2012 23:52:11 +0000 (UTC) Received: by onelab2.iet.unipi.it (Postfix, from userid 275) id 6A7F87300B; Wed, 7 Nov 2012 01:13:11 +0100 (CET) Date: Wed, 7 Nov 2012 01:13:11 +0100 From: Luigi Rizzo To: Andre Oppermann Subject: Re: svn commit: r242670 - user/andre/tcp_workqueue/sys/dev/e1000 Message-ID: <20121107001311.GD32652@onelab2.iet.unipi.it> References: <201211061954.qA6JsOP3038450@svn.freebsd.org> <20121106220952.GA32652@onelab2.iet.unipi.it> <20121107000707.GB32652@onelab2.iet.unipi.it> <5099A201.8060103@freebsd.org> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <5099A201.8060103@freebsd.org> User-Agent: Mutt/1.4.2.3i Cc: src-committers@freebsd.org, Jack Vogel , svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Nov 2012 23:52:11 -0000 On Wed, Nov 07, 2012 at 12:49:21AM +0100, Andre Oppermann wrote: > On 
07.11.2012 01:07, Luigi Rizzo wrote: > >On Tue, Nov 06, 2012 at 02:07:39PM -0800, Jack Vogel wrote: > >>Its his own branch :) > > > >true, and i totally missed this important detail :) > > It's my playground and I'm trying out the right approach. > > >But i do hope the removal lands into HEAD so i'd prefer that > >device drivers be left unchanged when this happens. > > Yes. I'll commit the change as you suggested. We can discuss > actual removal of code when we have a firm, working and proven > alternative. thank you and my apologies for confusing your branch with head. cheers luigi From owner-svn-src-user@FreeBSD.ORG Thu Nov 8 20:15:13 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 9366CC21; Thu, 8 Nov 2012 20:15:13 +0000 (UTC) (envelope-from alfred@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 751B08FC17; Thu, 8 Nov 2012 20:15:13 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA8KFDO6015306; Thu, 8 Nov 2012 20:15:13 GMT (envelope-from alfred@svn.freebsd.org) Received: (from alfred@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA8KFD1O015304; Thu, 8 Nov 2012 20:15:13 GMT (envelope-from alfred@svn.freebsd.org) Message-Id: <201211082015.qA8KFD1O015304@svn.freebsd.org> From: Alfred Perlstein Date: Thu, 8 Nov 2012 20:15:13 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242783 - in user/alfred/9-alfred/sys: i386/include kern X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: 
List-Subscribe: , X-List-Received-Date: Thu, 08 Nov 2012 20:15:13 -0000 Author: alfred Date: Thu Nov 8 20:15:12 2012 New Revision: 242783 URL: http://svnweb.freebsd.org/changeset/base/242783 Log: Divorce autotune nmbclusters from maxusers. Provide arch specific override maximum. Suggested by: peter Modified: user/alfred/9-alfred/sys/i386/include/vmparam.h user/alfred/9-alfred/sys/kern/kern_mbuf.c Modified: user/alfred/9-alfred/sys/i386/include/vmparam.h ============================================================================== --- user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 18:11:31 2012 (r242782) +++ user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 20:15:12 2012 (r242783) @@ -202,4 +202,9 @@ #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ +#ifndef MAX_AUTOTUNE_NMBCLUSTERS +/* old maxusers max value. */ +#define MAX_AUTOTUNE_NMBCLUSTERS (1024 + 384 * 64) +#endif + #endif /* _MACHINE_VMPARAM_H_ */ Modified: user/alfred/9-alfred/sys/kern/kern_mbuf.c ============================================================================== --- user/alfred/9-alfred/sys/kern/kern_mbuf.c Thu Nov 8 18:11:31 2012 (r242782) +++ user/alfred/9-alfred/sys/kern/kern_mbuf.c Thu Nov 8 20:15:12 2012 (r242783) @@ -102,6 +102,30 @@ int nmbjumbo9; /* limits number of 9k int nmbjumbo16; /* limits number of 16k jumbo clusters */ struct mbstat mbstat; +static int +nmbclusters_from_physpages(void) +{ + long factor; + long rv; + + factor = physmem / (2 * 1024 * 1024 / PAGE_SIZE); + if (factor < 32) + factor = 32; + /* after 384, switch scale to 1/4 */ + if (factor > 384) + factor = 384 + (factor - 384) / 4; + rv = 1024 + factor * 64; + /* + * allow a platform specific override to prevent exhausting + * kernel memory on large memory + small address space machines. 
+ */ +#ifdef MAX_AUTOTUNE_NMBCLUSTERS + if (rv > MAX_AUTOTUNE_NMBCLUSTERS) + rv = MAX_AUTOTUNE_NMBCLUSTERS +#endif + return (rv); +} + /* * tunable_mbinit() has to be run before init_maxsockets() thus * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets() @@ -114,7 +138,7 @@ tunable_mbinit(void *dummy) /* This has to be done before VM init. */ TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); if (nmbclusters == 0) - nmbclusters = 1024 + maxusers * 64; + nmbclusters = nmbclusters_from_physpages(); TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); if (nmbjumbop == 0) From owner-svn-src-user@FreeBSD.ORG Thu Nov 8 22:40:24 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 934E02F5; Thu, 8 Nov 2012 22:40:24 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 7A7958FC1A; Thu, 8 Nov 2012 22:40:24 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA8MeOdS039621; Thu, 8 Nov 2012 22:40:24 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA8MeOQe039620; Thu, 8 Nov 2012 22:40:24 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211082240.qA8MeOQe039620@svn.freebsd.org> From: Andre Oppermann Date: Thu, 8 Nov 2012 22:40:24 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242802 - user/andre/tcp_workqueue/sys/net X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , 
X-List-Received-Date: Thu, 08 Nov 2012 22:40:24 -0000 Author: andre Date: Thu Nov 8 22:40:24 2012 New Revision: 242802 URL: http://svnweb.freebsd.org/changeset/base/242802 Log: Save this for later continuation: Different approach on ECMP (equal cost multi path) routing. Instead of having shadow rtentries linked behind the one in the trie, just have an array of different egress interfaces and next-hops. This simplifies the code significantly. A normal add to the routing table then sets the main next-hop. An ECMP-aware add or modify can change any of the other next-hops in the rtentry. The next-hop list is sorted by priority (distance in Cisco parlance) with the highest first. Equal priority next-hops are next to each other, sorted by next-hop IP address. If the highest priority has more than one next-hop, load is equally shared among them. All other routes with lower weight are not used. When a higher priority next-hop is removed or flagged as unavailable (e.g. interface link down) the next higher priority prefix will become active. When only unavailable next-hops are in the rtentry it is ignored and a less specific match is searched for. If no route is found the lookup will fail. When an unavailable next-hop becomes available again, the rtentry is valid again and will match on lookups. This way we can implement suppression of routes on link state down interfaces without having to actually remove the rtentry. Additionally it is automatically reinstated when the link comes back. This way, for example, an OSPF route can take precedence and NLRI reachability is ensured. This is the same behavior as that of Cisco, Juniper and other router vendors. 
Modified: user/andre/tcp_workqueue/sys/net/route.h Modified: user/andre/tcp_workqueue/sys/net/route.h ============================================================================== --- user/andre/tcp_workqueue/sys/net/route.h Thu Nov 8 21:40:05 2012 (r242801) +++ user/andre/tcp_workqueue/sys/net/route.h Thu Nov 8 22:40:24 2012 (r242802) @@ -112,6 +112,14 @@ struct mbuf; #include #endif #endif +struct rtgw { + struct ifnet *rtgw_ifp; /* the answer: interface to use */ + struct sockaddr *rtgw_gateway; /* value */ + uint16_t rtgw_flags; /* nexthop flags */ + uint8_t rtgw_priority; /* nexthop weight */ +}; +#define RTGW_VALID 0x00000001 + struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* @@ -121,18 +129,19 @@ struct rtentry { */ #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) - struct sockaddr *rt_gateway; /* value */ int rt_flags; /* up/down?, host/net */ - int rt_refcnt; /* # held references */ - struct ifnet *rt_ifp; /* the answer: interface to use */ + struct rtgw rt_gw[8]; /* equal cost multipath */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */ u_int rt_fibnum; /* which FIB */ + int rt_refcnt; /* # held references */ #ifdef _KERNEL /* XXX ugly, user apps use this definition but don't have a mtx def */ struct mtx rt_mtx; /* mutex for routing entry */ #endif }; +#define rt_ifp rt_gw[0].rtgw_ifp +#define rt_gateway rt_gw[0].rtgw_gateway /* * Following structure necessary for 4.3 compatibility; @@ -141,11 +150,11 @@ struct rtentry { struct ortentry { u_long rt_hash; /* to speed lookups */ struct sockaddr rt_dst; /* key */ - struct sockaddr rt_gateway; /* value */ + struct sockaddr rt_gateway_o; /* value */ short rt_flags; /* up/down?, host/net */ short rt_refcnt; /* # held references */ u_long rt_use; /* raw # packets forwarded */ - struct ifnet *rt_ifp; /* 
the answer: interface to use */ + struct ifnet *rt_ifp_o; /* the answer: interface to use */ }; #define rt_use rt_rmx.rmx_pksent From owner-svn-src-user@FreeBSD.ORG Thu Nov 8 23:24:03 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 6CC51B09; Thu, 8 Nov 2012 23:24:03 +0000 (UTC) (envelope-from alfred@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 383F88FC16; Thu, 8 Nov 2012 23:24:03 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA8NO3Yb046826; Thu, 8 Nov 2012 23:24:03 GMT (envelope-from alfred@svn.freebsd.org) Received: (from alfred@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA8NO3h5046824; Thu, 8 Nov 2012 23:24:03 GMT (envelope-from alfred@svn.freebsd.org) Message-Id: <201211082324.qA8NO3h5046824@svn.freebsd.org> From: Alfred Perlstein Date: Thu, 8 Nov 2012 23:24:03 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242805 - in user/alfred/9-alfred/sys: i386/include kern X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 08 Nov 2012 23:24:03 -0000 Author: alfred Date: Thu Nov 8 23:24:02 2012 New Revision: 242805 URL: http://svnweb.freebsd.org/changeset/base/242805 Log: clip maxusers based on MD VM_MAX_AUTOTUNE_MAXUSERS. 
Modified: user/alfred/9-alfred/sys/i386/include/vmparam.h user/alfred/9-alfred/sys/kern/subr_param.c Modified: user/alfred/9-alfred/sys/i386/include/vmparam.h ============================================================================== --- user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 23:21:02 2012 (r242804) +++ user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 23:24:02 2012 (r242805) @@ -202,9 +202,14 @@ #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ +#ifndef VM_MAX_AUTOTUNE_MAXUSERS +#define VM_MAX_AUTOTUNE_MAXUSERS 384 +#endif + #ifndef MAX_AUTOTUNE_NMBCLUSTERS /* old maxusers max value. */ #define MAX_AUTOTUNE_NMBCLUSTERS (1024 + 384 * 64) #endif + #endif /* _MACHINE_VMPARAM_H_ */ Modified: user/alfred/9-alfred/sys/kern/subr_param.c ============================================================================== --- user/alfred/9-alfred/sys/kern/subr_param.c Thu Nov 8 23:21:02 2012 (r242804) +++ user/alfred/9-alfred/sys/kern/subr_param.c Thu Nov 8 23:24:02 2012 (r242805) @@ -278,16 +278,16 @@ init_param2(long physpages) maxusers = physpages / (2 * 1024 * 1024 / PAGE_SIZE); if (maxusers < 32) maxusers = 32; +#ifdef VM_MAX_AUTOTUNE_MAXUSERS + if (maxusers > VM_MAX_AUTOTUNE_MAXUSERS) + maxusers = VM_MAX_AUTOTUNE_MAXUSERS; +#endif /* - * Clips maxusers to 384 on machines with <= 4GB RAM or 32bit. - * Scales it down 6x for large memory machines. 
+ * Scales down the function in which maxusers grows once + * we hit 384 */ - if (maxusers > 384) { - if (sizeof(void *) <= 4) - maxusers = 384; - else - maxusers = 384 + ((maxusers - 384) / 6); - } + if (maxusers > 384) + maxusers = 384 + ((maxusers - 384) / 4); } /* From owner-svn-src-user@FreeBSD.ORG Thu Nov 8 23:42:23 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 49AFB392; Thu, 8 Nov 2012 23:42:23 +0000 (UTC) (envelope-from alfred@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 133428FC08; Thu, 8 Nov 2012 23:42:23 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA8NgMhk049437; Thu, 8 Nov 2012 23:42:22 GMT (envelope-from alfred@svn.freebsd.org) Received: (from alfred@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA8NgMEP049435; Thu, 8 Nov 2012 23:42:22 GMT (envelope-from alfred@svn.freebsd.org) Message-Id: <201211082342.qA8NgMEP049435@svn.freebsd.org> From: Alfred Perlstein Date: Thu, 8 Nov 2012 23:42:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242806 - in user/alfred/9-alfred/sys: i386/include kern X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 08 Nov 2012 23:42:23 -0000 Author: alfred Date: Thu Nov 8 23:42:22 2012 New Revision: 242806 URL: http://svnweb.freebsd.org/changeset/base/242806 Log: Go back to basing nmbclusters on maxusers, however allow platform override of both limit and function. 
Modified: user/alfred/9-alfred/sys/i386/include/vmparam.h user/alfred/9-alfred/sys/kern/kern_mbuf.c Modified: user/alfred/9-alfred/sys/i386/include/vmparam.h ============================================================================== --- user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 23:24:02 2012 (r242805) +++ user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 23:42:22 2012 (r242806) @@ -206,10 +206,9 @@ #define VM_MAX_AUTOTUNE_MAXUSERS 384 #endif -#ifndef MAX_AUTOTUNE_NMBCLUSTERS +#ifndef VM_MAX_AUTOTUNE_NMBCLUSTERS /* old maxusers max value. */ -#define MAX_AUTOTUNE_NMBCLUSTERS (1024 + 384 * 64) +#define VM_MAX_AUTOTUNE_NMBCLUSTERS (1024 + VM_MAX_AUTOTUNE_MAXUSERS * 64) #endif - #endif /* _MACHINE_VMPARAM_H_ */ Modified: user/alfred/9-alfred/sys/kern/kern_mbuf.c ============================================================================== --- user/alfred/9-alfred/sys/kern/kern_mbuf.c Thu Nov 8 23:24:02 2012 (r242805) +++ user/alfred/9-alfred/sys/kern/kern_mbuf.c Thu Nov 8 23:42:22 2012 (r242806) @@ -102,30 +102,6 @@ int nmbjumbo9; /* limits number of 9k int nmbjumbo16; /* limits number of 16k jumbo clusters */ struct mbstat mbstat; -static int -nmbclusters_from_physpages(void) -{ - long factor; - long rv; - - factor = physmem / (2 * 1024 * 1024 / PAGE_SIZE); - if (factor < 32) - factor = 32; - /* after 384, switch scale to 1/4 */ - if (factor > 384) - factor = 384 + (factor - 384) / 4; - rv = 1024 + factor * 64; - /* - * allow a platform specific override to prevent exhausting - * kernel memory on large memory + small address space machines. - */ -#ifdef MAX_AUTOTUNE_NMBCLUSTERS - if (rv > MAX_AUTOTUNE_NMBCLUSTERS) - rv = MAX_AUTOTUNE_NMBCLUSTERS -#endif - return (rv); -} - /* * tunable_mbinit() has to be run before init_maxsockets() thus * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets() @@ -137,8 +113,17 @@ tunable_mbinit(void *dummy) /* This has to be done before VM init. 
*/ TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); - if (nmbclusters == 0) - nmbclusters = nmbclusters_from_physpages(); + if (nmbclusters == 0) { +#ifdef VM_AUTOTUNE_NMBCLUSTERS + nmbclusters = VM_AUTOTUNE_NMBCLUSTERS; +#else + nmbclusters = 1024 + maxusers * 64; +#endif +#ifdef VM_MAX_AUTOTUNE_NMBCLUSTERS + if (rv > VM_MAX_AUTOTUNE_NMBCLUSTERS) + rv = VM_MAX_AUTOTUNE_NMBCLUSTERS; +#endif + } TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); if (nmbjumbop == 0) From owner-svn-src-user@FreeBSD.ORG Fri Nov 9 00:40:38 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 72FAFFDA for ; Fri, 9 Nov 2012 00:40:38 +0000 (UTC) (envelope-from lists@eitanadler.com) Received: from mail-la0-f54.google.com (mail-la0-f54.google.com [209.85.215.54]) by mx1.freebsd.org (Postfix) with ESMTP id D68C98FC15 for ; Fri, 9 Nov 2012 00:40:37 +0000 (UTC) Received: by mail-la0-f54.google.com with SMTP id e12so3282671lag.13 for ; Thu, 08 Nov 2012 16:40:31 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=eitanadler.com; s=0xdeadbeef; h=mime-version:sender:in-reply-to:references:from:date :x-google-sender-auth:message-id:subject:to:cc:content-type; bh=yPYEU+9/hzXvd5wvg+oyOpavs5F4JxQRafefHp6V1DM=; b=orZL3F89mojVTS0Yzu2ySSvo+3hWctRBnLX5zftUVOqmSjmbgN9z1C6wRGItSg5CXw 1JA8NKLEj3WOnGZg7ZFH3+tbOAC3berQKOPJrLOsFKJVKwwMIbQGFcEUqxA/TwiPi+Me KX9KhY6WOawVv+mgfkXYn4aoUcHCbih3ijgf0= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20120113; h=mime-version:sender:in-reply-to:references:from:date :x-google-sender-auth:message-id:subject:to:cc:content-type :x-gm-message-state; bh=yPYEU+9/hzXvd5wvg+oyOpavs5F4JxQRafefHp6V1DM=; b=fW1w6pJrOQR1OG6SO74+C93HDjax04RIuduEIYYxbkJUlLdhD61tP74NV4tFWB/Sls osgd0w3Lioieq9bYKY/POa0djoS+wV5uT15YCF6bQY0sCnqYnkC5lnIR2+Il5SZexmXb RGQMKmPA/Sbp2E8Z5BCvF48RhQEKph5QD24WxUTscwWZXIZ4gUszHsuqvjNRfRd/J1HT 
zPo7OsmTZ/LmDgzYCI9FIzSN2Xa7T5WPC2Se4l8356Tm5RoPyrZsEqav+dOvS4QSVbDh nDg1U0sFERXI+t1JAQuJ+bX1CCipwP65ErR3iCIDmgkGn5DWrJb6ecb31p3IgYjqDc7+ TTDg== Received: by 10.112.104.2 with SMTP id ga2mr4037280lbb.48.1352421631299; Thu, 08 Nov 2012 16:40:31 -0800 (PST) MIME-Version: 1.0 Sender: lists@eitanadler.com Received: by 10.112.25.166 with HTTP; Thu, 8 Nov 2012 16:40:00 -0800 (PST) In-Reply-To: <201211082324.qA8NO3h5046824@svn.freebsd.org> References: <201211082324.qA8NO3h5046824@svn.freebsd.org> From: Eitan Adler Date: Thu, 8 Nov 2012 19:40:00 -0500 X-Google-Sender-Auth: ZD8WhNsciqiAxeAxrRCuQGccCKA Message-ID: Subject: Re: svn commit: r242805 - in user/alfred/9-alfred/sys: i386/include kern To: Alfred Perlstein Content-Type: text/plain; charset=UTF-8 X-Gm-Message-State: ALoCoQmUMbooxKWur+uzT9ZK5VzeH56cBBDwlr2ENHonHd+ps3S/90X8bU2sqL1fT9CSIWNxEhhl Cc: src-committers@freebsd.org, svn-src-user@freebsd.org X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 09 Nov 2012 00:40:38 -0000 On 8 November 2012 18:24, Alfred Perlstein wrote: > Author: alfred > Date: Thu Nov 8 23:24:02 2012 > New Revision: 242805 > URL: http://svnweb.freebsd.org/changeset/base/242805 > > Log: > clip maxusers based on MD VM_MAX_AUTOTUNE_MAXUSERS. 
> > Modified: > user/alfred/9-alfred/sys/i386/include/vmparam.h > user/alfred/9-alfred/sys/kern/subr_param.c > > Modified: user/alfred/9-alfred/sys/i386/include/vmparam.h > ============================================================================== > --- user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 23:21:02 2012 (r242804) > +++ user/alfred/9-alfred/sys/i386/include/vmparam.h Thu Nov 8 23:24:02 2012 (r242805) > @@ -202,9 +202,14 @@ > > #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ > > +#ifndef VM_MAX_AUTOTUNE_MAXUSERS > +#define VM_MAX_AUTOTUNE_MAXUSERS 384 > +#endif > + > #ifndef MAX_AUTOTUNE_NMBCLUSTERS > /* old maxusers max value. */ > #define MAX_AUTOTUNE_NMBCLUSTERS (1024 + 384 * 64) > #endif > > + > #endif /* _MACHINE_VMPARAM_H_ */ > > Modified: user/alfred/9-alfred/sys/kern/subr_param.c > ============================================================================== > --- user/alfred/9-alfred/sys/kern/subr_param.c Thu Nov 8 23:21:02 2012 (r242804) > +++ user/alfred/9-alfred/sys/kern/subr_param.c Thu Nov 8 23:24:02 2012 (r242805) > @@ -278,16 +278,16 @@ init_param2(long physpages) > maxusers = physpages / (2 * 1024 * 1024 / PAGE_SIZE); > if (maxusers < 32) > maxusers = 32; > +#ifdef VM_MAX_AUTOTUNE_MAXUSERS > + if (maxusers > VM_MAX_AUTOTUNE_MAXUSERS) > + maxusers = VM_MAX_AUTOTUNE_MAXUSERS; > +#endif > /* > - * Clips maxusers to 384 on machines with <= 4GB RAM or 32bit. > - * Scales it down 6x for large memory machines. > + * Scales down the function in which maxusers grows once > + * we hit 384 > */ Could the number 384 be clarified here. Where is it coming from? 
-- Eitan Adler Source, Ports, Doc committer Bugmeister, Ports Security teams From owner-svn-src-user@FreeBSD.ORG Fri Nov 9 00:59:50 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 009E265F; Fri, 9 Nov 2012 00:59:49 +0000 (UTC) (envelope-from alfred@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id D91918FC0A; Fri, 9 Nov 2012 00:59:49 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA90xnCa062101; Fri, 9 Nov 2012 00:59:49 GMT (envelope-from alfred@svn.freebsd.org) Received: (from alfred@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA90xn88062100; Fri, 9 Nov 2012 00:59:49 GMT (envelope-from alfred@svn.freebsd.org) Message-Id: <201211090059.qA90xn88062100@svn.freebsd.org> From: Alfred Perlstein Date: Fri, 9 Nov 2012 00:59:49 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242812 - user/alfred/9-alfred/sys/kern X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 09 Nov 2012 00:59:50 -0000 Author: alfred Date: Fri Nov 9 00:59:49 2012 New Revision: 242812 URL: http://svnweb.freebsd.org/changeset/base/242812 Log: cleanup holdover from non-maxusers version Modified: user/alfred/9-alfred/sys/kern/kern_mbuf.c Modified: user/alfred/9-alfred/sys/kern/kern_mbuf.c ============================================================================== --- user/alfred/9-alfred/sys/kern/kern_mbuf.c Fri Nov 9 00:35:55 2012 (r242811) +++ 
user/alfred/9-alfred/sys/kern/kern_mbuf.c Fri Nov 9 00:59:49 2012 (r242812) @@ -120,8 +120,8 @@ tunable_mbinit(void *dummy) nmbclusters = 1024 + maxusers * 64; #endif #ifdef VM_MAX_AUTOTUNE_NMBCLUSTERS - if (rv > VM_MAX_AUTOTUNE_NMBCLUSTERS) - rv = VM_MAX_AUTOTUNE_NMBCLUSTERS; + if (nmbclusters > VM_MAX_AUTOTUNE_NMBCLUSTERS) + nmbclusters = VM_MAX_AUTOTUNE_NMBCLUSTERS; #endif } From owner-svn-src-user@FreeBSD.ORG Fri Nov 9 17:47:55 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 6386492A; Fri, 9 Nov 2012 17:47:55 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 467478FC12; Fri, 9 Nov 2012 17:47:55 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA9HltCU023663; Fri, 9 Nov 2012 17:47:55 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA9HltMn023656; Fri, 9 Nov 2012 17:47:55 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211091747.qA9HltMn023656@svn.freebsd.org> From: Andre Oppermann Date: Fri, 9 Nov 2012 17:47:55 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242831 - in user/andre/tcp_workqueue/sys: amd64/amd64 arm/arm arm/at91 arm/broadcom/bcm2835 arm/include arm/lpc arm/mv arm/tegra arm/ti boot/common boot/forth boot/i386/boot2 boot/i386... 
X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 09 Nov 2012 17:47:55 -0000 Author: andre Date: Fri Nov 9 17:47:54 2012 New Revision: 242831 URL: http://svnweb.freebsd.org/changeset/base/242831 Log: Integrate from HEAD @242829, except for r242601 which is the backout of r242262. Added: user/andre/tcp_workqueue/sys/boot/forth/menusets.4th - copied unchanged from r242829, head/sys/boot/forth/menusets.4th user/andre/tcp_workqueue/sys/boot/forth/menusets.4th.8 - copied unchanged from r242829, head/sys/boot/forth/menusets.4th.8 user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/uts/powerpc/ - copied from r242829, head/sys/cddl/contrib/opensolaris/uts/powerpc/ user/andre/tcp_workqueue/sys/cddl/dev/dtrace/powerpc/ - copied from r242829, head/sys/cddl/dev/dtrace/powerpc/ user/andre/tcp_workqueue/sys/dev/ath/if_ath_alq.c - copied unchanged from r242829, head/sys/dev/ath/if_ath_alq.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_alq.h - copied unchanged from r242829, head/sys/dev/ath/if_ath_alq.h Modified: user/andre/tcp_workqueue/sys/amd64/amd64/identcpu.c user/andre/tcp_workqueue/sys/amd64/amd64/pmap.c user/andre/tcp_workqueue/sys/arm/arm/machdep.c user/andre/tcp_workqueue/sys/arm/at91/at91_machdep.c user/andre/tcp_workqueue/sys/arm/broadcom/bcm2835/bcm2835_machdep.c user/andre/tcp_workqueue/sys/arm/include/machdep.h user/andre/tcp_workqueue/sys/arm/lpc/lpc_gpio.c user/andre/tcp_workqueue/sys/arm/lpc/lpc_machdep.c user/andre/tcp_workqueue/sys/arm/mv/mv_machdep.c user/andre/tcp_workqueue/sys/arm/tegra/tegra2_machdep.c user/andre/tcp_workqueue/sys/arm/ti/ti_machdep.c user/andre/tcp_workqueue/sys/boot/common/Makefile.inc user/andre/tcp_workqueue/sys/boot/forth/loader.4th 
user/andre/tcp_workqueue/sys/boot/forth/menu-commands.4th user/andre/tcp_workqueue/sys/boot/forth/menu.4th user/andre/tcp_workqueue/sys/boot/i386/boot2/sio.S user/andre/tcp_workqueue/sys/boot/i386/loader/Makefile user/andre/tcp_workqueue/sys/boot/ia64/common/Makefile user/andre/tcp_workqueue/sys/boot/pc98/boot2/Makefile user/andre/tcp_workqueue/sys/boot/pc98/cdboot/Makefile user/andre/tcp_workqueue/sys/boot/pc98/loader/Makefile user/andre/tcp_workqueue/sys/boot/powerpc/ofw/Makefile user/andre/tcp_workqueue/sys/boot/powerpc/ps3/Makefile user/andre/tcp_workqueue/sys/boot/sparc64/loader/Makefile user/andre/tcp_workqueue/sys/cam/scsi/scsi_enc_ses.c user/andre/tcp_workqueue/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c user/andre/tcp_workqueue/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c user/andre/tcp_workqueue/sys/cddl/dev/lockstat/lockstat.c user/andre/tcp_workqueue/sys/cddl/dev/profile/profile.c user/andre/tcp_workqueue/sys/conf/files.powerpc user/andre/tcp_workqueue/sys/conf/kern.pre.mk user/andre/tcp_workqueue/sys/conf/options user/andre/tcp_workqueue/sys/contrib/ngatm/netnatm/msg/uni_ie.c user/andre/tcp_workqueue/sys/dev/aac/aac_debug.c user/andre/tcp_workqueue/sys/dev/ahci/ahciem.c user/andre/tcp_workqueue/sys/dev/ale/if_ale.c user/andre/tcp_workqueue/sys/dev/asmc/asmc.c user/andre/tcp_workqueue/sys/dev/ata/ata-card.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-acard.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-acerlabs.c 
user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-adaptec.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-amd.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-ati.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-highpoint.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-intel.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-ite.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-jmicron.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-marvell.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-nvidia.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-promise.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-serverworks.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-siliconimage.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-sis.c user/andre/tcp_workqueue/sys/dev/ata/chipsets/ata-via.c user/andre/tcp_workqueue/sys/dev/ath/ath_hal/ah.h user/andre/tcp_workqueue/sys/dev/ath/ath_hal/ah_debug.h user/andre/tcp_workqueue/sys/dev/ath/ath_hal/ah_internal.h user/andre/tcp_workqueue/sys/dev/ath/ath_hal/ar5212/ar5212.h user/andre/tcp_workqueue/sys/dev/ath/ath_hal/ar5416/ar5416.h user/andre/tcp_workqueue/sys/dev/ath/ath_hal/ar5416/ar5416_xmit.c user/andre/tcp_workqueue/sys/dev/ath/if_ath.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_debug.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_debug.h user/andre/tcp_workqueue/sys/dev/ath/if_ath_rx.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_rx_edma.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_sysctl.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_tx.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_tx_edma.c user/andre/tcp_workqueue/sys/dev/ath/if_ath_tx_ht.c user/andre/tcp_workqueue/sys/dev/ath/if_athvar.h user/andre/tcp_workqueue/sys/dev/bge/if_bge.c user/andre/tcp_workqueue/sys/dev/bktr/bktr_audio.c user/andre/tcp_workqueue/sys/dev/cas/if_cas.c user/andre/tcp_workqueue/sys/dev/cpuctl/cpuctl.c user/andre/tcp_workqueue/sys/dev/cxgbe/tom/t4_cpl_io.c user/andre/tcp_workqueue/sys/dev/cxgbe/tom/t4_listen.c 
user/andre/tcp_workqueue/sys/dev/dc/if_dc.c user/andre/tcp_workqueue/sys/dev/drm/via_dma.c user/andre/tcp_workqueue/sys/dev/drm/via_dmablit.c user/andre/tcp_workqueue/sys/dev/etherswitch/arswitch/arswitch.c user/andre/tcp_workqueue/sys/dev/flash/at45d.c user/andre/tcp_workqueue/sys/dev/fxp/if_fxp.c user/andre/tcp_workqueue/sys/dev/gem/if_gem_pci.c user/andre/tcp_workqueue/sys/dev/lge/if_lge.c user/andre/tcp_workqueue/sys/dev/md/md.c user/andre/tcp_workqueue/sys/dev/mfi/mfi.c user/andre/tcp_workqueue/sys/dev/mfi/mfi_cam.c user/andre/tcp_workqueue/sys/dev/mfi/mfi_disk.c user/andre/tcp_workqueue/sys/dev/mfi/mfi_syspd.c user/andre/tcp_workqueue/sys/dev/mfi/mfi_tbolt.c user/andre/tcp_workqueue/sys/dev/mfi/mfivar.h user/andre/tcp_workqueue/sys/dev/mii/mii.c user/andre/tcp_workqueue/sys/dev/mn/if_mn.c user/andre/tcp_workqueue/sys/dev/nge/if_nge.c user/andre/tcp_workqueue/sys/dev/nxge/xgehal/xgehal-device.c user/andre/tcp_workqueue/sys/dev/pci/pci.c user/andre/tcp_workqueue/sys/dev/puc/pucdata.c user/andre/tcp_workqueue/sys/dev/re/if_re.c user/andre/tcp_workqueue/sys/dev/sis/if_sis.c user/andre/tcp_workqueue/sys/dev/sound/pci/emu10kx.c user/andre/tcp_workqueue/sys/dev/ste/if_ste.c user/andre/tcp_workqueue/sys/dev/stge/if_stge.c user/andre/tcp_workqueue/sys/dev/syscons/scvidctl.c user/andre/tcp_workqueue/sys/dev/ti/if_ti.c user/andre/tcp_workqueue/sys/dev/tl/if_tl.c user/andre/tcp_workqueue/sys/dev/twa/tw_cl_misc.c user/andre/tcp_workqueue/sys/dev/uart/uart_bus_acpi.c user/andre/tcp_workqueue/sys/dev/usb/controller/dwc_otg.c user/andre/tcp_workqueue/sys/dev/usb/controller/dwc_otg.h user/andre/tcp_workqueue/sys/dev/usb/controller/dwc_otgreg.h user/andre/tcp_workqueue/sys/dev/usb/controller/ehci.c user/andre/tcp_workqueue/sys/dev/usb/controller/ehci.h user/andre/tcp_workqueue/sys/dev/usb/net/if_udav.c user/andre/tcp_workqueue/sys/dev/usb/quirk/usb_quirk.c user/andre/tcp_workqueue/sys/dev/usb/quirk/usb_quirk.h user/andre/tcp_workqueue/sys/dev/usb/serial/usb_serial.c 
user/andre/tcp_workqueue/sys/dev/usb/serial/usb_serial.h user/andre/tcp_workqueue/sys/dev/usb/storage/umass.c user/andre/tcp_workqueue/sys/dev/usb/usbdevs user/andre/tcp_workqueue/sys/dev/vr/if_vr.c user/andre/tcp_workqueue/sys/dev/wb/if_wb.c user/andre/tcp_workqueue/sys/dev/xl/if_xl.c user/andre/tcp_workqueue/sys/fs/fuse/fuse_file.c user/andre/tcp_workqueue/sys/fs/fuse/fuse_internal.c user/andre/tcp_workqueue/sys/fs/fuse/fuse_internal.h user/andre/tcp_workqueue/sys/fs/fuse/fuse_io.c user/andre/tcp_workqueue/sys/fs/fuse/fuse_node.c user/andre/tcp_workqueue/sys/fs/fuse/fuse_node.h user/andre/tcp_workqueue/sys/fs/fuse/fuse_vnops.c user/andre/tcp_workqueue/sys/i386/i386/pmap.c user/andre/tcp_workqueue/sys/ia64/ia64/pmap.c user/andre/tcp_workqueue/sys/kern/kern_malloc.c user/andre/tcp_workqueue/sys/kern/kern_rwlock.c user/andre/tcp_workqueue/sys/kern/sched_ule.c user/andre/tcp_workqueue/sys/kern/tty.c user/andre/tcp_workqueue/sys/kern/vfs_subr.c user/andre/tcp_workqueue/sys/libkern/strlcpy.c user/andre/tcp_workqueue/sys/libkern/strlen.c user/andre/tcp_workqueue/sys/mips/conf/AP91.hints user/andre/tcp_workqueue/sys/mips/conf/AP93.hints user/andre/tcp_workqueue/sys/mips/conf/AP96.hints user/andre/tcp_workqueue/sys/mips/conf/RSPRO.hints user/andre/tcp_workqueue/sys/mips/mips/pmap.c user/andre/tcp_workqueue/sys/modules/Makefile user/andre/tcp_workqueue/sys/modules/dtrace/Makefile user/andre/tcp_workqueue/sys/modules/nxge/Makefile user/andre/tcp_workqueue/sys/net/bpf.c user/andre/tcp_workqueue/sys/netinet/sctp_constants.h user/andre/tcp_workqueue/sys/netinet/sctp_indata.c user/andre/tcp_workqueue/sys/netinet/sctp_input.c user/andre/tcp_workqueue/sys/netinet/sctp_output.c user/andre/tcp_workqueue/sys/netinet/sctp_pcb.c user/andre/tcp_workqueue/sys/netinet/sctp_structs.h user/andre/tcp_workqueue/sys/netinet/sctp_timer.c user/andre/tcp_workqueue/sys/netinet/sctputil.c user/andre/tcp_workqueue/sys/netinet/tcp_output.c user/andre/tcp_workqueue/sys/netinet/tcp_subr.c 
user/andre/tcp_workqueue/sys/netpfil/ipfw/ip_fw_dynamic.c user/andre/tcp_workqueue/sys/netpfil/ipfw/ip_fw_nat.c user/andre/tcp_workqueue/sys/netpfil/ipfw/ip_fw_private.h user/andre/tcp_workqueue/sys/netpfil/pf/if_pfsync.c user/andre/tcp_workqueue/sys/pci/if_rl.c user/andre/tcp_workqueue/sys/powerpc/aim/locore32.S user/andre/tcp_workqueue/sys/powerpc/aim/locore64.S user/andre/tcp_workqueue/sys/powerpc/aim/mmu_oea.c user/andre/tcp_workqueue/sys/powerpc/aim/trap.c user/andre/tcp_workqueue/sys/powerpc/aim/trap_subr32.S user/andre/tcp_workqueue/sys/powerpc/aim/trap_subr64.S user/andre/tcp_workqueue/sys/powerpc/booke/locore.S user/andre/tcp_workqueue/sys/powerpc/booke/machdep.c user/andre/tcp_workqueue/sys/powerpc/booke/platform_bare.c user/andre/tcp_workqueue/sys/powerpc/booke/pmap.c user/andre/tcp_workqueue/sys/powerpc/conf/GENERIC user/andre/tcp_workqueue/sys/sparc64/include/pmap.h user/andre/tcp_workqueue/sys/sparc64/pci/fire.c user/andre/tcp_workqueue/sys/sparc64/pci/psycho.c user/andre/tcp_workqueue/sys/sparc64/pci/schizo.c user/andre/tcp_workqueue/sys/sparc64/sparc64/pmap.c user/andre/tcp_workqueue/sys/sys/_rwlock.h user/andre/tcp_workqueue/sys/sys/param.h user/andre/tcp_workqueue/sys/sys/rwlock.h user/andre/tcp_workqueue/sys/sys/tty.h user/andre/tcp_workqueue/sys/ufs/ffs/ffs_alloc.c user/andre/tcp_workqueue/sys/ufs/ffs/ffs_balloc.c user/andre/tcp_workqueue/sys/ufs/ffs/ffs_softdep.c Directory Properties: user/andre/tcp_workqueue/sys/ (props changed) user/andre/tcp_workqueue/sys/boot/ (props changed) user/andre/tcp_workqueue/sys/boot/powerpc/ofw/ (props changed) user/andre/tcp_workqueue/sys/cddl/contrib/opensolaris/ (props changed) user/andre/tcp_workqueue/sys/conf/ (props changed) Modified: user/andre/tcp_workqueue/sys/amd64/amd64/identcpu.c ============================================================================== --- user/andre/tcp_workqueue/sys/amd64/amd64/identcpu.c Fri Nov 9 17:46:07 2012 (r242830) +++ user/andre/tcp_workqueue/sys/amd64/amd64/identcpu.c 
Fri Nov 9 17:47:54 2012 (r242831) @@ -481,7 +481,7 @@ SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, void identify_cpu(void) { - u_int regs[4]; + u_int regs[4], cpu_stdext_disable; do_cpuid(0, regs); cpu_high = regs[0]; @@ -516,6 +516,20 @@ identify_cpu(void) if (cpu_high >= 7) { cpuid_count(7, 0, regs); cpu_stdext_feature = regs[1]; + + /* + * Some hypervisors fail to filter out unsupported + * extended features. For now, disable the + * extensions, activation of which requires setting a + * bit in CR4, and which VM monitors do not support. + */ + if (cpu_feature2 & CPUID2_HV) { + cpu_stdext_disable = CPUID_STDEXT_FSGSBASE | + CPUID_STDEXT_SMEP; + } else + cpu_stdext_disable = 0; + TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable); + cpu_stdext_feature &= ~cpu_stdext_disable; } if (cpu_vendor_id == CPU_VENDOR_INTEL || Modified: user/andre/tcp_workqueue/sys/amd64/amd64/pmap.c ============================================================================== --- user/andre/tcp_workqueue/sys/amd64/amd64/pmap.c Fri Nov 9 17:46:07 2012 (r242830) +++ user/andre/tcp_workqueue/sys/amd64/amd64/pmap.c Fri Nov 9 17:47:54 2012 (r242831) @@ -225,16 +225,7 @@ u_int64_t KPML4phys; /* phys addr of ke static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ -/* - * Isolate the global pv list lock from data and other locks to prevent false - * sharing within the cache. 
- */ -static struct { - struct rwlock lock; - char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)]; -} pvh_global __aligned(CACHE_LINE_SIZE); - -#define pvh_global_lock pvh_global.lock +static struct rwlock_padalign pvh_global_lock; /* * Data for the pv entry allocation mechanism Modified: user/andre/tcp_workqueue/sys/arm/arm/machdep.c ============================================================================== --- user/andre/tcp_workqueue/sys/arm/arm/machdep.c Fri Nov 9 17:46:07 2012 (r242830) +++ user/andre/tcp_workqueue/sys/arm/arm/machdep.c Fri Nov 9 17:47:54 2012 (r242831) @@ -44,6 +44,7 @@ #include "opt_compat.h" #include "opt_ddb.h" +#include "opt_platform.h" #include "opt_timer.h" #include @@ -59,11 +60,13 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -94,6 +97,17 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef FDT +#include +#include +#endif + +#ifdef DEBUG +#define debugf(fmt, args...) printf(fmt, ##args) +#else +#define debugf(fmt, args...) +#endif + struct pcpu __pcpu[MAXCPU]; struct pcpu *pcpup = &__pcpu[0]; @@ -114,6 +128,35 @@ extern int *end; extern vm_offset_t ksym_start, ksym_end; #endif +#ifdef FDT +/* + * This is the number of L2 page tables required for covering max + * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, + * stacks etc.), uprounded to be divisible by 4. 
+ */ +#define KERNEL_PT_MAX 78 + +static struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; + +vm_paddr_t phys_avail[10]; +vm_paddr_t dump_avail[4]; + +extern u_int data_abort_handler_address; +extern u_int prefetch_abort_handler_address; +extern u_int undefined_handler_address; + +vm_paddr_t pmap_pa; + +struct pv_addr systempage; +static struct pv_addr msgbufpv; +struct pv_addr irqstack; +struct pv_addr undstack; +struct pv_addr abtstack; +static struct pv_addr kernelstack; + +const struct pmap_devmap *pmap_devmap_bootstrap_table; +#endif + #if defined(LINUX_BOOT_ABI) #define LBABI_MAX_BANKS 10 @@ -961,3 +1004,407 @@ set_stackptrs(int cpu) undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } +#ifdef FDT +static char * +kenv_next(char *cp) +{ + + if (cp != NULL) { + while (*cp != 0) + cp++; + cp++; + if (*cp == 0) + cp = NULL; + } + return (cp); +} + +static void +print_kenv(void) +{ + int len; + char *cp; + + debugf("loader passed (static) kenv:\n"); + if (kern_envp == NULL) { + debugf(" no env, null ptr\n"); + return; + } + debugf(" kern_envp = 0x%08x\n", (uint32_t)kern_envp); + + len = 0; + for (cp = kern_envp; cp != NULL; cp = kenv_next(cp)) + debugf(" %x %s\n", (uint32_t)cp, cp); +} + +static void +print_kernel_section_addr(void) +{ + + debugf("kernel image addresses:\n"); + debugf(" kernbase = 0x%08x\n", (uint32_t)kernbase); + debugf(" _etext (sdata) = 0x%08x\n", (uint32_t)_etext); + debugf(" _edata = 0x%08x\n", (uint32_t)_edata); + debugf(" __bss_start = 0x%08x\n", (uint32_t)__bss_start); + debugf(" _end = 0x%08x\n", (uint32_t)_end); +} + +static void +physmap_init(struct mem_region *availmem_regions, int availmem_regions_sz) +{ + int i, j, cnt; + vm_offset_t phys_kernelend, kernload; + uint32_t s, e, sz; + struct mem_region *mp, *mp1; + + phys_kernelend = KERNPHYSADDR + (virtual_avail - KERNVIRTADDR); + kernload = KERNPHYSADDR; + + /* + * Remove kernel physical address range from avail + * regions list. Page align all regions. 
+ * Non-page aligned memory isn't very interesting to us. + * Also, sort the entries for ascending addresses. + */ + sz = 0; + cnt = availmem_regions_sz; + debugf("processing avail regions:\n"); + for (mp = availmem_regions; mp->mr_size; mp++) { + s = mp->mr_start; + e = mp->mr_start + mp->mr_size; + debugf(" %08x-%08x -> ", s, e); + /* Check whether this region holds all of the kernel. */ + if (s < kernload && e > phys_kernelend) { + availmem_regions[cnt].mr_start = phys_kernelend; + availmem_regions[cnt++].mr_size = e - phys_kernelend; + e = kernload; + } + /* Look whether this regions starts within the kernel. */ + if (s >= kernload && s < phys_kernelend) { + if (e <= phys_kernelend) + goto empty; + s = phys_kernelend; + } + /* Now look whether this region ends within the kernel. */ + if (e > kernload && e <= phys_kernelend) { + if (s >= kernload) { + goto empty; + } + e = kernload; + } + /* Now page align the start and size of the region. */ + s = round_page(s); + e = trunc_page(e); + if (e < s) + e = s; + sz = e - s; + debugf("%08x-%08x = %x\n", s, e, sz); + + /* Check whether some memory is left here. */ + if (sz == 0) { + empty: + printf("skipping\n"); + bcopy(mp + 1, mp, + (cnt - (mp - availmem_regions)) * sizeof(*mp)); + cnt--; + mp--; + continue; + } + + /* Do an insertion sort. 
*/ + for (mp1 = availmem_regions; mp1 < mp; mp1++) + if (s < mp1->mr_start) + break; + if (mp1 < mp) { + bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1); + mp1->mr_start = s; + mp1->mr_size = sz; + } else { + mp->mr_start = s; + mp->mr_size = sz; + } + } + availmem_regions_sz = cnt; + + /* Fill in phys_avail table, based on availmem_regions */ + debugf("fill in phys_avail:\n"); + for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { + + debugf(" region: 0x%08x - 0x%08x (0x%08x)\n", + availmem_regions[i].mr_start, + availmem_regions[i].mr_start + availmem_regions[i].mr_size, + availmem_regions[i].mr_size); + + /* + * We should not map the page at PA 0x0000000, the VM can't + * handle it, as pmap_extract() == 0 means failure. + */ + if (availmem_regions[i].mr_start > 0 || + availmem_regions[i].mr_size > PAGE_SIZE) { + phys_avail[j] = availmem_regions[i].mr_start; + if (phys_avail[j] == 0) + phys_avail[j] += PAGE_SIZE; + phys_avail[j + 1] = availmem_regions[i].mr_start + + availmem_regions[i].mr_size; + } else + j -= 2; + } + phys_avail[j] = 0; + phys_avail[j + 1] = 0; +} + +void * +initarm(struct arm_boot_params *abp) +{ + struct mem_region availmem_regions[FDT_MEM_REGIONS]; + struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; + vm_offset_t dtbp, freemempos, l2_start, lastaddr; + vm_offset_t pmap_bootstrap_lastaddr; + uint32_t memsize, l2size; + char *env; + void *kmdp; + u_int l1pagetable; + int i = 0, j = 0, err_devmap = 0; + int availmem_regions_sz; + + lastaddr = parse_boot_param(abp); + memsize = 0; + set_cpufuncs(); + + /* + * Find the dtb passed in by the boot loader. + */ + kmdp = preload_search_by_type("elf kernel"); + if (kmdp != NULL) + dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); + else + dtbp = (vm_offset_t)NULL; + +#if defined(FDT_DTB_STATIC) + /* + * In case the device tree blob was not retrieved (from metadata) try + * to use the statically embedded one. 
+ */ + if (dtbp == (vm_offset_t)NULL) + dtbp = (vm_offset_t)&fdt_static_dtb; +#endif + + if (OF_install(OFW_FDT, 0) == FALSE) + while (1); + + if (OF_init((void *)dtbp) != 0) + while (1); + + /* Grab physical memory regions information from device tree. */ + if (fdt_get_mem_regions(availmem_regions, &availmem_regions_sz, + &memsize) != 0) + while(1); + + /* Platform-specific initialisation */ + pmap_bootstrap_lastaddr = initarm_lastaddr(); + + pcpu0_init(); + + /* Do basic tuning, hz etc */ + init_param1(); + + /* Calculate number of L2 tables needed for mapping vm_page_array */ + l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); + l2size = (l2size >> L1_S_SHIFT) + 1; + + /* + * Add one table for end of kernel map, one for stacks, msgbuf and + * L1 and L2 tables map and one for vectors map. + */ + l2size += 3; + + /* Make it divisible by 4 */ + l2size = (l2size + 3) & ~3; + + freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; + + /* Define a macro to simplify memory allocation */ +#define valloc_pages(var, np) \ + alloc_pages((var).pv_va, (np)); \ + (var).pv_pa = (var).pv_va + (KERNPHYSADDR - KERNVIRTADDR); + +#define alloc_pages(var, np) \ + (var) = freemempos; \ + freemempos += (np * PAGE_SIZE); \ + memset((char *)(var), 0, ((np) * PAGE_SIZE)); + + while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) + freemempos += PAGE_SIZE; + valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); + + for (i = 0; i < l2size; ++i) { + if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { + valloc_pages(kernel_pt_table[i], + L2_TABLE_SIZE / PAGE_SIZE); + j = i; + } else { + kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + + L2_TABLE_SIZE_REAL * (i - j); + kernel_pt_table[i].pv_pa = + kernel_pt_table[i].pv_va - KERNVIRTADDR + + KERNPHYSADDR; + + } + } + /* + * Allocate a page for the system page mapped to 0x00000000 + * or 0xffff0000. This page will just contain the system vectors + * and can be shared by all processes. 
+ */ + valloc_pages(systempage, 1); + + /* Allocate dynamic per-cpu area. */ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + + /* Allocate stacks for all modes */ + valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); + valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); + valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); + valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU); + valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); + + /* + * Now we start construction of the L1 page table + * We start by mapping the L2 page tables into the L1. + * This means that we can replace L1 mappings later on if necessary + */ + l1pagetable = kernel_l1pt.pv_va; + + /* + * Try to map as much as possible of kernel text and data using + * 1MB section mapping and for the rest of initial kernel address + * space use L2 coarse tables. + * + * Link L2 tables for mapping remainder of kernel (modulo 1MB) + * and kernel structures + */ + l2_start = lastaddr & ~(L1_S_OFFSET); + for (i = 0 ; i < l2size - 1; i++) + pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE, + &kernel_pt_table[i]); + + pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE; + + /* Map kernel code and data */ + pmap_map_chunk(l1pagetable, KERNVIRTADDR, KERNPHYSADDR, + (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK, + VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); + + + /* Map L1 directory and allocated L2 page tables */ + pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, + L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); + + pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va, + kernel_pt_table[0].pv_pa, + L2_TABLE_SIZE_REAL * l2size, + VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); + + /* Map allocated DPCPU, stacks and msgbuf */ + pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, + freemempos - dpcpu.pv_va, + VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); + + /* Link and map the vector page */ + pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, + 
&kernel_pt_table[l2size - 1]); + pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE); + + /* Map pmap_devmap[] entries */ + err_devmap = platform_devmap_init(); + pmap_devmap_bootstrap(l1pagetable, pmap_devmap_bootstrap_table); + + cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); + pmap_pa = kernel_l1pt.pv_pa; + setttb(kernel_l1pt.pv_pa); + cpu_tlb_flushID(); + cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); + + /* + * Only after the SOC registers block is mapped we can perform device + * tree fixups, as they may attempt to read parameters from hardware. + */ + OF_interpret("perform-fixup", 0); + + initarm_gpio_init(); + + cninit(); + + physmem = memsize / PAGE_SIZE; + + debugf("initarm: console initialized\n"); + debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); + debugf(" boothowto = 0x%08x\n", boothowto); + debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); + print_kernel_section_addr(); + print_kenv(); + + env = getenv("kernelname"); + if (env != NULL) + strlcpy(kernelname, env, sizeof(kernelname)); + + if (err_devmap != 0) + printf("WARNING: could not fully configure devmap, error=%d\n", + err_devmap); + + initarm_late_init(); + + /* + * Pages were allocated during the secondary bootstrap for the + * stacks for different CPU modes. + * We must now set the r13 registers in the different CPU modes to + * point to these stacks. + * Since the ARM stacks use STMFD etc. we must set r13 to the top end + * of the stack memory. + */ + cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); + + set_stackptrs(0); + + /* + * We must now clean the cache again.... + * Cleaning may be done by reading new data to displace any + * dirty data in the cache. This will have happened in setttb() + * but since we are boot strapping the addresses used for the read + * may have just been remapped and thus the cache could be out + * of sync. A re-clean after the switch will cure this. 
+ * After booting there are no gross relocations of the kernel thus + * this problem will not occur after initarm(). + */ + cpu_idcache_wbinv_all(); + + /* Set stack for exception handlers */ + data_abort_handler_address = (u_int)data_abort_handler; + prefetch_abort_handler_address = (u_int)prefetch_abort_handler; + undefined_handler_address = (u_int)undefinedinstruction_bounce; + undefined_init(); + + init_proc0(kernelstack.pv_va); + + arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); + arm_dump_avail_init(memsize, sizeof(dump_avail) / sizeof(dump_avail[0])); + pmap_bootstrap(freemempos, pmap_bootstrap_lastaddr, &kernel_l1pt); + msgbufp = (void *)msgbufpv.pv_va; + msgbufinit(msgbufp, msgbufsize); + mutex_init(); + + /* + * Prepare map of physical memory regions available to vm subsystem. + */ + physmap_init(availmem_regions, availmem_regions_sz); + + init_param2(physmem); + kdb_init(); + + return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - + sizeof(struct pcb))); +} +#endif Modified: user/andre/tcp_workqueue/sys/arm/at91/at91_machdep.c ============================================================================== --- user/andre/tcp_workqueue/sys/arm/at91/at91_machdep.c Fri Nov 9 17:46:07 2012 (r242830) +++ user/andre/tcp_workqueue/sys/arm/at91/at91_machdep.c Fri Nov 9 17:47:54 2012 (r242831) @@ -96,6 +96,10 @@ __FBSDID("$FreeBSD$"); #include #include +#ifndef MAXCPU +#define MAXCPU 1 +#endif + /* Page table for mapping proc0 zero page */ #define KERNEL_PT_SYS 0 #define KERNEL_PT_KERN 1 @@ -454,7 +458,7 @@ initarm(struct arm_boot_params *abp) { struct pv_addr kernel_l1pt; struct pv_addr dpcpu; - int loop, i; + int i; u_int l1pagetable; vm_offset_t freemempos; vm_offset_t afterkern; @@ -482,23 +486,23 @@ initarm(struct arm_boot_params *abp) while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); - for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) { - if (!(loop % (PAGE_SIZE / 
L2_TABLE_SIZE_REAL))) { - valloc_pages(kernel_pt_table[loop], + for (i = 0; i < NUM_KERNEL_PTS; ++i) { + if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { + valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); } else { - kernel_pt_table[loop].pv_va = freemempos - - (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) * + kernel_pt_table[i].pv_va = freemempos - + (i % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) * L2_TABLE_SIZE_REAL; - kernel_pt_table[loop].pv_pa = - kernel_pt_table[loop].pv_va - KERNVIRTADDR + + kernel_pt_table[i].pv_pa = + kernel_pt_table[i].pv_va - KERNVIRTADDR + KERNPHYSADDR; } } /* - * Allocate a page for the system page mapped to V0x00000000 - * This page will just contain the system vectors and can be - * shared by all processes. + * Allocate a page for the system page mapped to 0x00000000 + * or 0xffff0000. This page will just contain the system vectors + * and can be shared by all processes. */ valloc_pages(systempage, 1); @@ -507,10 +511,10 @@ initarm(struct arm_boot_params *abp) dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ - valloc_pages(irqstack, IRQ_STACK_SIZE); - valloc_pages(abtstack, ABT_STACK_SIZE); - valloc_pages(undstack, UND_STACK_SIZE); - valloc_pages(kernelstack, KSTACK_PAGES); + valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); + valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); + valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); + valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* @@ -558,17 +562,17 @@ initarm(struct arm_boot_params *abp) pmap_map_chunk(l1pagetable, msgbufpv.pv_va, msgbufpv.pv_pa, msgbufsize, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); - for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) { - pmap_map_chunk(l1pagetable, kernel_pt_table[loop].pv_va, - kernel_pt_table[loop].pv_pa, L2_TABLE_SIZE, + for (i = 0; i < NUM_KERNEL_PTS; ++i) { + pmap_map_chunk(l1pagetable, kernel_pt_table[i].pv_va, + kernel_pt_table[i].pv_pa, L2_TABLE_SIZE, 
VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); } pmap_devmap_bootstrap(l1pagetable, at91_devmap); - cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT); + cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); - cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)); + cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); at91_soc_id(); Modified: user/andre/tcp_workqueue/sys/arm/broadcom/bcm2835/bcm2835_machdep.c ============================================================================== --- user/andre/tcp_workqueue/sys/arm/broadcom/bcm2835/bcm2835_machdep.c Fri Nov 9 17:46:07 2012 (r242830) +++ user/andre/tcp_workqueue/sys/arm/broadcom/bcm2835/bcm2835_machdep.c Fri Nov 9 17:47:54 2012 (r242831) @@ -46,514 +46,23 @@ __FBSDID("$FreeBSD$"); #define _ARM32_BUS_DMA_PRIVATE #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include +#include /* For trapframe_t, used in */ +#include +#include -#include +#include -#define DEBUG -#ifdef DEBUG -#define debugf(fmt, args...) printf(fmt, ##args) -#else -#define debugf(fmt, args...) -#endif +#include /* Start of address space used for bootstrap map */ #define DEVMAP_BOOTSTRAP_MAP_START 0xE0000000 -/* - * This is the number of L2 page tables required for covering max - * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, - * stacks etc.), uprounded to be divisible by 4. 
- */ -#define KERNEL_PT_MAX 78 - -extern unsigned char kernbase[]; -extern unsigned char _etext[]; -extern unsigned char _edata[]; -extern unsigned char __bss_start[]; -extern unsigned char _end[]; - -#ifdef DDB -extern vm_offset_t ksym_start, ksym_end; -#endif - -extern u_int data_abort_handler_address; -extern u_int prefetch_abort_handler_address; -extern u_int undefined_handler_address; - -extern vm_offset_t pmap_bootstrap_lastaddr; -extern int *end; - -struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; - -/* Physical and virtual addresses for some global pages */ -vm_paddr_t phys_avail[10]; -vm_paddr_t dump_avail[4]; -vm_offset_t physical_pages; -vm_offset_t pmap_bootstrap_lastaddr; -vm_paddr_t pmap_pa; - -const struct pmap_devmap *pmap_devmap_bootstrap_table; -struct pv_addr systempage; -struct pv_addr msgbufpv; -struct pv_addr irqstack; -struct pv_addr undstack; -struct pv_addr abtstack; -struct pv_addr kernelstack; - -static struct mem_region availmem_regions[FDT_MEM_REGIONS]; -static int availmem_regions_sz; - -static void print_kenv(void); -static void print_kernel_section_addr(void); - -static void physmap_init(void); -static int platform_devmap_init(void); - -static char * -kenv_next(char *cp) -{ - - if (cp != NULL) { - while (*cp != 0) - cp++; - cp++; - if (*cp == 0) - cp = NULL; - } - return (cp); -} - -static void -print_kenv(void) -{ - int len; - char *cp; - - debugf("loader passed (static) kenv:\n"); - if (kern_envp == NULL) { - debugf(" no env, null ptr\n"); - return; - } - debugf(" kern_envp = 0x%08x\n", (uint32_t)kern_envp); - - len = 0; - for (cp = kern_envp; cp != NULL; cp = kenv_next(cp)) - debugf(" %x %s\n", (uint32_t)cp, cp); -} - -static void -print_kernel_section_addr(void) -{ - - debugf("kernel image addresses:\n"); - debugf(" kernbase = 0x%08x\n", (uint32_t)kernbase); - debugf(" _etext (sdata) = 0x%08x\n", (uint32_t)_etext); - debugf(" _edata = 0x%08x\n", (uint32_t)_edata); - debugf(" __bss_start = 0x%08x\n", (uint32_t)__bss_start); - 
debugf(" _end = 0x%08x\n", (uint32_t)_end); -} - -static void -physmap_init(void) -{ - int i, j, cnt; - vm_offset_t phys_kernelend, kernload; - uint32_t s, e, sz; - struct mem_region *mp, *mp1; - - phys_kernelend = KERNPHYSADDR + (virtual_avail - KERNVIRTADDR); - kernload = KERNPHYSADDR; - - /* - * Remove kernel physical address range from avail - * regions list. Page align all regions. - * Non-page aligned memory isn't very interesting to us. - * Also, sort the entries for ascending addresses. - */ - sz = 0; - cnt = availmem_regions_sz; - debugf("processing avail regions:\n"); - for (mp = availmem_regions; mp->mr_size; mp++) { - s = mp->mr_start; - e = mp->mr_start + mp->mr_size; - debugf(" %08x-%08x -> ", s, e); - /* Check whether this region holds all of the kernel. */ - if (s < kernload && e > phys_kernelend) { - availmem_regions[cnt].mr_start = phys_kernelend; - availmem_regions[cnt++].mr_size = e - phys_kernelend; - e = kernload; - } - /* Look whether this regions starts within the kernel. */ - if (s >= kernload && s < phys_kernelend) { - if (e <= phys_kernelend) - goto empty; - s = phys_kernelend; - } - /* Now look whether this region ends within the kernel. */ - if (e > kernload && e <= phys_kernelend) { - if (s >= kernload) { - goto empty; - } - e = kernload; - } - /* Now page align the start and size of the region. */ - s = round_page(s); - e = trunc_page(e); - if (e < s) - e = s; - sz = e - s; - debugf("%08x-%08x = %x\n", s, e, sz); - - /* Check whether some memory is left here. */ - if (sz == 0) { - empty: - printf("skipping\n"); - bcopy(mp + 1, mp, - (cnt - (mp - availmem_regions)) * sizeof(*mp)); - cnt--; - mp--; - continue; - } - - /* Do an insertion sort. 
*/ - for (mp1 = availmem_regions; mp1 < mp; mp1++) - if (s < mp1->mr_start) - break; - if (mp1 < mp) { - bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1); - mp1->mr_start = s; - mp1->mr_size = sz; - } else { - mp->mr_start = s; - mp->mr_size = sz; - } - } - availmem_regions_sz = cnt; - - /* Fill in phys_avail table, based on availmem_regions */ - debugf("fill in phys_avail:\n"); - for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { - - debugf(" region: 0x%08x - 0x%08x (0x%08x)\n", - availmem_regions[i].mr_start, - availmem_regions[i].mr_start + availmem_regions[i].mr_size, - availmem_regions[i].mr_size); - - /* - * We should not map the page at PA 0x0000000, the VM can't - * handle it, as pmap_extract() == 0 means failure. - */ - if (availmem_regions[i].mr_start > 0 || - availmem_regions[i].mr_size > PAGE_SIZE) { - phys_avail[j] = availmem_regions[i].mr_start; - if (phys_avail[j] == 0) - phys_avail[j] += PAGE_SIZE; - phys_avail[j + 1] = availmem_regions[i].mr_start + - availmem_regions[i].mr_size; - } else - j -= 2; - } - phys_avail[j] = 0; - phys_avail[j + 1] = 0; -} - -void * -initarm(struct arm_boot_params *abp) -{ - struct pv_addr kernel_l1pt; - struct pv_addr dpcpu; - vm_offset_t dtbp, freemempos, l2_start, lastaddr; - uint32_t memsize, l2size; - char *env; - void *kmdp; - u_int l1pagetable; - int i = 0, j = 0, err_devmap = 0; - - lastaddr = parse_boot_param(abp); - memsize = 0; - set_cpufuncs(); - - /* - * Find the dtb passed in by the boot loader. - */ - kmdp = preload_search_by_type("elf kernel"); - if (kmdp != NULL) - dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); - else - dtbp = (vm_offset_t)NULL; - -#if defined(FDT_DTB_STATIC) - /* - * In case the device tree blob was not retrieved (from metadata) try - * to use the statically embedded one. 
- */ - if (dtbp == (vm_offset_t)NULL) - dtbp = (vm_offset_t)&fdt_static_dtb; -#endif - - if (OF_install(OFW_FDT, 0) == FALSE) - while (1); - - if (OF_init((void *)dtbp) != 0) - while (1); - - /* Grab physical memory regions information from device tree. */ - if (fdt_get_mem_regions(availmem_regions, &availmem_regions_sz, - &memsize) != 0) - while(1); - - /* Platform-specific initialisation */ - pmap_bootstrap_lastaddr = initarm_lastaddr(); - - pcpu0_init(); - - /* Calculate number of L2 tables needed for mapping vm_page_array */ - l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); - l2size = (l2size >> L1_S_SHIFT) + 1; - - /* - * Add one table for end of kernel map, one for stacks, msgbuf and - * L1 and L2 tables map and one for vectors map. - */ - l2size += 3; - - /* Make it divisible by 4 */ - l2size = (l2size + 3) & ~3; - -#define KERNEL_TEXT_BASE (KERNBASE) - freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; - - /* Define a macro to simplify memory allocation */ -#define valloc_pages(var, np) \ - alloc_pages((var).pv_va, (np)); \ - (var).pv_pa = (var).pv_va + (KERNPHYSADDR - KERNVIRTADDR); - -#define alloc_pages(var, np) \ - (var) = freemempos; \ - freemempos += (np * PAGE_SIZE); \ - memset((char *)(var), 0, ((np) * PAGE_SIZE)); - - while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) - freemempos += PAGE_SIZE; - valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); - - for (i = 0; i < l2size; ++i) { - if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { - valloc_pages(kernel_pt_table[i], - L2_TABLE_SIZE / PAGE_SIZE); - j = i; - } else { - kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + - L2_TABLE_SIZE_REAL * (i - j); - kernel_pt_table[i].pv_pa = - kernel_pt_table[i].pv_va - KERNVIRTADDR + - KERNPHYSADDR; - - } - } - /* - * Allocate a page for the system page mapped to 0x00000000 - * or 0xffff0000. This page will just contain the system vectors - * and can be shared by all processes. 
- */ - valloc_pages(systempage, 1); - - /* Allocate dynamic per-cpu area. */ - valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** From owner-svn-src-user@FreeBSD.ORG Fri Nov 9 17:56:50 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 13F53BF5; Fri, 9 Nov 2012 17:56:50 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id E10FD8FC12; Fri, 9 Nov 2012 17:56:49 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qA9HunRr025058; Fri, 9 Nov 2012 17:56:49 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qA9HunYS025057; Fri, 9 Nov 2012 17:56:49 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211091756.qA9HunYS025057@svn.freebsd.org> From: Andre Oppermann Date: Fri, 9 Nov 2012 17:56:49 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242832 - user/andre/tcp_workqueue/sys/dev/bge X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 09 Nov 2012 17:56:50 -0000 Author: andre Date: Fri Nov 9 17:56:49 2012 New Revision: 242832 URL: http://svnweb.freebsd.org/changeset/base/242832 Log: Move per packet dequeueing from bge_rxeof() into a new function bge_rx_packet() and merge bge_rxcsum() into it. 
To avoid the unlock/lock pair for injection into the stack dequeue all packets from the rx DMA ring into an m_nextpkt chain and then inject that chain one by one into the stack w/o any locking. Work in progress. Modified: user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Modified: user/andre/tcp_workqueue/sys/dev/bge/if_bge.c ============================================================================== --- user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Fri Nov 9 17:47:54 2012 (r242831) +++ user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Fri Nov 9 17:56:49 2012 (r242832) @@ -4190,6 +4190,94 @@ bge_rxreuse_jumbo(struct bge_softc *sc, BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } +struct mbuf * +bge_rx_packet(struct ifnet *ifp, struct bge_rx_bd *rx, uint16_t rxidx, + struct bge_softc *sc) { + struct mbuf *m = NULL; + + if (rx->bge_flags & BGE_RXBDFLAG_JUMBO_RING) { + m = sc->bge_cdata.bge_rx_jumbo_chain[rxidx]; + if (rx->bge_flags & BGE_RXBDFLAG_ERROR) { + bge_rxreuse_jumbo(sc, rxidx); + return (NULL); + } + if (bge_newbuf_jumbo(sc, rxidx) != 0) { + bge_rxreuse_jumbo(sc, rxidx); + ifp->if_iqdrops++; + return (NULL); + } + BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); + } else { + m = sc->bge_cdata.bge_rx_std_chain[rxidx]; + if (rx->bge_flags & BGE_RXBDFLAG_ERROR) { + bge_rxreuse_std(sc, rxidx); + return (NULL); + } + if (bge_newbuf_std(sc, rxidx) != 0) { + bge_rxreuse_std(sc, rxidx); + ifp->if_iqdrops++; + return (NULL); + } + BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); + } + m->m_pkthdr.len = m->m_len = rx->bge_len - ETHER_CRC_LEN; + m->m_pkthdr.rcvif = ifp; + + if ((ifp->if_capenable & IFCAP_RXCSUM) && + BGE_IS_5717_PLUS(sc)) { + if ((rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) { + if (rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { + m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; + if ((rx->bge_error_flag & + BGE_RXERRFLAG_IP_CSUM_NOK) == 0) + m->m_pkthdr.csum_flags |= CSUM_IP_VALID; + } + if (rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) { + m->m_pkthdr.csum_data = + 
rx->bge_tcp_udp_csum; + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | + CSUM_PSEUDO_HDR; + } + } + } else if (ifp->if_capenable & IFCAP_RXCSUM) { + if (rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { + m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; + if ((rx->bge_ip_csum ^ 0xFFFF) == 0) + m->m_pkthdr.csum_flags |= CSUM_IP_VALID; + } + if (rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM && + m->m_pkthdr.len >= ETHER_MIN_NOPAD) { + m->m_pkthdr.csum_data = + rx->bge_tcp_udp_csum; + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | + CSUM_PSEUDO_HDR; + } + } + + /* + * If we received a packet with a vlan tag, + * attach that information to the packet. + */ + if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING && + rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) { + m->m_pkthdr.ether_vtag = rx->bge_vlan_tag; + m->m_flags |= M_VLANTAG; + } + +#ifndef __NO_STRICT_ALIGNMENT + /* + * For architectures with strict alignment we must make sure + * the payload is aligned. + */ + if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) { + bcopy(m->m_data, m->m_data + ETHER_ALIGN, m->m-len); + m->m_data += ETHER_ALIGN; + } +#endif + ifp->if_ipackets++; + return (m); +} + /* * Frame reception handling. This is called if there's a frame * on the receive return list. 
@@ -4198,14 +4286,14 @@ bge_rxreuse_jumbo(struct bge_softc *sc, * 1) the frame is from the jumbo receive ring * 2) the frame is from the standard receive ring */ - static int -bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck) +bge_rxeof(struct bge_softc *sc, uint16_t rx_prod) { struct ifnet *ifp; int rx_npkts = 0, stdcnt = 0, jumbocnt = 0; int pkts = 0; uint16_t rx_cons; + struct mbuf *m = NULL, n = NULL; rx_cons = sc->bge_rx_saved_considx; @@ -4228,119 +4316,41 @@ bge_rxeof(struct bge_softc *sc, uint16_t while (rx_cons != rx_prod) { struct bge_rx_bd *cur_rx; uint32_t rxidx; - struct mbuf *m = NULL; - uint16_t vlan_tag = 0; - int have_tag = 0; - -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) { - if (sc->rxcycles <= 0) - break; - sc->rxcycles--; - } -#endif + struct mbuf *mm; cur_rx = &sc->bge_ldata.bge_rx_return_ring[rx_cons]; rxidx = cur_rx->bge_idx; BGE_INC(rx_cons, sc->bge_return_ring_cnt); - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING && - cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) { - have_tag = 1; - vlan_tag = cur_rx->bge_vlan_tag; - } - - if (cur_rx->bge_flags & BGE_RXBDFLAG_JUMBO_RING) { - jumbocnt++; - m = sc->bge_cdata.bge_rx_jumbo_chain[rxidx]; - if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { - bge_rxreuse_jumbo(sc, rxidx); - continue; - } - if (bge_newbuf_jumbo(sc, rxidx) != 0) { - bge_rxreuse_jumbo(sc, rxidx); - ifp->if_iqdrops++; - continue; - } - BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); - } else { - stdcnt++; - m = sc->bge_cdata.bge_rx_std_chain[rxidx]; - if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { - bge_rxreuse_std(sc, rxidx); - continue; - } - if (bge_newbuf_std(sc, rxidx) != 0) { - bge_rxreuse_std(sc, rxidx); - ifp->if_iqdrops++; - continue; - } - BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); - } - - ifp->if_ipackets++; -#ifndef __NO_STRICT_ALIGNMENT - /* - * For architectures with strict alignment we must make sure - * the payload is aligned. 
- */ - if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) { - bcopy(m->m_data, m->m_data + ETHER_ALIGN, - cur_rx->bge_len); - m->m_data += ETHER_ALIGN; - } -#endif - m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN; - m->m_pkthdr.rcvif = ifp; - - if (ifp->if_capenable & IFCAP_RXCSUM) - bge_rxcsum(sc, cur_rx, m); - - /* - * If we received a packet with a vlan tag, - * attach that information to the packet. - */ - if (have_tag) { - m->m_pkthdr.ether_vtag = vlan_tag; - m->m_flags |= M_VLANTAG; - } - - if (holdlck != 0) { - BGE_UNLOCK(sc); - (*ifp->if_input)(ifp, m); - if (++pkts > 10) { - maybe_yield(); - pkts = 0; - } - BGE_LOCK(sc); + mm = bge_rx_packet(ifp, cur_rx, rxidx, sc); + if (mm != NULL) { + if (n != NULL) + n->m_nextpkt = mm; + else + m = n = mm; } else - (*ifp->if_input)(ifp, m); - rx_npkts++; - - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - return (rx_npkts); + continue; } bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_PREREAD); - if (stdcnt > 0) + if (stdcnt > 0) { bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); - - if (jumbocnt > 0) - bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, - sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); - - sc->bge_rx_saved_considx = rx_cons; - bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); - if (stdcnt) bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, (sc->bge_std + BGE_STD_RX_RING_CNT - 1) % BGE_STD_RX_RING_CNT); - if (jumbocnt) + } + if (jumbocnt > 0) { + bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, + sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, (sc->bge_jumbo + BGE_JUMBO_RX_RING_CNT - 1) % BGE_JUMBO_RX_RING_CNT); -#ifdef notyet + } + sc->bge_rx_saved_considx = rx_cons; + bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); + +#if 0 /* * This register wraps very quickly under heavy packet drops. 
* If you need correct statistics, you can enable this check. @@ -4348,42 +4358,19 @@ bge_rxeof(struct bge_softc *sc, uint16_t if (BGE_IS_5705_PLUS(sc)) ifp->if_ierrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); #endif - return (rx_npkts); -} -static void -bge_rxcsum(struct bge_softc *sc, struct bge_rx_bd *cur_rx, struct mbuf *m) -{ + BGE_UNLOCK(sc); + while (m != NULL) { + /* n = SLIST_REMOVE_HEAD(m, nxtpkt); /* + n = m; + m = n->m_nextpkt; + n->m_nextpkt = NULL; + (*ifp->if_input)(ifp, n); + } + maybe_yield(); + BGE_LOCK(sc); - if (BGE_IS_5717_PLUS(sc)) { - if ((cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) { - if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { - m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; - if ((cur_rx->bge_error_flag & - BGE_RXERRFLAG_IP_CSUM_NOK) == 0) - m->m_pkthdr.csum_flags |= CSUM_IP_VALID; - } - if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) { - m->m_pkthdr.csum_data = - cur_rx->bge_tcp_udp_csum; - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | - CSUM_PSEUDO_HDR; - } - } - } else { - if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { - m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; - if ((cur_rx->bge_ip_csum ^ 0xFFFF) == 0) - m->m_pkthdr.csum_flags |= CSUM_IP_VALID; - } - if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM && - m->m_pkthdr.len >= ETHER_MIN_NOPAD) { - m->m_pkthdr.csum_data = - cur_rx->bge_tcp_udp_csum; - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | - CSUM_PSEUDO_HDR; - } - } + return (rx_npkts); } static void @@ -4554,7 +4541,6 @@ bge_ithr_msix(void *arg) bge_start_locked(ifp); } BGE_UNLOCK(sc); - return; } static void @@ -4636,7 +4622,6 @@ bge_ithr(void *xsc) bge_start_locked(ifp); BGE_UNLOCK(sc); - return; } static void From owner-svn-src-user@FreeBSD.ORG Sat Nov 10 20:34:48 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id B9C3EB7E; Sat, 10 Nov 2012 20:34:48 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from 
svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id A065E8FC0C; Sat, 10 Nov 2012 20:34:48 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qAAKYmop003478; Sat, 10 Nov 2012 20:34:48 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qAAKYmYB003477; Sat, 10 Nov 2012 20:34:48 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211102034.qAAKYmYB003477@svn.freebsd.org> From: Andre Oppermann Date: Sat, 10 Nov 2012 20:34:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242876 - user/andre/tcp_workqueue/sys/dev/cxgbe/tom X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 10 Nov 2012 20:34:48 -0000 Author: andre Date: Sat Nov 10 20:34:48 2012 New Revision: 242876 URL: http://svnweb.freebsd.org/changeset/base/242876 Log: Fix fallout from r242802 prototype to do ECMP with a next-hop array. rt_ifp is a #define and may not be used as a variable name. 
Modified: user/andre/tcp_workqueue/sys/dev/cxgbe/tom/t4_connect.c Modified: user/andre/tcp_workqueue/sys/dev/cxgbe/tom/t4_connect.c ============================================================================== --- user/andre/tcp_workqueue/sys/dev/cxgbe/tom/t4_connect.c Sat Nov 10 19:32:16 2012 (r242875) +++ user/andre/tcp_workqueue/sys/dev/cxgbe/tom/t4_connect.c Sat Nov 10 20:34:48 2012 (r242876) @@ -279,7 +279,7 @@ t4_connect(struct toedev *tod, struct so struct wrqe *wr = NULL; struct cpl_act_open_req *cpl; struct l2t_entry *e = NULL; - struct ifnet *rt_ifp = rt->rt_ifp; + struct ifnet *ifp = rt->rt_ifp; struct port_info *pi; int atid = -1, mtu_idx, rscale, qid_atid, rc = ENOMEM; struct inpcb *inp = sotoinpcb(so); @@ -290,13 +290,13 @@ t4_connect(struct toedev *tod, struct so if (nam->sa_family != AF_INET) CXGBE_UNIMPLEMENTED("IPv6 connect"); - if (rt_ifp->if_type == IFT_ETHER) - pi = rt_ifp->if_softc; - else if (rt_ifp->if_type == IFT_L2VLAN) { - struct ifnet *ifp = VLAN_COOKIE(rt_ifp); - + if (ifp->if_type == IFT_ETHER) pi = ifp->if_softc; - } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) + else if (ifp->if_type == IFT_L2VLAN) { + struct ifnet *ifp_v = VLAN_COOKIE(ifp); + + pi = ifp_v->if_softc; + } else if (ifp->if_type == IFT_IEEE8023ADLAG) return (ENOSYS); /* XXX: implement lagg support */ else return (ENOTSUP); @@ -309,7 +309,7 @@ t4_connect(struct toedev *tod, struct so if (atid < 0) goto failed; - e = t4_l2t_get(pi, rt_ifp, + e = t4_l2t_get(pi, ifp, rt->rt_flags & RTF_GATEWAY ? 
rt->rt_gateway : nam); if (e == NULL) goto failed; From owner-svn-src-user@FreeBSD.ORG Sat Nov 10 20:36:31 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 52D6AD04; Sat, 10 Nov 2012 20:36:31 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 2E22A8FC08; Sat, 10 Nov 2012 20:36:31 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qAAKaV70003762; Sat, 10 Nov 2012 20:36:31 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qAAKaVIl003761; Sat, 10 Nov 2012 20:36:31 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211102036.qAAKaVIl003761@svn.freebsd.org> From: Andre Oppermann Date: Sat, 10 Nov 2012 20:36:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242877 - user/andre/tcp_workqueue/sys/dev/bge X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 10 Nov 2012 20:36:31 -0000 Author: andre Date: Sat Nov 10 20:36:30 2012 New Revision: 242877 URL: http://svnweb.freebsd.org/changeset/base/242877 Log: Fix up function declarations after r242832 to make it compile. 
Modified: user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Modified: user/andre/tcp_workqueue/sys/dev/bge/if_bge.c ============================================================================== --- user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Sat Nov 10 20:34:48 2012 (r242876) +++ user/andre/tcp_workqueue/sys/dev/bge/if_bge.c Sat Nov 10 20:36:30 2012 (r242877) @@ -390,8 +390,7 @@ static int bge_get_eaddr_eeprom(struct b static int bge_get_eaddr(struct bge_softc *, uint8_t[]); static void bge_txeof(struct bge_softc *, uint16_t); -static void bge_rxcsum(struct bge_softc *, struct bge_rx_bd *, struct mbuf *); -static int bge_rxeof(struct bge_softc *, uint16_t, int); +static int bge_rxeof(struct bge_softc *, uint16_t); static void bge_asf_driver_up (struct bge_softc *); static void bge_tick(void *); @@ -4190,9 +4189,9 @@ bge_rxreuse_jumbo(struct bge_softc *sc, BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } -struct mbuf * -bge_rx_packet(struct ifnet *ifp, struct bge_rx_bd *rx, uint16_t rxidx, - struct bge_softc *sc) { +static struct mbuf * +bge_rx_packet(struct bge_softc *sc, struct bge_rx_bd *rx, uint16_t rxidx, + struct ifnet *ifp) { struct mbuf *m = NULL; if (rx->bge_flags & BGE_RXBDFLAG_JUMBO_RING) { @@ -4293,7 +4292,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t int rx_npkts = 0, stdcnt = 0, jumbocnt = 0; int pkts = 0; uint16_t rx_cons; - struct mbuf *m = NULL, n = NULL; + struct mbuf *m = NULL, *n = NULL; rx_cons = sc->bge_rx_saved_considx; @@ -4323,7 +4322,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t rxidx = cur_rx->bge_idx; BGE_INC(rx_cons, sc->bge_return_ring_cnt); - mm = bge_rx_packet(ifp, cur_rx, rxidx, sc); + mm = bge_rx_packet(sc, cur_rx, rxidx, ifp); if (mm != NULL) { if (n != NULL) n->m_nextpkt = mm; @@ -4331,6 +4330,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t m = n = mm; } else continue; + pkts++; } bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, @@ -4361,7 +4361,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t BGE_UNLOCK(sc); while (m != NULL) { - 
/* n = SLIST_REMOVE_HEAD(m, nxtpkt); /* + /* n = SLIST_REMOVE_HEAD(m, nxtpkt); */ n = m; m = n->m_nextpkt; n->m_nextpkt = NULL; @@ -4459,7 +4459,7 @@ bge_poll(struct ifnet *ifp, enum poll_cm bge_link_upd(sc); sc->rxcycles = count; - rx_npkts = bge_rxeof(sc, rx_prod, 1); + rx_npkts = bge_rxeof(sc, rx_prod); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { BGE_UNLOCK(sc); return (rx_npkts); @@ -4531,7 +4531,7 @@ bge_ithr_msix(void *arg) sc->bge_rx_saved_considx != rx_prod) { /* Check RX return ring producer/consumer. */ BGE_UNLOCK(sc); - bge_rxeof(sc, rx_prod, 0); + bge_rxeof(sc, rx_prod); BGE_LOCK(sc); } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { @@ -4609,7 +4609,7 @@ bge_ithr(void *xsc) if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* Check RX return ring producer/consumer. */ - bge_rxeof(sc, rx_prod, 1); + bge_rxeof(sc, rx_prod); } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { From owner-svn-src-user@FreeBSD.ORG Sat Nov 10 21:09:17 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id CCD8239B; Sat, 10 Nov 2012 21:09:17 +0000 (UTC) (envelope-from andre@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id A7E498FC0C; Sat, 10 Nov 2012 21:09:17 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qAAL9H78011128; Sat, 10 Nov 2012 21:09:17 GMT (envelope-from andre@svn.freebsd.org) Received: (from andre@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qAAL9HHM011126; Sat, 10 Nov 2012 21:09:17 GMT (envelope-from andre@svn.freebsd.org) Message-Id: <201211102109.qAAL9HHM011126@svn.freebsd.org> From: Andre Oppermann Date: Sat, 10 Nov 2012 21:09:17 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r242878 - in user/andre/tcp_workqueue/sys: kern sys X-SVN-Group: user MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 10 Nov 2012 21:09:17 -0000 Author: andre Date: Sat Nov 10 21:09:17 2012 New Revision: 242878 URL: http://svnweb.freebsd.org/changeset/base/242878 Log: Add m_uiotosfmbuf() which turns a uio iovec array into a chain of mbufs with sfbuffers attached to them. sfbuffers in turn directly reference and hold the VM page. Once the chain has traveled through the stack, the NIC then DMAs the data directly from the userspace VM pages. The referenced pages are protected from going away but not from concurrent modification by the application. If data in the page is modified while the mbuf/sfbuf is still waiting in a queue to be sent, the modified data will be sent. Additionally, certain cache and memory coherence effects may come into play, resulting in unpredictability as to whether the old or new data will go out eventually. This is the same as with sendfile(2), where the application also should not modify the backing file while sending. It is the equivalent of m_uiotombuf(), which does a normal copyin. It is the first step for a new send-side zero-copy implementation. This checkpoints a work in progress, isn't complete and likely has evil bugs or poorly handled edge cases. 
Modified: user/andre/tcp_workqueue/sys/kern/uipc_mbuf.c user/andre/tcp_workqueue/sys/sys/mbuf.h Modified: user/andre/tcp_workqueue/sys/kern/uipc_mbuf.c ============================================================================== --- user/andre/tcp_workqueue/sys/kern/uipc_mbuf.c Sat Nov 10 20:36:30 2012 (r242877) +++ user/andre/tcp_workqueue/sys/kern/uipc_mbuf.c Sat Nov 10 21:09:17 2012 (r242878) @@ -43,11 +43,17 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include +#include #include #include +#include +#include + int max_linkhdr; int max_protohdr; int max_hdr; @@ -1776,6 +1782,107 @@ m_uiotombuf(struct uio *uio, int how, in } /* + * Turn the contents of uio into a sfbuf attached mbuf chain. + */ +#define STACKPAGES 32 +struct mbuf * +m_uiotosfmbuf(struct uio *uio, int how, int len, int align, int flags) +{ + vm_page_t pp[STACKPAGES], p; + struct vm_map *map; + struct iovec *iov; + struct sf_buf *sf; + struct mbuf *m, *m0, *n; + int iolen, pages, mallocfree; + vm_offset_t uva, kva; + vm_size_t plen; + + m = n = NULL; + map = &curproc->p_vmspace->vm_map; + mallocfree = 0; + + while (uio->uio_iovcnt > 0 && uio->uio_resid > 0) { + + iov = uio->uio_iov; + iolen = iov->iov_len; + uva = (vm_offset_t)iov->iov_base; + + if (iolen == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + + pages = howmany(uva + iov->iov_len - (uva & PAGE_MASK), PAGE_SIZE); + if (pages > STACKPAGES) { + p = malloc(pages * sizeof(vm_page_t), M_TEMP, how); + if (p == NULL) + goto out; + mallocfree = 1; + } else + p = pp[0]; + + /* Verify that access to the given address is allowed from user-space. 
*/ + if (vm_fault_quick_hold_pages(map, uva, plen, VM_PROT_READ, &p, pages) < 0) + goto out; + + while (--pages) { + m0 = m_get(how, MT_DATA); + if (m0 == NULL) + goto out; + sf = sf_buf_alloc(p, SFB_CATCH); + if (sf == NULL) + goto out; + + vm_page_lock(p); + vm_page_wire(p); + vm_page_unhold(p); + vm_page_unlock(p); + + /* attach to mbuf */ + kva = sf_buf_kva(sf); + plen = PAGE_SIZE - (kva & PAGE_MASK); + MEXTADD(m0, kva, PAGE_SIZE, sf_buf_mext, + NULL, sf, M_RDONLY, EXT_SFBUF); + m0->m_len = plen; + m0->m_data = (caddr_t)kva + (PAGE_SIZE - plen); + + iov->iov_len -= iolen; + uio->uio_offset += iolen; + + if (n != NULL) { + n->m_next = m0; + n = m0; + } else + m = n = m0; + + p++; + } + + if (mallocfree) + free(p, M_TEMP); + + uio->uio_offset = 0; + uio->uio_resid -= iolen; + uio->uio_iov++; + uio->uio_iovcnt--; + } + +out: + while (--pages) { + vm_page_lock(p); + vm_page_unhold(p); + if (p->wire_count == 0 && p->object == NULL) + vm_page_free(p); + vm_page_unlock(p); + p++; + } + if (mallocfree) + free(p, M_TEMP); + return (m); +} + +/* * Copy an mbuf chain into a uio limited by len if set. */ int Modified: user/andre/tcp_workqueue/sys/sys/mbuf.h ============================================================================== --- user/andre/tcp_workqueue/sys/sys/mbuf.h Sat Nov 10 20:36:30 2012 (r242877) +++ user/andre/tcp_workqueue/sys/sys/mbuf.h Sat Nov 10 21:09:17 2012 (r242878) @@ -915,6 +915,7 @@ struct mbuf *m_pullup(struct mbuf *, int int m_sanity(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); +struct mbuf *m_uiotosfmbuf(struct uio *, int, int, int, int); struct mbuf *m_unshare(struct mbuf *, int how); /*-