From owner-svn-src-stable-7@FreeBSD.ORG Sun Oct 31 00:35:19 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 4C05E106564A; Sun, 31 Oct 2010 00:35:19 +0000 (UTC) (envelope-from bz@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 399C28FC13; Sun, 31 Oct 2010 00:35:19 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o9V0ZJLL033862; Sun, 31 Oct 2010 00:35:19 GMT (envelope-from bz@svn.freebsd.org) Received: (from bz@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o9V0ZJJL033860; Sun, 31 Oct 2010 00:35:19 GMT (envelope-from bz@svn.freebsd.org) Message-Id: <201010310035.o9V0ZJJL033860@svn.freebsd.org> From: "Bjoern A. Zeeb" Date: Sun, 31 Oct 2010 00:35:19 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214577 - stable/7/usr.sbin/rtadvd X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 31 Oct 2010 00:35:19 -0000 Author: bz Date: Sun Oct 31 00:35:18 2010 New Revision: 214577 URL: http://svn.freebsd.org/changeset/base/214577 Log: MFC r214447: Correct a typo. 
Modified: stable/7/usr.sbin/rtadvd/rtadvd.conf.5 Directory Properties: stable/7/usr.sbin/rtadvd/ (props changed) Modified: stable/7/usr.sbin/rtadvd/rtadvd.conf.5 ============================================================================== --- stable/7/usr.sbin/rtadvd/rtadvd.conf.5 Sat Oct 30 23:49:37 2010 (r214576) +++ stable/7/usr.sbin/rtadvd/rtadvd.conf.5 Sun Oct 31 00:35:18 2010 (r214577) @@ -109,7 +109,7 @@ The default value is 64. (str or num) A 8-bit flags field in router advertisement message header. This field can be specified either as a case-sensitive string or as an integer. -A sting consists of characters each of which corresponds to a +A string consists of characters each of which corresponds to a particular flag bit(s). An integer should be the logical OR of all enabled bits. Bit 7 From owner-svn-src-stable-7@FreeBSD.ORG Sun Oct 31 08:50:31 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 6E285106566B; Sun, 31 Oct 2010 08:50:31 +0000 (UTC) (envelope-from nyan@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 5C5CA8FC14; Sun, 31 Oct 2010 08:50:31 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o9V8oV9L068868; Sun, 31 Oct 2010 08:50:31 GMT (envelope-from nyan@svn.freebsd.org) Received: (from nyan@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o9V8oVKR068866; Sun, 31 Oct 2010 08:50:31 GMT (envelope-from nyan@svn.freebsd.org) Message-Id: <201010310850.o9V8oVKR068866@svn.freebsd.org> From: Takahashi Yoshihiro Date: Sun, 31 Oct 2010 08:50:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Cc: Subject: svn commit: r214594 - stable/7/sys/pc98/pc98 X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 31 Oct 2010 08:50:31 -0000 Author: nyan Date: Sun Oct 31 08:50:31 2010 New Revision: 214594 URL: http://svn.freebsd.org/changeset/base/214594 Log: MFC: revision 214258 Rewrite the i386 memory probe: - Move the base memory setup into a new basemem_setup() routine. Modified: stable/7/sys/pc98/pc98/machdep.c Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/pc98/pc98/machdep.c ============================================================================== --- stable/7/sys/pc98/pc98/machdep.c Sun Oct 31 08:39:42 2010 (r214593) +++ stable/7/sys/pc98/pc98/machdep.c Sun Oct 31 08:50:31 2010 (r214594) @@ -1618,51 +1618,13 @@ sdtossd(sd, ssd) ssd->ssd_gran = sd->sd_gran; } -/* - * Populate the (physmap) array with base/bound pairs describing the - * available physical memory in the system, then test this memory and - * build the phys_avail array describing the actually-available memory. - * - * If we cannot accurately determine the physical memory map, then use - * value from the 0xE801 call, and failing that, the RTC. - * - * Total memory size may be set by the kernel environment variable - * hw.physmem or the compile-time define MAXMEM. - * - * XXX first should be vm_paddr_t. 
- */ static void -getmemsize(int first) +basemem_setup(void) { - int i, off, physmap_idx, pa_indx, da_indx; - int pg_n; - u_long physmem_tunable; - u_int extmem, under16; - vm_paddr_t pa, physmap[PHYSMAP_SIZE]; + vm_paddr_t pa; pt_entry_t *pte; - quad_t dcons_addr, dcons_size; - - bzero(physmap, sizeof(physmap)); - - /* XXX - some of EPSON machines can't use PG_N */ - pg_n = PG_N; - if (pc98_machine_type & M_EPSON_PC98) { - switch (epson_machine_id) { -#ifdef WB_CACHE - default: -#endif - case EPSON_PC486_HX: - case EPSON_PC486_HG: - case EPSON_PC486_HA: - pg_n = 0; - break; - } - } + int i; - /* - * Perform "base memory" related probes & setup - */ - under16 = pc98_getmemsize(&basemem, &extmem); if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); @@ -1694,12 +1656,62 @@ getmemsize(int first) pmap_kenter(KERNBASE + pa, pa); /* - * if basemem != 640, map pages r/w into vm86 page table so - * that the bios can scribble on it. + * Map pages between basemem and ISA_HOLE_START, if any, r/w into + * the vm86 page table so that vm86 can scribble on them using + * the vm86 map too. XXX: why 2 ways for this and only 1 way for + * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; +} + +/* + * Populate the (physmap) array with base/bound pairs describing the + * available physical memory in the system, then test this memory and + * build the phys_avail array describing the actually-available memory. + * + * If we cannot accurately determine the physical memory map, then use + * value from the 0xE801 call, and failing that, the RTC. + * + * Total memory size may be set by the kernel environment variable + * hw.physmem or the compile-time define MAXMEM. + * + * XXX first should be vm_paddr_t. 
+ */ +static void +getmemsize(int first) +{ + int off, physmap_idx, pa_indx, da_indx; + u_long physmem_tunable; + vm_paddr_t physmap[PHYSMAP_SIZE]; + pt_entry_t *pte; + quad_t dcons_addr, dcons_size; + int i; + int pg_n; + u_int extmem; + u_int under16; + vm_paddr_t pa; + + bzero(physmap, sizeof(physmap)); + + /* XXX - some of EPSON machines can't use PG_N */ + pg_n = PG_N; + if (pc98_machine_type & M_EPSON_PC98) { + switch (epson_machine_id) { +#ifdef WB_CACHE + default: +#endif + case EPSON_PC486_HX: + case EPSON_PC486_HG: + case EPSON_PC486_HA: + pg_n = 0; + break; + } + } + + under16 = pc98_getmemsize(&basemem, &extmem); + basemem_setup(); physmap[0] = 0; physmap[1] = basemem * 1024; From owner-svn-src-stable-7@FreeBSD.ORG Mon Nov 1 19:04:16 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 97777106564A; Mon, 1 Nov 2010 19:04:16 +0000 (UTC) (envelope-from bschmidt@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 84DF28FC13; Mon, 1 Nov 2010 19:04:16 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA1J4GPi083121; Mon, 1 Nov 2010 19:04:16 GMT (envelope-from bschmidt@svn.freebsd.org) Received: (from bschmidt@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA1J4G2J083119; Mon, 1 Nov 2010 19:04:16 GMT (envelope-from bschmidt@svn.freebsd.org) Message-Id: <201011011904.oA1J4G2J083119@svn.freebsd.org> From: Bernhard Schmidt Date: Mon, 1 Nov 2010 19:04:16 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214632 - stable/7/sys/dev/iwi X-BeenThere: svn-src-stable-7@freebsd.org 
X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 01 Nov 2010 19:04:16 -0000 Author: bschmidt Date: Mon Nov 1 19:04:16 2010 New Revision: 214632 URL: http://svn.freebsd.org/changeset/base/214632 Log: MFC r214160,214162,214236 r214236 & r214160: The firmware does pad notifications to an even number of bytes (at least the association notification), the included information though always contains an elem block with an odd number of bytes. We handle the last byte as if it might contain a whole elem block, this of course is not true as one byte is not enough to hold a block, we therefore discard the complete frame. The solution here is to subtract one from the actual notification length, this is also what the Linux driver does. With this change the frame ends exactly where the last elem block ends. r214162: The firmware always sets bit 14 and 15, to get the real associd we need to clear those bits. 
Modified: stable/7/sys/dev/iwi/if_iwi.c Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/iwi/if_iwi.c ============================================================================== --- stable/7/sys/dev/iwi/if_iwi.c Mon Nov 1 18:18:46 2010 (r214631) +++ stable/7/sys/dev/iwi/if_iwi.c Mon Nov 1 19:04:16 2010 (r214632) @@ -1377,7 +1377,7 @@ iwi_checkforqos(struct iwi_softc *sc, co ni = sc->sc_ic.ic_bss; ni->ni_capinfo = capinfo; - ni->ni_associd = associd; + ni->ni_associd = associd & 0x3fff; if (wme != NULL) ni->ni_flags |= IEEE80211_NODE_QOS; else @@ -1480,7 +1480,7 @@ iwi_notification_intr(struct iwi_softc * IWI_STATE_END(sc, IWI_FW_ASSOCIATING); iwi_checkforqos(sc, (const struct ieee80211_frame *)(assoc+1), - le16toh(notif->len) - sizeof(*assoc)); + le16toh(notif->len) - sizeof(*assoc) - 1); ieee80211_new_state(ic, IEEE80211_S_RUN, -1); break; From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 20:06:49 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id D9841106576D; Tue, 2 Nov 2010 20:06:49 +0000 (UTC) (envelope-from marius@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id C13388FC17; Tue, 2 Nov 2010 20:06:49 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2K6nKt082134; Tue, 2 Nov 2010 20:06:49 GMT (envelope-from marius@svn.freebsd.org) Received: (from marius@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2K6niX082129; Tue, 2 Nov 2010 20:06:49 GMT (envelope-from marius@svn.freebsd.org) Message-Id: <201011022006.oA2K6niX082129@svn.freebsd.org> From: Marius Strobl Date: Tue, 2 Nov 2010 20:06:49 +0000 (UTC) To: 
src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214685 - in stable/7/sys: dev/mii modules/mii X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 20:06:50 -0000 Author: marius Date: Tue Nov 2 20:06:49 2010 New Revision: 214685 URL: http://svn.freebsd.org/changeset/base/214685 Log: MFC: r213878 Add a NetBSD-compatible mii_attach(), which is intended to eventually replace mii_phy_probe() altogether. Compared to the latter the advantages of mii_attach() are: - intended to be called multiple times in order to attach PHYs in multiple passes (f.e. in order to only use sub-ranges of the 0 to MII_NPHY - 1 range) - being able to pass along the capability mask from the NIC to the PHY drivers - being able to specify at which address (phyloc) to probe for a PHY (instead of always probing at all addresses from 0 to MII_NPHY - 1) - being able to specify which PHY instance (offloc) to attach - being able to pass along MIIF_* flags from the NIC to the PHY drivers (f.e. as required to indicate to the PHY drivers that flow control is supported by the NIC driver, which actually is the motivation for this change). While at it, I used the opportunity to get rid of some hacks in mii(4) like miibus_probe() generally doing work besides sheer probing and the "EVIL HACK" (which will vanish entirely along with mii_phy_probe()) by passing the struct ifnet pointer via an argument of mii_attach() as well as to fix some resource leaks in mii(4) in case something fails. 
Commits which will update the PHY drivers to honor the MII flags passed down from the NIC drivers and take advantage of mii_attach() to get rid of certain types of hacks in NIC and PHY drivers as well as a conversion of the remaining uses of mii_phy_probe() will follow shortly. Reviewed by: jhb, yongari Obtained from: NetBSD (partially) Modified: stable/7/sys/dev/mii/mii.c stable/7/sys/dev/mii/mii.h stable/7/sys/dev/mii/miivar.h stable/7/sys/modules/mii/Makefile Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/mii/mii.c ============================================================================== --- stable/7/sys/dev/mii/mii.c Tue Nov 2 20:06:46 2010 (r214684) +++ stable/7/sys/dev/mii/mii.c Tue Nov 2 20:06:49 2010 (r214685) @@ -58,6 +58,8 @@ MODULE_VERSION(miibus, 1); #include "miibus_if.h" static int miibus_print_child(device_t dev, device_t child); +static int miibus_read_ivar(device_t dev, device_t child, int which, + uintptr_t *result); static int miibus_child_location_str(device_t bus, device_t child, char *buf, size_t buflen); static int miibus_child_pnpinfo_str(device_t bus, device_t child, char *buf, @@ -77,6 +79,7 @@ static device_method_t miibus_methods[] /* bus interface */ DEVMETHOD(bus_print_child, miibus_print_child), + DEVMETHOD(bus_read_ivar, miibus_read_ivar), DEVMETHOD(bus_driver_added, bus_generic_driver_added), DEVMETHOD(bus_child_pnpinfo_str, miibus_child_pnpinfo_str), DEVMETHOD(bus_child_location_str, miibus_child_location_str), @@ -100,85 +103,50 @@ driver_t miibus_driver = { }; struct miibus_ivars { + struct ifnet *ifp; ifm_change_cb_t ifmedia_upd; ifm_stat_cb_t ifmedia_sts; + int mii_flags; }; -/* - * Helper function used by network interface drivers, attaches PHYs - * to the network interface driver parent. 
- */ int miibus_probe(device_t dev) { - struct mii_attach_args ma, *args; - struct mii_data *mii; - device_t child = NULL, parent; - int bmsr, capmask = 0xFFFFFFFF; - - mii = device_get_softc(dev); - parent = device_get_parent(dev); - LIST_INIT(&mii->mii_phys); - - for (ma.mii_phyno = 0; ma.mii_phyno < MII_NPHY; ma.mii_phyno++) { - /* - * Check to see if there is a PHY at this address. Note, - * many braindead PHYs report 0/0 in their ID registers, - * so we test for media in the BMSR. - */ - bmsr = MIIBUS_READREG(parent, ma.mii_phyno, MII_BMSR); - if (bmsr == 0 || bmsr == 0xffff || - (bmsr & (BMSR_EXTSTAT | BMSR_MEDIAMASK)) == 0) { - /* Assume no PHY at this address. */ - continue; - } - - /* - * Extract the IDs. Braindead PHYs will be handled by - * the `ukphy' driver, as we have no ID information to - * match on. - */ - ma.mii_id1 = MIIBUS_READREG(parent, ma.mii_phyno, - MII_PHYIDR1); - ma.mii_id2 = MIIBUS_READREG(parent, ma.mii_phyno, - MII_PHYIDR2); - - ma.mii_data = mii; - ma.mii_capmask = capmask; - - args = malloc(sizeof(struct mii_attach_args), - M_DEVBUF, M_NOWAIT); - bcopy((char *)&ma, (char *)args, sizeof(ma)); - child = device_add_child(dev, NULL, -1); - device_set_ivars(child, args); - } - - if (child == NULL) - return (ENXIO); device_set_desc(dev, "MII bus"); - return (0); + return (BUS_PROBE_SPECIFIC); } int miibus_attach(device_t dev) { struct miibus_ivars *ivars; + struct mii_attach_args *ma; struct mii_data *mii; + device_t *children; + int i, nchildren; mii = device_get_softc(dev); - /* - * Note that each NIC's softc must start with an ifnet pointer. - * XXX: EVIL HACK! 
- */ - mii->mii_ifp = *(struct ifnet**)device_get_softc(device_get_parent(dev)); + nchildren = 0; + if (device_get_children(dev, &children, &nchildren) == 0) { + for (i = 0; i < nchildren; i++) { + ma = device_get_ivars(children[i]); + ma->mii_data = mii; + } + free(children, M_TEMP); + } + if (nchildren == 0) { + device_printf(dev, "cannot get children"); + return (ENXIO); + } ivars = device_get_ivars(dev); ifmedia_init(&mii->mii_media, IFM_IMASK, ivars->ifmedia_upd, ivars->ifmedia_sts); - bus_generic_attach(dev); + mii->mii_ifp = ivars->ifp; + LIST_INIT(&mii->mii_phys); - return (0); + return (bus_generic_attach(dev)); } int @@ -209,7 +177,28 @@ miibus_print_child(device_t dev, device_ } static int -miibus_child_pnpinfo_str(device_t bus, device_t child, char *buf, +miibus_read_ivar(device_t dev, device_t child __unused, int which, + uintptr_t *result) +{ + struct miibus_ivars *ivars; + + /* + * NB: this uses the instance variables of the miibus rather than + * its PHY children. + */ + ivars = device_get_ivars(dev); + switch (which) { + case MIIBUS_IVAR_FLAGS: + *result = ivars->mii_flags; + break; + default: + return (ENOENT); + } + return (0); +} + +static int +miibus_child_pnpinfo_str(device_t bus __unused, device_t child, char *buf, size_t buflen) { struct mii_attach_args *ma; @@ -222,7 +211,7 @@ miibus_child_pnpinfo_str(device_t bus, d } static int -miibus_child_location_str(device_t bus, device_t child, char *buf, +miibus_child_location_str(device_t bus __unused, device_t child, char *buf, size_t buflen) { struct mii_attach_args *ma; @@ -316,40 +305,177 @@ miibus_mediainit(device_t dev) ifmedia_set(&mii->mii_media, media); } +/* + * Helper function used by network interface drivers, attaches the miibus and + * the PHYs to the network interface driver parent. 
+ */ int -mii_phy_probe(device_t dev, device_t *child, ifm_change_cb_t ifmedia_upd, - ifm_stat_cb_t ifmedia_sts) -{ - struct miibus_ivars *ivars; - int bmsr, i; +mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp, + ifm_change_cb_t ifmedia_upd, ifm_stat_cb_t ifmedia_sts, int capmask, + int phyloc, int offloc, int flags) +{ + struct miibus_ivars *ivars; + struct mii_attach_args ma, *args; + device_t *children, phy; + int bmsr, first, i, nchildren, offset, phymax, phymin, rv; + + if (phyloc != MII_PHY_ANY && offloc != MII_OFFSET_ANY) { + printf("%s: phyloc and offloc specified", __func__); + return (EINVAL); + } - ivars = malloc(sizeof(*ivars), M_DEVBUF, M_NOWAIT); - if (ivars == NULL) - return (ENOMEM); - ivars->ifmedia_upd = ifmedia_upd; - ivars->ifmedia_sts = ifmedia_sts; - *child = device_add_child(dev, "miibus", -1); - device_set_ivars(*child, ivars); - - for (i = 0; i < MII_NPHY; i++) { - bmsr = MIIBUS_READREG(dev, i, MII_BMSR); - if (bmsr == 0 || bmsr == 0xffff || - (bmsr & (BMSR_EXTSTAT | BMSR_MEDIAMASK)) == 0) { - /* Assume no PHY at this address. 
*/ - continue; - } else - break; + if (offloc != MII_OFFSET_ANY && (offloc < 0 || offloc >= MII_NPHY)) { + printf("%s: ivalid offloc %d", __func__, offloc); + return (EINVAL); } - if (i == MII_NPHY) { - device_delete_child(dev, *child); - *child = NULL; - return (ENXIO); + if (phyloc == MII_PHY_ANY) { + phymin = 0; + phymax = MII_NPHY - 1; + } else { + if (phyloc < 0 || phyloc >= MII_NPHY) { + printf("%s: ivalid phyloc %d", __func__, phyloc); + return (EINVAL); + } + phymin = phymax = phyloc; } - bus_generic_attach(dev); + first = 0; + if (*miibus == NULL) { + first = 1; + ivars = malloc(sizeof(*ivars), M_DEVBUF, M_NOWAIT); + if (ivars == NULL) + return (ENOMEM); + ivars->ifp = ifp; + ivars->ifmedia_upd = ifmedia_upd; + ivars->ifmedia_sts = ifmedia_sts; + ivars->mii_flags = flags; + *miibus = device_add_child(dev, "miibus", -1); + if (*miibus == NULL) { + rv = ENXIO; + goto fail; + } + device_set_ivars(*miibus, ivars); + } else { + ivars = device_get_ivars(*miibus); + if (ivars->ifp != ifp || ivars->ifmedia_upd != ifmedia_upd || + ivars->ifmedia_sts != ifmedia_sts || + ivars->mii_flags != flags) { + printf("%s: non-matching invariant", __func__); + return (EINVAL); + } + /* + * Assignment of the attach arguments mii_data for the first + * pass is done in miibus_attach(), i.e. once the miibus softc + * has been allocated. + */ + ma.mii_data = device_get_softc(*miibus); + } + + ma.mii_capmask = capmask; + + phy = NULL; + offset = 0; + for (ma.mii_phyno = phymin; ma.mii_phyno <= phymax; ma.mii_phyno++) { + /* + * Make sure we haven't already configured a PHY at this + * address. This allows mii_attach() to be called + * multiple times. + */ + if (device_get_children(*miibus, &children, &nchildren) == 0) { + for (i = 0; i < nchildren; i++) { + args = device_get_ivars(children[i]); + if (args->mii_phyno == ma.mii_phyno) { + /* + * Yes, there is already something + * configured at this address. 
+ */ + free(children, M_TEMP); + goto skip; + } + } + free(children, M_TEMP); + } + + /* + * Check to see if there is a PHY at this address. Note, + * many braindead PHYs report 0/0 in their ID registers, + * so we test for media in the BMSR. + */ + bmsr = MIIBUS_READREG(dev, ma.mii_phyno, MII_BMSR); + if (bmsr == 0 || bmsr == 0xffff || + (bmsr & (BMSR_EXTSTAT | BMSR_MEDIAMASK)) == 0) { + /* Assume no PHY at this address. */ + continue; + } + + /* + * There is a PHY at this address. If we were given an + * `offset' locator, skip this PHY if it doesn't match. + */ + if (offloc != MII_OFFSET_ANY && offloc != offset) + goto skip; + + /* + * Extract the IDs. Braindead PHYs will be handled by + * the `ukphy' driver, as we have no ID information to + * match on. + */ + ma.mii_id1 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR1); + ma.mii_id2 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR2); + + args = malloc(sizeof(struct mii_attach_args), M_DEVBUF, + M_NOWAIT); + if (args == NULL) + goto skip; + bcopy((char *)&ma, (char *)args, sizeof(ma)); + phy = device_add_child(*miibus, NULL, -1); + if (phy == NULL) { + free(args, M_DEVBUF); + goto skip; + } + device_set_ivars(phy, args); + skip: + offset++; + } + + if (first != 0) { + if (phy == NULL) { + rv = ENXIO; + goto fail; + } + rv = bus_generic_attach(dev); + if (rv != 0) + goto fail; + } + rv = bus_generic_attach(*miibus); + if (rv != 0) + goto fail; return (0); + + fail: + if (*miibus != NULL) + device_delete_child(dev, *miibus); + free(ivars, M_DEVBUF); + if (first != 0) + *miibus = NULL; + return (rv); +} + +int +mii_phy_probe(device_t dev, device_t *child, ifm_change_cb_t ifmedia_upd, + ifm_stat_cb_t ifmedia_sts) +{ + struct ifnet *ifp; + + /* + * Note that each NIC's softc must start with an ifnet pointer. + * XXX: EVIL HACK! 
+ */ + ifp = *(struct ifnet **)device_get_softc(dev); + return (mii_attach(dev, child, ifp, ifmedia_upd, ifmedia_sts, + BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0)); } /* Modified: stable/7/sys/dev/mii/mii.h ============================================================================== --- stable/7/sys/dev/mii/mii.h Tue Nov 2 20:06:46 2010 (r214684) +++ stable/7/sys/dev/mii/mii.h Tue Nov 2 20:06:49 2010 (r214685) @@ -1,5 +1,5 @@ /* $NetBSD: mii.h,v 1.9 2001/05/31 03:07:14 thorpej Exp $ */ - + /*- * Copyright (c) 1997 Manuel Bouyer. All rights reserved. * @@ -47,7 +47,7 @@ #define MII_COMMAND_WRITE 0x01 #define MII_COMMAND_ACK 0x02 -#define MII_BMCR 0x00 /* Basic mode control register (rw) */ +#define MII_BMCR 0x00 /* Basic mode control register (rw) */ #define BMCR_RESET 0x8000 /* reset */ #define BMCR_LOOP 0x4000 /* loopback */ #define BMCR_SPEED0 0x2000 /* speed selection (LSB) */ @@ -82,6 +82,8 @@ #define BMSR_JABBER 0x0002 /* Jabber detected */ #define BMSR_EXTCAP 0x0001 /* Extended capability */ +#define BMSR_DEFCAPMASK 0xffffffff + /* * Note that the EXTSTAT bit indicates that there is extended status * info available in register 15, but 802.3 section 22.2.4.3 also Modified: stable/7/sys/dev/mii/miivar.h ============================================================================== --- stable/7/sys/dev/mii/miivar.h Tue Nov 2 20:06:46 2010 (r214684) +++ stable/7/sys/dev/mii/miivar.h Tue Nov 2 20:06:49 2010 (r214685) @@ -102,7 +102,7 @@ typedef int (*mii_downcall_t)(struct mii */ struct mii_softc { device_t mii_dev; /* generic device glue */ - + LIST_ENTRY(mii_softc) mii_list; /* entry on parent's PHY list */ int mii_phy; /* our MII address */ @@ -122,16 +122,22 @@ struct mii_softc { typedef struct mii_softc mii_softc_t; /* mii_flags */ -#define MIIF_INITDONE 0x0001 /* has been initialized (mii_data) */ -#define MIIF_NOISOLATE 0x0002 /* do not isolate the PHY */ -#define MIIF_NOLOOP 0x0004 /* no loopback capability */ -#define MIIF_AUTOTSLEEP 0x0010 /* use 
tsleep(), not callout() */ -#define MIIF_HAVEFIBER 0x0020 /* from parent: has fiber interface */ -#define MIIF_HAVE_GTCR 0x0040 /* has 100base-T2/1000base-T CR */ -#define MIIF_IS_1000X 0x0080 /* is a 1000BASE-X device */ -#define MIIF_DOPAUSE 0x0100 /* advertise PAUSE capability */ -#define MIIF_IS_HPNA 0x0200 /* is a HomePNA device */ -#define MIIF_FORCEANEG 0x0400 /* force auto-negotiation */ +#define MIIF_INITDONE 0x00000001 /* has been initialized (mii_data) */ +#define MIIF_NOISOLATE 0x00000002 /* do not isolate the PHY */ +#define MIIF_NOLOOP 0x00000004 /* no loopback capability */ +#define MIIF_AUTOTSLEEP 0x00000010 /* use tsleep(), not callout() */ +#define MIIF_HAVEFIBER 0x00000020 /* from parent: has fiber interface */ +#define MIIF_HAVE_GTCR 0x00000040 /* has 100base-T2/1000base-T CR */ +#define MIIF_IS_1000X 0x00000080 /* is a 1000BASE-X device */ +#define MIIF_DOPAUSE 0x00000100 /* advertise PAUSE capability */ +#define MIIF_IS_HPNA 0x00000200 /* is a HomePNA device */ +#define MIIF_FORCEANEG 0x00000400 /* force auto-negotiation */ +#define MIIF_MACPRIV0 0x01000000 /* private to the MAC driver */ +#define MIIF_MACPRIV1 0x02000000 /* private to the MAC driver */ +#define MIIF_MACPRIV2 0x04000000 /* private to the MAC driver */ +#define MIIF_PHYPRIV0 0x10000000 /* private to the PHY driver */ +#define MIIF_PHYPRIV1 0x20000000 /* private to the PHY driver */ +#define MIIF_PHYPRIV2 0x40000000 /* private to the PHY driver */ /* Default mii_anegticks values */ #define MII_ANEGTICKS 5 @@ -140,6 +146,14 @@ typedef struct mii_softc mii_softc_t; #define MIIF_INHERIT_MASK (MIIF_NOISOLATE|MIIF_NOLOOP|MIIF_AUTOTSLEEP) /* + * Special `locators' passed to mii_attach(). If one of these is not + * an `any' value, we look for *that* PHY and configure it. If both + * are not `any', that is an error, and mii_attach() will panic. + */ +#define MII_OFFSET_ANY -1 +#define MII_PHY_ANY -1 + +/* * Used to attach a PHY to a parent. 
*/ struct mii_attach_args { @@ -192,6 +206,18 @@ struct mii_media { #define PHY_WRITE(p, r, v) \ MIIBUS_WRITEREG((p)->mii_dev, (p)->mii_phy, (r), (v)) +enum miibus_device_ivars { + MIIBUS_IVAR_FLAGS +}; + +/* + * Simplified accessors for miibus + */ +#define MIIBUS_ACCESSOR(var, ivar, type) \ + __BUS_ACCESSOR(miibus, var, MIIBUS, ivar, type) + +MIIBUS_ACCESSOR(flags, FLAGS, int) + extern devclass_t miibus_devclass; extern driver_t miibus_driver; @@ -199,6 +225,8 @@ int miibus_probe(device_t); int miibus_attach(device_t); int miibus_detach(device_t); +int mii_attach(device_t, device_t *, struct ifnet *, ifm_change_cb_t, + ifm_stat_cb_t, int, int, int, int); int mii_anar(int); void mii_down(struct mii_data *); int mii_mediachg(struct mii_data *); Modified: stable/7/sys/modules/mii/Makefile ============================================================================== --- stable/7/sys/modules/mii/Makefile Tue Nov 2 20:06:46 2010 (r214684) +++ stable/7/sys/modules/mii/Makefile Tue Nov 2 20:06:49 2010 (r214685) @@ -11,7 +11,8 @@ SRCS+= rgephy.c rlphy.c ruephy.c tdkphy. 
SRCS+= ukphy_subr.c SRCS+= xmphy.c -EXPORT_SYMS= mii_mediachg \ +EXPORT_SYMS= mii_attach \ + mii_mediachg \ mii_phy_probe \ mii_phy_reset \ mii_pollstat \ From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 21:01:18 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id CA11B106566C; Tue, 2 Nov 2010 21:01:18 +0000 (UTC) (envelope-from marius@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id B713A8FC13; Tue, 2 Nov 2010 21:01:18 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2L1InD087605; Tue, 2 Nov 2010 21:01:18 GMT (envelope-from marius@svn.freebsd.org) Received: (from marius@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2L1I4I087602; Tue, 2 Nov 2010 21:01:18 GMT (envelope-from marius@svn.freebsd.org) Message-Id: <201011022101.oA2L1I4I087602@svn.freebsd.org> From: Marius Strobl Date: Tue, 2 Nov 2010 21:01:18 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214688 - stable/7/sys/dev/dc X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 21:01:18 -0000 Author: marius Date: Tue Nov 2 21:01:18 2010 New Revision: 214688 URL: http://svn.freebsd.org/changeset/base/214688 Log: MFC: r201430 Fix card/device names, no functional change The ADMtek AN985 is the cardbus variant of ADMtek AN983 The Netgear FA511 is just a relabeled ADMtek AN985 PR: kern/50574 Modified: 
stable/7/sys/dev/dc/if_dc.c stable/7/sys/dev/dc/if_dcreg.h Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/dc/if_dc.c ============================================================================== --- stable/7/sys/dev/dc/if_dc.c Tue Nov 2 21:01:15 2010 (r214687) +++ stable/7/sys/dev/dc/if_dc.c Tue Nov 2 21:01:18 2010 (r214688) @@ -43,8 +43,9 @@ __FBSDID("$FreeBSD$"); * ASIX Electronics AX88140A (www.asix.com.tw) * ASIX Electronics AX88141 (www.asix.com.tw) * ADMtek AL981 (www.admtek.com.tw) - * ADMtek AN985 (www.admtek.com.tw) - * Netgear FA511 (www.netgear.com) Appears to be rebadged ADMTek AN985 + * ADMtek AN983 (www.admtek.com.tw) + * ADMtek cardbus AN985 (www.admtek.com.tw) + * Netgear FA511 (www.netgear.com) Appears to be rebadged ADMTek cardbus AN985 * Davicom DM9100, DM9102, DM9102A (www.davicom8.com) * Accton EN1217 (www.accton.com) * Xircom X3201 (www.xircom.com) @@ -161,14 +162,14 @@ static const struct dc_type dc_devs[] = "Davicom DM9102 10/100BaseTX" }, { DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_AL981), 0, "ADMtek AL981 10/100BaseTX" }, + { DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_AN983), 0, + "ADMtek AN983 10/100BaseTX" }, { DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_AN985), 0, - "ADMtek AN985 10/100BaseTX" }, + "ADMtek AN985 cardBus 10/100BaseTX or clone" }, { DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_ADM9511), 0, "ADMtek ADM9511 10/100BaseTX" }, { DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_ADM9513), 0, "ADMtek ADM9513 10/100BaseTX" }, - { DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_FA511), 0, - "Netgear FA511 10/100BaseTX" }, { DC_DEVID(DC_VENDORID_ASIX, DC_DEVICEID_AX88140A), DC_REVISION_88141, "ASIX AX88141 10/100BaseTX" }, { DC_DEVID(DC_VENDORID_ASIX, DC_DEVICEID_AX88140A), 0, @@ -779,10 +780,10 @@ dc_miibus_readreg(device_t dev, int phy, bzero(&frame, sizeof(frame)); /* - * Note: 
both the AL981 and AN985 have internal PHYs, + * Note: both the AL981 and AN983 have internal PHYs, * however the AL981 provides direct access to the PHY - * registers while the AN985 uses a serial MII interface. - * The AN985's MII interface is also buggy in that you + * registers while the AN983 uses a serial MII interface. + * The AN983's MII interface is also buggy in that you * can read from any MII address (0 to 31), but only address 1 * behaves normally. To deal with both cases, we pretend * that the PHY is at MII address 1. @@ -1896,11 +1897,11 @@ dc_attach(device_t dev) sc->dc_pmode = DC_PMODE_MII; dc_read_srom(sc, sc->dc_romwidth); break; + case DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_AN983): case DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_AN985): case DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_ADM9511): case DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_ADM9513): case DC_DEVID(DC_VENDORID_DLINK, DC_DEVICEID_DRP32TXD): - case DC_DEVID(DC_VENDORID_ADMTEK, DC_DEVICEID_FA511): case DC_DEVID(DC_VENDORID_ABOCOM, DC_DEVICEID_FE2500): case DC_DEVID(DC_VENDORID_ABOCOM, DC_DEVICEID_FE2500MX): case DC_DEVID(DC_VENDORID_ACCTON, DC_DEVICEID_EN2242): @@ -1911,7 +1912,7 @@ dc_attach(device_t dev) case DC_DEVID(DC_VENDORID_MICROSOFT, DC_DEVICEID_MSMN130): case DC_DEVID(DC_VENDORID_LINKSYS, DC_DEVICEID_PCMPC200_AB08): case DC_DEVID(DC_VENDORID_LINKSYS, DC_DEVICEID_PCMPC200_AB09): - sc->dc_type = DC_TYPE_AN985; + sc->dc_type = DC_TYPE_AN983; sc->dc_flags |= DC_64BIT_HASH; sc->dc_flags |= DC_TX_USE_TX_INTR; sc->dc_flags |= DC_TX_ADMTEK_WAR; @@ -2058,7 +2059,7 @@ dc_attach(device_t dev) dc_read_eeprom(sc, (caddr_t)&eaddr, DC_EE_NODEADDR, 3, 0); break; case DC_TYPE_AL981: - case DC_TYPE_AN985: + case DC_TYPE_AN983: reg = CSR_READ_4(sc, DC_AL_PAR0); mac = (uint8_t *)&eaddr[0]; mac[0] = (reg >> 0) & 0xff; Modified: stable/7/sys/dev/dc/if_dcreg.h ============================================================================== --- stable/7/sys/dev/dc/if_dcreg.h Tue Nov 2 21:01:15 2010 
(r214687) +++ stable/7/sys/dev/dc/if_dcreg.h Tue Nov 2 21:01:18 2010 (r214688) @@ -72,7 +72,7 @@ #define DC_TYPE_21143 0x4 /* Intel 21143 */ #define DC_TYPE_ASIX 0x5 /* ASIX AX88140A/AX88141 */ #define DC_TYPE_AL981 0x6 /* ADMtek AL981 Comet */ -#define DC_TYPE_AN985 0x7 /* ADMtek AN985 Centaur */ +#define DC_TYPE_AN983 0x7 /* ADMtek AN983 Centaur */ #define DC_TYPE_DM9102 0x8 /* Davicom DM9102 */ #define DC_TYPE_PNICII 0x9 /* 82c115 PNIC II */ #define DC_TYPE_PNIC 0xA /* 82c168/82c169 PNIC I */ @@ -86,12 +86,12 @@ #define DC_IS_ADMTEK(x) \ (x->dc_type == DC_TYPE_AL981 || \ - x->dc_type == DC_TYPE_AN985) + x->dc_type == DC_TYPE_AN983) #define DC_IS_INTEL(x) (x->dc_type == DC_TYPE_21143) #define DC_IS_ASIX(x) (x->dc_type == DC_TYPE_ASIX) #define DC_IS_COMET(x) (x->dc_type == DC_TYPE_AL981) -#define DC_IS_CENTAUR(x) (x->dc_type == DC_TYPE_AN985) +#define DC_IS_CENTAUR(x) (x->dc_type == DC_TYPE_AN983) #define DC_IS_DAVICOM(x) (x->dc_type == DC_TYPE_DM9102) #define DC_IS_PNICII(x) (x->dc_type == DC_TYPE_PNICII) #define DC_IS_PNIC(x) (x->dc_type == DC_TYPE_PNIC) @@ -543,8 +543,8 @@ struct dc_mii_frame { */ /* - * ADMtek specific registers and constants for the AL981 and AN985. - * The AN985 doesn't use the magic PHY registers. + * ADMtek specific registers and constants for the AL981 and AN983. + * The AN983 doesn't use the magic PHY registers. */ #define DC_AL_CR 0x88 /* command register */ #define DC_AL_PAR0 0xA4 /* station address */ @@ -890,8 +890,8 @@ struct dc_softc { * ADMtek device IDs. 
*/ #define DC_DEVICEID_AL981 0x0981 -#define DC_DEVICEID_AN985 0x0985 -#define DC_DEVICEID_FA511 0x1985 +#define DC_DEVICEID_AN983 0x0985 +#define DC_DEVICEID_AN985 0x1985 #define DC_DEVICEID_ADM9511 0x9511 #define DC_DEVICEID_ADM9513 0x9513 From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 22:12:13 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 50AF31065673; Tue, 2 Nov 2010 22:12:13 +0000 (UTC) (envelope-from marius@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 3DE848FC22; Tue, 2 Nov 2010 22:12:13 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2MCDvM092504; Tue, 2 Nov 2010 22:12:13 GMT (envelope-from marius@svn.freebsd.org) Received: (from marius@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2MCD67092502; Tue, 2 Nov 2010 22:12:13 GMT (envelope-from marius@svn.freebsd.org) Message-Id: <201011022212.oA2MCD67092502@svn.freebsd.org> From: Marius Strobl Date: Tue, 2 Nov 2010 22:12:13 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214691 - stable/7/sys/boot/sparc64/loader X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 22:12:13 -0000 Author: marius Date: Tue Nov 2 22:12:12 2010 New Revision: 214691 URL: http://svn.freebsd.org/changeset/base/214691 Log: MFC: r214526 Partially revert r203829 (MFC'ed to stable/7 in r205921); as it turns out what the PowerPC OFW 
loader did was incorrect as further down the road cons_probe() calls malloc() so the former can't be called before init_heap() has succeeded. Instead just exit to the firmware in case init_heap() fails like OF_init() does when hitting a problem as we're then likely running in a very broken environment where hardly anything can be trusted to work. Modified: stable/7/sys/boot/sparc64/loader/main.c Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/boot/sparc64/loader/main.c ============================================================================== --- stable/7/sys/boot/sparc64/loader/main.c Tue Nov 2 22:12:06 2010 (r214690) +++ stable/7/sys/boot/sparc64/loader/main.c Tue Nov 2 22:12:12 2010 (r214691) @@ -812,15 +812,15 @@ main(int (*openfirm)(void *)) archsw.arch_autoload = sparc64_autoload; archsw.arch_maphint = sparc64_maphint; + if (init_heap() == (vm_offset_t)-1) + OF_exit(); + setheap((void *)heapva, (void *)(heapva + HEAPSZ)); + /* * Probe for a console. 
*/ cons_probe(); - if (init_heap() == (vm_offset_t)-1) - panic("%s: can't claim heap", __func__); - setheap((void *)heapva, (void *)(heapva + HEAPSZ)); - if ((root = OF_peer(0)) == -1) panic("%s: can't get root phandle", __func__); OF_getprop(root, "compatible", compatible, sizeof(compatible)); From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 22:48:52 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id E45C31065674; Tue, 2 Nov 2010 22:48:52 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id D18968FC1A; Tue, 2 Nov 2010 22:48:52 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2Mmqm1094687; Tue, 2 Nov 2010 22:48:52 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2MmqSL094684; Tue, 2 Nov 2010 22:48:52 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022248.oA2MmqSL094684@svn.freebsd.org> From: Pyun YongHyeon Date: Tue, 2 Nov 2010 22:48:52 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214696 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 22:48:53 -0000 Author: yongari Date: Tue Nov 2 22:48:52 2010 New Revision: 214696 URL: http://svn.freebsd.org/changeset/base/214696 Log: MFC r213081,213225,213280: r213081: Always show asic/chip 
revision in device attach phase. There are too many bge(4) controllers there and model name does not necessarily match asic/chip revision. Relying on VPD string made it hard to identify exact asic/chip revision so the first step to debug bge(4) was getting exact asic/chip information with verbose boot which may not be available on production server. r213255: Set the number of RX frames to receive after RX MBUF low watermark has been reached. This reduced number of dropped frames when flow-control is enabled. Previously it dropped incoming frames once RX MBUF low watermark has been reached. The value used in MAC RX MBUF low watermark is greater than or equal to 4 so receiving two more RX frames should not be a problem. Obtained from: OpenBSD r213280: After r207391, brgphy(4) passes resolved flow-control settings to parent driver. Use that information to configure flow-control. One drawback is there is no way to disable flow-control as we still don't have proper way to not advertise RX/TX pause capability to link partner. But I don't think it would cause severe problems and users can selectively disable flow-control in switch port. 
Modified: stable/7/sys/dev/bge/if_bge.c stable/7/sys/dev/bge/if_bgereg.h Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 22:48:18 2010 (r214695) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 22:48:52 2010 (r214696) @@ -882,10 +882,21 @@ bge_miibus_statchg(device_t dev) else BGE_SETBIT(sc, BGE_MAC_MODE, BGE_PORTMODE_MII); - if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) + if (IFM_OPTIONS(mii->mii_media_active & IFM_FDX) != 0) { BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); - else + if (IFM_OPTIONS(mii->mii_media_active) & IFM_FLAG1) + BGE_SETBIT(sc, BGE_TX_MODE, BGE_TXMODE_FLOWCTL_ENABLE); + else + BGE_CLRBIT(sc, BGE_TX_MODE, BGE_TXMODE_FLOWCTL_ENABLE); + if (IFM_OPTIONS(mii->mii_media_active) & IFM_FLAG0) + BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_FLOWCTL_ENABLE); + else + BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_FLOWCTL_ENABLE); + } else { BGE_SETBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); + BGE_CLRBIT(sc, BGE_TX_MODE, BGE_TXMODE_FLOWCTL_ENABLE); + BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_FLOWCTL_ENABLE); + } } /* @@ -2644,12 +2655,11 @@ bge_attach(device_t dev) goto fail; } - if (bootverbose) - device_printf(dev, - "CHIP ID 0x%08x; ASIC REV 0x%02x; CHIP REV 0x%02x; %s\n", - sc->bge_chipid, sc->bge_asicrev, sc->bge_chiprev, - (sc->bge_flags & BGE_FLAG_PCIX) ? "PCI-X" : - ((sc->bge_flags & BGE_FLAG_PCIE) ? "PCI-E" : "PCI")); + device_printf(dev, + "CHIP ID 0x%08x; ASIC REV 0x%02x; CHIP REV 0x%02x; %s\n", + sc->bge_chipid, sc->bge_asicrev, sc->bge_chiprev, + (sc->bge_flags & BGE_FLAG_PCIX) ? "PCI-X" : + ((sc->bge_flags & BGE_FLAG_PCIE) ? 
"PCI-E" : "PCI")); BGE_LOCK_INIT(sc, device_get_nameunit(dev)); @@ -4219,6 +4229,14 @@ bge_init_locked(struct bge_softc *sc) /* Turn on receiver. */ BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE); + /* + * Set the number of good frames to receive after RX MBUF + * Low Watermark has been reached. After the RX MAC receives + * this number of frames, it will drop subsequent incoming + * frames until the MBUF High Watermark is reached. + */ + CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2); + /* Tell firmware we're alive. */ BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); Modified: stable/7/sys/dev/bge/if_bgereg.h ============================================================================== --- stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 22:48:18 2010 (r214695) +++ stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 22:48:52 2010 (r214696) @@ -632,6 +632,7 @@ #define BGE_RX_BD_RULES_CTL15 0x04F8 #define BGE_RX_BD_RULES_MASKVAL15 0x04FC #define BGE_RX_RULES_CFG 0x0500 +#define BGE_MAX_RX_FRAME_LOWAT 0x0504 #define BGE_SERDES_CFG 0x0590 #define BGE_SERDES_STS 0x0594 #define BGE_SGDIG_CFG 0x05B0 From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 22:59:46 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 1C3031065673; Tue, 2 Nov 2010 22:59:46 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 0823E8FC1D; Tue, 2 Nov 2010 22:59:46 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2Mxk8F095772; Tue, 2 Nov 2010 22:59:46 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2MxjgW095769; Tue, 2 Nov 2010 22:59:45 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022259.oA2MxjgW095769@svn.freebsd.org> 
From: Pyun YongHyeon Date: Tue, 2 Nov 2010 22:59:45 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214699 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 22:59:46 -0000 Author: yongari Date: Tue Nov 2 22:59:45 2010 New Revision: 214699 URL: http://svn.freebsd.org/changeset/base/214699 Log: MFC r213283,213410: r213283: Implement hardware MAC statistics for BCM5705 or newer Broadcom controllers. bge(4) exported MAC statistics on controllers that maintain the statistics in the NIC's internal memory. Newer controllers require register access to fetch these values. These counters provide useful information to diagnose driver issues. r213410: Consistently use ifHCOutOctets/ifHCInOctets instead of Octets as these names are used in data sheet. Also use UnicastPkts, MulticastPkts and BroadcastPkts instead of UcastPkts, McastPkts and BcastPkts to clarify its meaning. 
Suggested by: bde Modified: stable/7/sys/dev/bge/if_bge.c stable/7/sys/dev/bge/if_bgereg.h Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 22:57:20 2010 (r214698) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 22:59:45 2010 (r214699) @@ -371,6 +371,7 @@ static void bge_rxeof(struct bge_softc * static void bge_asf_driver_up (struct bge_softc *); static void bge_tick(void *); +static void bge_stats_clear_regs(struct bge_softc *); static void bge_stats_update(struct bge_softc *); static void bge_stats_update_regs(struct bge_softc *); static struct mbuf *bge_setup_tso(struct bge_softc *, struct mbuf *, @@ -453,6 +454,10 @@ static int bge_sysctl_reg_read(SYSCTL_HA static int bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS); #endif static void bge_add_sysctls(struct bge_softc *); +static void bge_add_sysctl_stats_regs(struct bge_softc *, + struct sysctl_ctx_list *, struct sysctl_oid_list *); +static void bge_add_sysctl_stats(struct bge_softc *, struct sysctl_ctx_list *, + struct sysctl_oid_list *); static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS); static device_method_t bge_methods[] = { @@ -3726,15 +3731,127 @@ static void bge_stats_update_regs(struct bge_softc *sc) { struct ifnet *ifp; + struct bge_mac_stats *stats; ifp = sc->bge_ifp; + stats = &sc->bge_mac_stats; - ifp->if_collisions += CSR_READ_4(sc, BGE_MAC_STATS + - offsetof(struct bge_mac_stats_regs, etherStatsCollisions)); + stats->ifHCOutOctets += + CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); + stats->etherStatsCollisions += + CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); + stats->outXonSent += + CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); + stats->outXoffSent += + CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); + 
stats->dot3StatsInternalMacTransmitErrors += + CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); + stats->dot3StatsSingleCollisionFrames += + CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); + stats->dot3StatsMultipleCollisionFrames += + CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); + stats->dot3StatsDeferredTransmissions += + CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); + stats->dot3StatsExcessiveCollisions += + CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); + stats->dot3StatsLateCollisions += + CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); + stats->ifHCOutUcastPkts += + CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); + stats->ifHCOutMulticastPkts += + CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); + stats->ifHCOutBroadcastPkts += + CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); + + stats->ifHCInOctets += + CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); + stats->etherStatsFragments += + CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); + stats->ifHCInUcastPkts += + CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); + stats->ifHCInMulticastPkts += + CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); + stats->ifHCInBroadcastPkts += + CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); + stats->dot3StatsFCSErrors += + CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); + stats->dot3StatsAlignmentErrors += + CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); + stats->xonPauseFramesReceived += + CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); + stats->xoffPauseFramesReceived += + CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); + stats->macControlFramesReceived += + CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); + stats->xoffStateEntered += + CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); + stats->dot3StatsFramesTooLong += + CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); + stats->etherStatsJabbers += + CSR_READ_4(sc, BGE_RX_MAC_STATS_JABBERS); + stats->etherStatsUndersizePkts += + CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); + + stats->FramesDroppedDueToFilters += + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); + stats->DmaWriteQueueFull += + CSR_READ_4(sc, 
BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); + stats->DmaWriteHighPriQueueFull += + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); + stats->NoMoreRxBDs += + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); + stats->InputDiscards += + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); + stats->InputErrors += + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); + stats->RecvThresholdHit += + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); + + ifp->if_collisions = (u_long)stats->etherStatsCollisions; + ifp->if_ierrors = (u_long)(stats->NoMoreRxBDs + stats->InputDiscards + + stats->InputErrors); +} + +static void +bge_stats_clear_regs(struct bge_softc *sc) +{ - ifp->if_ierrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); - ifp->if_ierrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); - ifp->if_ierrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); + CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); + CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); + CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); + CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); + CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); + CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); + CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); + CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); + CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); + CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); + CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); + CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); + CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); + + CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); + CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); + CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); + CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); + CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); + CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); + CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); + CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); + CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); + CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); + CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); + CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); + CSR_READ_4(sc, 
BGE_RX_MAC_STATS_JABBERS); + CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); + + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); + CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); } static void @@ -4237,6 +4354,10 @@ bge_init_locked(struct bge_softc *sc) */ CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2); + /* Clear MAC statistics. */ + if (BGE_IS_5705_PLUS(sc)) + bge_stats_clear_regs(sc); + /* Tell firmware we're alive. */ BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); @@ -4638,6 +4759,9 @@ bge_stop(struct bge_softc *sc) BGE_CLRBIT(sc, BGE_BMAN_MODE, BGE_BMANMODE_ENABLE); BGE_CLRBIT(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE); } + /* Update MAC statistics. */ + if (BGE_IS_5705_PLUS(sc)) + bge_stats_update_regs(sc); bge_reset(sc); bge_sig_legacy(sc, BGE_RESET_STOP); @@ -4836,17 +4960,11 @@ bge_link_upd(struct bge_softc *sc) BGE_MACSTAT_LINK_CHANGED); } -#define BGE_SYSCTL_STAT(sc, ctx, desc, parent, node, oid) \ - SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, oid, CTLTYPE_UINT|CTLFLAG_RD, \ - sc, offsetof(struct bge_stats, node), bge_sysctl_stats, "IU", \ - desc) - static void bge_add_sysctls(struct bge_softc *sc) { struct sysctl_ctx_list *ctx; - struct sysctl_oid_list *children, *schildren; - struct sysctl_oid *tree; + struct sysctl_oid_list *children; char tn[32]; int unit; @@ -4908,9 +5026,24 @@ bge_add_sysctls(struct bge_softc *sc) "generate UDP checksum value 0"); if (BGE_IS_5705_PLUS(sc)) - return; + bge_add_sysctl_stats_regs(sc, ctx, children); + else + bge_add_sysctl_stats(sc, ctx, children); +} + +#define BGE_SYSCTL_STAT(sc, ctx, desc, parent, node, oid) \ + SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, oid, CTLTYPE_UINT|CTLFLAG_RD, \ + sc, offsetof(struct bge_stats, node), bge_sysctl_stats, "IU", \ + desc) - tree = SYSCTL_ADD_NODE(ctx, children, 
OID_AUTO, "stats", CTLFLAG_RD, +static void +bge_add_sysctl_stats(struct bge_softc *sc, struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *parent) +{ + struct sysctl_oid *tree; + struct sysctl_oid_list *children, *schildren; + + tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, NULL, "BGE Statistics"); schildren = children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Frames Dropped Due To Filters", @@ -4949,11 +5082,11 @@ bge_add_sysctls(struct bge_softc *sc) NULL, "BGE RX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Inbound Octets", - children, rxstats.ifHCInOctets, "Octets"); + children, rxstats.ifHCInOctets, "ifHCInOctets"); BGE_SYSCTL_STAT(sc, ctx, "Fragments", children, rxstats.etherStatsFragments, "Fragments"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Unicast Packets", - children, rxstats.ifHCInUcastPkts, "UcastPkts"); + children, rxstats.ifHCInUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Multicast Packets", children, rxstats.ifHCInMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "FCS Errors", @@ -4985,7 +5118,7 @@ bge_add_sysctls(struct bge_softc *sc) NULL, "BGE TX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Outbound Octets", - children, txstats.ifHCOutOctets, "Octets"); + children, txstats.ifHCOutOctets, "ifHCOutOctets"); BGE_SYSCTL_STAT(sc, ctx, "TX Collisions", children, txstats.etherStatsCollisions, "Collisions"); BGE_SYSCTL_STAT(sc, ctx, "XON Sent", @@ -5013,7 +5146,7 @@ bge_add_sysctls(struct bge_softc *sc) children, txstats.dot3StatsLateCollisions, "LateCollisions"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Unicast Packets", - children, txstats.ifHCOutUcastPkts, "UcastPkts"); + children, txstats.ifHCOutUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Multicast Packets", children, txstats.ifHCOutMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Broadcast Packets", @@ -5027,6 +5160,106 @@ bge_add_sysctls(struct bge_softc *sc) 
children, txstats.ifOutErrors, "Errors"); } +#undef BGE_SYSCTL_STAT + +#define BGE_SYSCTL_STAT_ADD64(c, h, n, p, d) \ + SYSCTL_ADD_QUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d) + +static void +bge_add_sysctl_stats_regs(struct bge_softc *sc, struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *parent) +{ + struct sysctl_oid *tree; + struct sysctl_oid_list *child, *schild; + struct bge_mac_stats *stats; + + stats = &sc->bge_mac_stats; + tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, + NULL, "BGE Statistics"); + schild = child = SYSCTL_CHILDREN(tree); + BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesDroppedDueToFilters", + &stats->FramesDroppedDueToFilters, "Frames Dropped Due to Filters"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteQueueFull", + &stats->DmaWriteQueueFull, "NIC DMA Write Queue Full"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteHighPriQueueFull", + &stats->DmaWriteHighPriQueueFull, + "NIC DMA Write High Priority Queue Full"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "NoMoreRxBDs", + &stats->NoMoreRxBDs, "NIC No More RX Buffer Descriptors"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "InputDiscards", + &stats->InputDiscards, "Discarded Input Frames"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "InputErrors", + &stats->InputErrors, "Input Errors"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "RecvThresholdHit", + &stats->RecvThresholdHit, "NIC Recv Threshold Hit"); + + tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "rx", CTLFLAG_RD, + NULL, "BGE RX Statistics"); + child = SYSCTL_CHILDREN(tree); + BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCInOctets", + &stats->ifHCInOctets, "Inbound Octets"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "Fragments", + &stats->etherStatsFragments, "Fragments"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", + &stats->ifHCInUcastPkts, "Inbound Unicast Packets"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", + &stats->ifHCInMulticastPkts, "Inbound Multicast Packets"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", + 
&stats->ifHCInBroadcastPkts, "Inbound Broadcast Packets"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "FCSErrors", + &stats->dot3StatsFCSErrors, "FCS Errors"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "AlignmentErrors", + &stats->dot3StatsAlignmentErrors, "Alignment Errors"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "xonPauseFramesReceived", + &stats->xonPauseFramesReceived, "XON Pause Frames Received"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffPauseFramesReceived", + &stats->xoffPauseFramesReceived, "XOFF Pause Frames Received"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "ControlFramesReceived", + &stats->macControlFramesReceived, "MAC Control Frames Received"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffStateEntered", + &stats->xoffStateEntered, "XOFF State Entered"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesTooLong", + &stats->dot3StatsFramesTooLong, "Frames Too Long"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "Jabbers", + &stats->etherStatsJabbers, "Jabbers"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "UndersizePkts", + &stats->etherStatsUndersizePkts, "Undersized Packets"); + + tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "tx", CTLFLAG_RD, + NULL, "BGE TX Statistics"); + child = SYSCTL_CHILDREN(tree); + BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCOutOctets", + &stats->ifHCOutOctets, "Outbound Octets"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "Collisions", + &stats->etherStatsCollisions, "TX Collisions"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "XonSent", + &stats->outXonSent, "XON Sent"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "XoffSent", + &stats->outXoffSent, "XOFF Sent"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "InternalMacTransmitErrors", + &stats->dot3StatsInternalMacTransmitErrors, + "Internal MAC TX Errors"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "SingleCollisionFrames", + &stats->dot3StatsSingleCollisionFrames, "Single Collision Frames"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "MultipleCollisionFrames", + &stats->dot3StatsMultipleCollisionFrames, + "Multiple Collision Frames"); + BGE_SYSCTL_STAT_ADD64(ctx, 
child, "DeferredTransmissions", + &stats->dot3StatsDeferredTransmissions, "Deferred Transmissions"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "ExcessiveCollisions", + &stats->dot3StatsExcessiveCollisions, "Excessive Collisions"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "LateCollisions", + &stats->dot3StatsLateCollisions, "Late Collisions"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", + &stats->ifHCOutUcastPkts, "Outbound Unicast Packets"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", + &stats->ifHCOutMulticastPkts, "Outbound Multicast Packets"); + BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", + &stats->ifHCOutBroadcastPkts, "Outbound Broadcast Packets"); +} + +#undef BGE_SYSCTL_STAT_ADD64 + static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS) { Modified: stable/7/sys/dev/bge/if_bgereg.h ============================================================================== --- stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 22:57:20 2010 (r214698) +++ stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 22:59:45 2010 (r214699) @@ -637,7 +637,53 @@ #define BGE_SERDES_STS 0x0594 #define BGE_SGDIG_CFG 0x05B0 #define BGE_SGDIG_STS 0x05B4 -#define BGE_MAC_STATS 0x0800 +#define BGE_TX_MAC_STATS_OCTETS 0x0800 +#define BGE_TX_MAC_STATS_RESERVE_0 0x0804 +#define BGE_TX_MAC_STATS_COLLS 0x0808 +#define BGE_TX_MAC_STATS_XON_SENT 0x080C +#define BGE_TX_MAC_STATS_XOFF_SENT 0x0810 +#define BGE_TX_MAC_STATS_RESERVE_1 0x0814 +#define BGE_TX_MAC_STATS_ERRORS 0x0818 +#define BGE_TX_MAC_STATS_SINGLE_COLL 0x081C +#define BGE_TX_MAC_STATS_MULTI_COLL 0x0820 +#define BGE_TX_MAC_STATS_DEFERRED 0x0824 +#define BGE_TX_MAC_STATS_RESERVE_2 0x0828 +#define BGE_TX_MAC_STATS_EXCESS_COLL 0x082C +#define BGE_TX_MAC_STATS_LATE_COLL 0x0830 +#define BGE_TX_MAC_STATS_RESERVE_3 0x0834 +#define BGE_TX_MAC_STATS_RESERVE_4 0x0838 +#define BGE_TX_MAC_STATS_RESERVE_5 0x083C +#define BGE_TX_MAC_STATS_RESERVE_6 0x0840 +#define BGE_TX_MAC_STATS_RESERVE_7 0x0844 +#define BGE_TX_MAC_STATS_RESERVE_8 0x0848 +#define 
BGE_TX_MAC_STATS_RESERVE_9 0x084C +#define BGE_TX_MAC_STATS_RESERVE_10 0x0850 +#define BGE_TX_MAC_STATS_RESERVE_11 0x0854 +#define BGE_TX_MAC_STATS_RESERVE_12 0x0858 +#define BGE_TX_MAC_STATS_RESERVE_13 0x085C +#define BGE_TX_MAC_STATS_RESERVE_14 0x0860 +#define BGE_TX_MAC_STATS_RESERVE_15 0x0864 +#define BGE_TX_MAC_STATS_RESERVE_16 0x0868 +#define BGE_TX_MAC_STATS_UCAST 0x086C +#define BGE_TX_MAC_STATS_MCAST 0x0870 +#define BGE_TX_MAC_STATS_BCAST 0x0874 +#define BGE_TX_MAC_STATS_RESERVE_17 0x0878 +#define BGE_TX_MAC_STATS_RESERVE_18 0x087C +#define BGE_RX_MAC_STATS_OCTESTS 0x0880 +#define BGE_RX_MAC_STATS_RESERVE_0 0x0884 +#define BGE_RX_MAC_STATS_FRAGMENTS 0x0888 +#define BGE_RX_MAC_STATS_UCAST 0x088C +#define BGE_RX_MAC_STATS_MCAST 0x0890 +#define BGE_RX_MAC_STATS_BCAST 0x0894 +#define BGE_RX_MAC_STATS_FCS_ERRORS 0x0898 +#define BGE_RX_MAC_STATS_ALGIN_ERRORS 0x089C +#define BGE_RX_MAC_STATS_XON_RCVD 0x08A0 +#define BGE_RX_MAC_STATS_XOFF_RCVD 0x08A4 +#define BGE_RX_MAC_STATS_CTRL_RCVD 0x08A8 +#define BGE_RX_MAC_STATS_XOFF_ENTERED 0x08AC +#define BGE_RX_MAC_STATS_FRAME_TOO_LONG 0x08B0 +#define BGE_RX_MAC_STATS_JABBERS 0x08B4 +#define BGE_RX_MAC_STATS_UNDERSIZE 0x08B8 /* Ethernet MAC Mode register */ #define BGE_MACMODE_RESET 0x00000001 @@ -2349,40 +2395,50 @@ struct bge_tx_mac_stats { }; /* Stats counters access through registers */ -struct bge_mac_stats_regs { - uint32_t ifHCOutOctets; - uint32_t Reserved0; - uint32_t etherStatsCollisions; - uint32_t outXonSent; - uint32_t outXoffSent; - uint32_t Reserved1; - uint32_t dot3StatsInternalMacTransmitErrors; - uint32_t dot3StatsSingleCollisionFrames; - uint32_t dot3StatsMultipleCollisionFrames; - uint32_t dot3StatsDeferredTransmissions; - uint32_t Reserved2; - uint32_t dot3StatsExcessiveCollisions; - uint32_t dot3StatsLateCollisions; - uint32_t Reserved3[14]; - uint32_t ifHCOutUcastPkts; - uint32_t ifHCOutMulticastPkts; - uint32_t ifHCOutBroadcastPkts; - uint32_t Reserved4[2]; - uint32_t ifHCInOctets; - uint32_t 
Reserved5; - uint32_t etherStatsFragments; - uint32_t ifHCInUcastPkts; - uint32_t ifHCInMulticastPkts; - uint32_t ifHCInBroadcastPkts; - uint32_t dot3StatsFCSErrors; - uint32_t dot3StatsAlignmentErrors; - uint32_t xonPauseFramesReceived; - uint32_t xoffPauseFramesReceived; - uint32_t macControlFramesReceived; - uint32_t xoffStateEntered; - uint32_t dot3StatsFramesTooLong; - uint32_t etherStatsJabbers; - uint32_t etherStatsUndersizePkts; +struct bge_mac_stats { + /* TX MAC statistics */ + uint64_t ifHCOutOctets; + uint64_t Reserved0; + uint64_t etherStatsCollisions; + uint64_t outXonSent; + uint64_t outXoffSent; + uint64_t Reserved1; + uint64_t dot3StatsInternalMacTransmitErrors; + uint64_t dot3StatsSingleCollisionFrames; + uint64_t dot3StatsMultipleCollisionFrames; + uint64_t dot3StatsDeferredTransmissions; + uint64_t Reserved2; + uint64_t dot3StatsExcessiveCollisions; + uint64_t dot3StatsLateCollisions; + uint64_t Reserved3[14]; + uint64_t ifHCOutUcastPkts; + uint64_t ifHCOutMulticastPkts; + uint64_t ifHCOutBroadcastPkts; + uint64_t Reserved4[2]; + /* RX MAC statistics */ + uint64_t ifHCInOctets; + uint64_t Reserved5; + uint64_t etherStatsFragments; + uint64_t ifHCInUcastPkts; + uint64_t ifHCInMulticastPkts; + uint64_t ifHCInBroadcastPkts; + uint64_t dot3StatsFCSErrors; + uint64_t dot3StatsAlignmentErrors; + uint64_t xonPauseFramesReceived; + uint64_t xoffPauseFramesReceived; + uint64_t macControlFramesReceived; + uint64_t xoffStateEntered; + uint64_t dot3StatsFramesTooLong; + uint64_t etherStatsJabbers; + uint64_t etherStatsUndersizePkts; + /* Receive List Placement control */ + uint64_t FramesDroppedDueToFilters; + uint64_t DmaWriteQueueFull; + uint64_t DmaWriteHighPriQueueFull; + uint64_t NoMoreRxBDs; + uint64_t InputDiscards; + uint64_t InputErrors; + uint64_t RecvThresholdHit; }; struct bge_stats { @@ -2660,6 +2716,7 @@ struct bge_softc { #ifdef DEVICE_POLLING int rxcycles; #endif /* DEVICE_POLLING */ + struct bge_mac_stats bge_mac_stats; struct task 
bge_intr_task; struct taskqueue *bge_tq; }; From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 23:07:39 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 05E48106564A; Tue, 2 Nov 2010 23:07:39 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id E76858FC19; Tue, 2 Nov 2010 23:07:38 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2N7ct4096586; Tue, 2 Nov 2010 23:07:38 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2N7cJR096583; Tue, 2 Nov 2010 23:07:38 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022307.oA2N7cJR096583@svn.freebsd.org> From: Pyun YongHyeon Date: Tue, 2 Nov 2010 23:07:38 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214701 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 23:07:39 -0000 Author: yongari Date: Tue Nov 2 23:07:38 2010 New Revision: 214701 URL: http://svn.freebsd.org/changeset/base/214701 Log: MFC r213316,213333-213334: r213316: Fix IFCAP_TXCSUM/IFCAP_RXCSUM handling. Previously bge(4) used IFCAP_HWCSUM to know which capability should be changed such that disabling RX checksun offloading resulted in disabling TX checksum offloading. 
r213333: Allow write DMA to request larger DMA burst size to get better performance on BCM5785. Obtained from: OpenBSD r213334: Remove extra semicolon. Modified: stable/7/sys/dev/bge/if_bge.c stable/7/sys/dev/bge/if_bgereg.h Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:04:23 2010 (r214700) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:07:38 2010 (r214701) @@ -1052,7 +1052,7 @@ bge_init_rx_ring_std(struct bge_softc *s if ((error = bge_newbuf_std(sc, i)) != 0) return (error); BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); - }; + } bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); @@ -1095,7 +1095,7 @@ bge_init_rx_ring_jumbo(struct bge_softc if ((error = bge_newbuf_jumbo(sc, i)) != 0) return (error); BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); - }; + } bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); @@ -1834,6 +1834,10 @@ bge_blockinit(struct bge_softc *sc) if (BGE_IS_5755_PLUS(sc)) val |= BGE_WDMAMODE_STATUS_TAG_FIX; + /* Request larger DMA burst size to get better performance. 
*/ + if (sc->bge_asicrev == BGE_ASICREV_BCM5785) + val |= BGE_WDMAMODE_BURST_ALL_DATA; + /* Turn on write DMA state machine */ CSR_WRITE_4(sc, BGE_WDMA_MODE, val); DELAY(40); @@ -4628,15 +4632,19 @@ bge_ioctl(struct ifnet *ifp, u_long comm } } #endif - if (mask & IFCAP_HWCSUM) { - ifp->if_capenable ^= IFCAP_HWCSUM; - if (IFCAP_HWCSUM & ifp->if_capenable && - IFCAP_HWCSUM & ifp->if_capabilities) + if ((mask & IFCAP_TXCSUM) != 0 && + (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { + ifp->if_capenable ^= IFCAP_TXCSUM; + if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= sc->bge_csum_features; else ifp->if_hwassist &= ~sc->bge_csum_features; } + if ((mask & IFCAP_RXCSUM) != 0 && + (ifp->if_capabilities & IFCAP_RXCSUM) != 0) + ifp->if_capenable ^= IFCAP_RXCSUM; + if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; Modified: stable/7/sys/dev/bge/if_bgereg.h ============================================================================== --- stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:04:23 2010 (r214700) +++ stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:07:38 2010 (r214701) @@ -1481,6 +1481,7 @@ #define BGE_WDMAMODE_LOCREAD_TOOBIG 0x00000200 #define BGE_WDMAMODE_ALL_ATTNS 0x000003FC #define BGE_WDMAMODE_STATUS_TAG_FIX 0x20000000 +#define BGE_WDMAMODE_BURST_ALL_DATA 0xC0000000 /* Write DMA status register */ #define BGE_WDMASTAT_PCI_TGT_ABRT_ATTN 0x00000004 From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 23:26:07 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 9434C106564A; Tue, 2 Nov 2010 23:26:07 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 812878FC12; Tue, 2 Nov 2010 23:26:07 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by 
svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2NQ7rZ098061; Tue, 2 Nov 2010 23:26:07 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2NQ7DU098057; Tue, 2 Nov 2010 23:26:07 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022326.oA2NQ7DU098057@svn.freebsd.org> From: Pyun YongHyeon Date: Tue, 2 Nov 2010 23:26:07 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214703 - in stable/7/sys/dev: bge mii X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 23:26:07 -0000 Author: yongari Date: Tue Nov 2 23:26:07 2010 New Revision: 214703 URL: http://svn.freebsd.org/changeset/base/214703 Log: MFC r213411,213464-213465,213468: r213411: Enable fix for read DMA FIFO overruns on controllers that have this fix. Note, we still need workaround for controllers that lack this fix and it needs more work in RX BD updating. Submitted by: davidch r213464: Separate common flags into controller specific and PHY related flags. There should be no functional changes. This change will make it easy to add more quirk/flags in future. Reviewed by: davidch r213465: Rearrange code a bit to correctly set PHY flags. This change makes it easy to add newer ASICs. Obtained from: OpenBSD r213468: Fix bge(4) build breakage when BGE_REGISTER_DEBUG is defined. 
Modified: stable/7/sys/dev/bge/if_bge.c stable/7/sys/dev/bge/if_bgereg.h stable/7/sys/dev/mii/brgphy.c Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:23:48 2010 (r214702) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:26:07 2010 (r214703) @@ -1858,6 +1858,19 @@ bge_blockinit(struct bge_softc *sc) sc->bge_asicrev == BGE_ASICREV_BCM57780) val |= BGE_RDMAMODE_TSO6_ENABLE; } + if (sc->bge_asicrev == BGE_ASICREV_BCM5761 || + sc->bge_asicrev == BGE_ASICREV_BCM5784 || + sc->bge_asicrev == BGE_ASICREV_BCM5785 || + sc->bge_asicrev == BGE_ASICREV_BCM57780) { + /* + * Enable fix for read DMA FIFO overruns. + * The fix is to limit the number of RX BDs + * the hardware would fetch at a fime. + */ + CSR_WRITE_4(sc, BGE_RDMA_RSRVCTRL, + CSR_READ_4(sc, BGE_RDMA_RSRVCTRL) | + BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX); + } CSR_WRITE_4(sc, BGE_RDMA_MODE, val); DELAY(40); @@ -2497,7 +2510,7 @@ bge_attach(device_t dev) sc->bge_asicrev != BGE_ASICREV_BCM5906 && sc->bge_chipid != BGE_CHIPID_BCM5705_A0 && sc->bge_chipid != BGE_CHIPID_BCM5705_A1) - sc->bge_flags |= BGE_FLAG_WIRESPEED; + sc->bge_phy_flags |= BGE_PHY_WIRESPEED; if (bge_has_eaddr(sc)) sc->bge_flags |= BGE_FLAG_EADDR; @@ -2534,30 +2547,32 @@ bge_attach(device_t dev) break; } - /* Set various bug flags. */ + /* Set various PHY bug flags. 
*/ if (sc->bge_chipid == BGE_CHIPID_BCM5701_A0 || sc->bge_chipid == BGE_CHIPID_BCM5701_B0) - sc->bge_flags |= BGE_FLAG_CRC_BUG; + sc->bge_phy_flags |= BGE_PHY_CRC_BUG; if (sc->bge_chiprev == BGE_CHIPREV_5703_AX || sc->bge_chiprev == BGE_CHIPREV_5704_AX) - sc->bge_flags |= BGE_FLAG_ADC_BUG; + sc->bge_phy_flags |= BGE_PHY_ADC_BUG; if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0) - sc->bge_flags |= BGE_FLAG_5704_A0_BUG; + sc->bge_phy_flags |= BGE_PHY_5704_A0_BUG; if (pci_get_subvendor(dev) == DELL_VENDORID) - sc->bge_flags |= BGE_FLAG_NO_3LED; - if (pci_get_device(dev) == BCOM_DEVICEID_BCM5755M) - sc->bge_flags |= BGE_FLAG_ADJUST_TRIM; - if (BGE_IS_5705_PLUS(sc) && - !(sc->bge_flags & BGE_FLAG_ADJUST_TRIM)) { + sc->bge_phy_flags |= BGE_PHY_NO_3LED; + if ((BGE_IS_5705_PLUS(sc)) && + sc->bge_asicrev != BGE_ASICREV_BCM5906 && + sc->bge_asicrev != BGE_ASICREV_BCM5785 && + sc->bge_asicrev != BGE_ASICREV_BCM57780) { if (sc->bge_asicrev == BGE_ASICREV_BCM5755 || sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5787) { if (pci_get_device(dev) != BCOM_DEVICEID_BCM5722 && pci_get_device(dev) != BCOM_DEVICEID_BCM5756) - sc->bge_flags |= BGE_FLAG_JITTER_BUG; - } else if (sc->bge_asicrev != BGE_ASICREV_BCM5906) - sc->bge_flags |= BGE_FLAG_BER_BUG; + sc->bge_phy_flags |= BGE_PHY_JITTER_BUG; + if (pci_get_device(dev) == BCOM_DEVICEID_BCM5755M) + sc->bge_phy_flags |= BGE_PHY_ADJUST_TRIM; + } else + sc->bge_phy_flags |= BGE_PHY_BER_BUG; } /* @@ -5338,7 +5353,7 @@ bge_sysctl_debug_info(SYSCTL_HANDLER_ARG printf(" - PCI-X Bus\n"); if (sc->bge_flags & BGE_FLAG_PCIE) printf(" - PCI Express Bus\n"); - if (sc->bge_flags & BGE_FLAG_NO_3LED) + if (sc->bge_phy_flags & BGE_PHY_NO_3LED) printf(" - No 3 LEDs\n"); if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) printf(" - RX Alignment Bug\n"); Modified: stable/7/sys/dev/bge/if_bgereg.h ============================================================================== --- 
stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:23:48 2010 (r214702) +++ stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:26:07 2010 (r214703) @@ -1431,6 +1431,7 @@ */ #define BGE_RDMA_MODE 0x4800 #define BGE_RDMA_STATUS 0x4804 +#define BGE_RDMA_RSRVCTRL 0x4900 /* Read DMA mode register */ #define BGE_RDMAMODE_RESET 0x00000001 @@ -1462,6 +1463,9 @@ #define BGE_RDMASTAT_PCI_FIFOOREAD_ATTN 0x00000100 #define BGE_RDMASTAT_LOCWRITE_TOOBIG 0x00000200 +/* Read DMA Reserved Control register */ +#define BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX 0x00000004 + /* * Write DMA control registers */ @@ -2659,29 +2663,30 @@ struct bge_softc { uint32_t bge_flags; #define BGE_FLAG_TBI 0x00000001 #define BGE_FLAG_JUMBO 0x00000002 -#define BGE_FLAG_WIRESPEED 0x00000004 #define BGE_FLAG_EADDR 0x00000008 #define BGE_FLAG_MII_SERDES 0x00000010 #define BGE_FLAG_MSI 0x00000100 #define BGE_FLAG_PCIX 0x00000200 #define BGE_FLAG_PCIE 0x00000400 #define BGE_FLAG_TSO 0x00000800 -#define BGE_FLAG_5700_FAMILY 0x00001000 -#define BGE_FLAG_5705_PLUS 0x00002000 -#define BGE_FLAG_5714_FAMILY 0x00004000 -#define BGE_FLAG_575X_PLUS 0x00008000 -#define BGE_FLAG_5755_PLUS 0x00010000 -#define BGE_FLAG_40BIT_BUG 0x00020000 -#define BGE_FLAG_4G_BNDRY_BUG 0x00040000 -#define BGE_FLAG_RX_ALIGNBUG 0x00100000 -#define BGE_FLAG_NO_3LED 0x00200000 -#define BGE_FLAG_ADC_BUG 0x00400000 -#define BGE_FLAG_5704_A0_BUG 0x00800000 -#define BGE_FLAG_JITTER_BUG 0x01000000 -#define BGE_FLAG_BER_BUG 0x02000000 -#define BGE_FLAG_ADJUST_TRIM 0x04000000 -#define BGE_FLAG_CRC_BUG 0x08000000 -#define BGE_FLAG_5788 0x20000000 +#define BGE_FLAG_5700_FAMILY 0x00010000 +#define BGE_FLAG_5705_PLUS 0x00020000 +#define BGE_FLAG_5714_FAMILY 0x00040000 +#define BGE_FLAG_575X_PLUS 0x00080000 +#define BGE_FLAG_5755_PLUS 0x00100000 +#define BGE_FLAG_5788 0x00200000 +#define BGE_FLAG_40BIT_BUG 0x01000000 +#define BGE_FLAG_4G_BNDRY_BUG 0x02000000 +#define BGE_FLAG_RX_ALIGNBUG 0x04000000 + uint32_t bge_phy_flags; +#define BGE_PHY_WIRESPEED 0x00000001 
+#define BGE_PHY_ADC_BUG 0x00000002 +#define BGE_PHY_5704_A0_BUG 0x00000004 +#define BGE_PHY_JITTER_BUG 0x00000008 +#define BGE_PHY_BER_BUG 0x00000010 +#define BGE_PHY_ADJUST_TRIM 0x00000020 +#define BGE_PHY_CRC_BUG 0x00000040 +#define BGE_PHY_NO_3LED 0x00000080 uint32_t bge_chipid; uint32_t bge_asicrev; uint32_t bge_chiprev; Modified: stable/7/sys/dev/mii/brgphy.c ============================================================================== --- stable/7/sys/dev/mii/brgphy.c Tue Nov 2 23:23:48 2010 (r214702) +++ stable/7/sys/dev/mii/brgphy.c Tue Nov 2 23:26:07 2010 (r214703) @@ -1013,26 +1013,26 @@ brgphy_reset(struct mii_softc *sc) /* Handle any bge (NetXtreme/NetLink) workarounds. */ if (bge_sc) { /* Fix up various bugs */ - if (bge_sc->bge_flags & BGE_FLAG_5704_A0_BUG) + if (bge_sc->bge_phy_flags & BGE_PHY_5704_A0_BUG) brgphy_fixup_5704_a0_bug(sc); - if (bge_sc->bge_flags & BGE_FLAG_ADC_BUG) + if (bge_sc->bge_phy_flags & BGE_PHY_ADC_BUG) brgphy_fixup_adc_bug(sc); - if (bge_sc->bge_flags & BGE_FLAG_ADJUST_TRIM) + if (bge_sc->bge_phy_flags & BGE_PHY_ADJUST_TRIM) brgphy_fixup_adjust_trim(sc); - if (bge_sc->bge_flags & BGE_FLAG_BER_BUG) + if (bge_sc->bge_phy_flags & BGE_PHY_BER_BUG) brgphy_fixup_ber_bug(sc); - if (bge_sc->bge_flags & BGE_FLAG_CRC_BUG) + if (bge_sc->bge_phy_flags & BGE_PHY_CRC_BUG) brgphy_fixup_crc_bug(sc); - if (bge_sc->bge_flags & BGE_FLAG_JITTER_BUG) + if (bge_sc->bge_phy_flags & BGE_PHY_JITTER_BUG) brgphy_fixup_jitter_bug(sc); brgphy_jumbo_settings(sc, ifp->if_mtu); - if (bge_sc->bge_flags & BGE_FLAG_WIRESPEED) + if (bge_sc->bge_phy_flags & BGE_PHY_WIRESPEED) brgphy_ethernet_wirespeed(sc); /* Enable Link LED on Dell boxes */ - if (bge_sc->bge_flags & BGE_FLAG_NO_3LED) { + if (bge_sc->bge_phy_flags & BGE_PHY_NO_3LED) { PHY_WRITE(sc, BRGPHY_MII_PHY_EXTCTL, PHY_READ(sc, BRGPHY_MII_PHY_EXTCTL) & ~BRGPHY_PHY_EXTCTL_3_LED); From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 23:36:55 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: 
from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 0F70D106564A; Tue, 2 Nov 2010 23:36:55 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id EFECA8FC08; Tue, 2 Nov 2010 23:36:54 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2NastN098490; Tue, 2 Nov 2010 23:36:54 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2NasLX098487; Tue, 2 Nov 2010 23:36:54 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022336.oA2NasLX098487@svn.freebsd.org> From: Pyun YongHyeon Date: Tue, 2 Nov 2010 23:36:54 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214705 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 23:36:55 -0000 Author: yongari Date: Tue Nov 2 23:36:54 2010 New Revision: 214705 URL: http://svn.freebsd.org/changeset/base/214705 Log: MFC r213485,213710,213812: r213485: Overhaul MII register access routine and remove unnecessary BGE_MI_MODE register accesses. Previously bge(4) used to read BGE_MI_MODE register to detect whether it needs to disable autopolling feature or not. Because we don't touch autopolling in other part of driver there is no reason to read BGE_MI_MODE register given that we know default value in advance. 
In order to achieve the goal, check whether the controller has CPMU(Central Power Management Unit) capability. If controller has CPMU feature, use 500KHz MII management interface(mdio/mdc) frequency regardless of core clock frequency. Otherwise use default MII clock. While I'm here, add CPMU register definition. In bge_miibus_readreg(), rearrange code a bit and remove goto statement. In bge_miibus_writereg(), make sure to restore autopolling even if MII write failed. The delay time inserted after accessing BGE_MI_MODE register increased from 40us to 80us. The default PHY address is now stored in softc. All PHYs supported by bge(4) currently use PHY address 1 but it will be changed when we add newer controllers. This change will make it easier to change default PHY address depending on PHY models. Submitted by: davidch r213710: Remove one last reference of BGE_MI_MODE register for auto polling. Previously bge(4) always enabled auto polling for non-BGE_FLAG_TBI controllers. With this change, auto polling is not used anymore so polling through mii(4) was introduced. Reviewed by: davidch r213812: Fix a regression introduced in r213710. r213710 removed the use of auto polling such that it made all controllers obtain link status information from the state of the LNKRDY input signal. Broadcom recommends disabling auto polling such that driver should rely on PHY interrupts for link status change indications. Unfortunately it seems some controllers(BCM5703, BCM5704 and BCM5705) have PHY related issues so Linux took other approach to workaround it. bge(4) didn't follow that and it used to enable auto polling to workaround it. Restore this old behavior for BCM5700 family controllers and BCM5705 to use auto polling. For BCM5700 and BCM5701, it seems it does not need to enable auto polling but I restored it for safety. Special thanks to marius who tried lots of patches with patience.
Reported by: marius Tested by: marius Modified: stable/7/sys/dev/bge/if_bge.c stable/7/sys/dev/bge/if_bgereg.h Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:35:08 2010 (r214704) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:36:54 2010 (r214705) @@ -768,38 +768,34 @@ static int bge_miibus_readreg(device_t dev, int phy, int reg) { struct bge_softc *sc; - uint32_t val, autopoll; + uint32_t val; int i; sc = device_get_softc(dev); - /* - * Broadcom's own driver always assumes the internal - * PHY is at GMII address 1. On some chips, the PHY responds - * to accesses at all addresses, which could cause us to - * bogusly attach the PHY 32 times at probe type. Always - * restricting the lookup to address 1 is simpler than - * trying to figure out which chips revisions should be - * special-cased. - */ - if (phy != 1) + /* Prevent the probe from finding incorrect devices. */ + if (phy != sc->bge_phy_addr) return (0); - /* Reading with autopolling on may trigger PCI errors */ - autopoll = CSR_READ_4(sc, BGE_MI_MODE); - if (autopoll & BGE_MIMODE_AUTOPOLL) { - BGE_CLRBIT(sc, BGE_MI_MODE, BGE_MIMODE_AUTOPOLL); - DELAY(40); + /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ + if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { + CSR_WRITE_4(sc, BGE_MI_MODE, + sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); + DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY | BGE_MIPHY(phy) | BGE_MIREG(reg)); + /* Poll for the PHY register access to complete. 
*/ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); val = CSR_READ_4(sc, BGE_MI_COMM); - if (!(val & BGE_MICOMM_BUSY)) + if ((val & BGE_MICOMM_BUSY) == 0) { + DELAY(5); + val = CSR_READ_4(sc, BGE_MI_COMM); break; + } } if (i == BGE_TIMEOUT) { @@ -807,16 +803,12 @@ bge_miibus_readreg(device_t dev, int phy "PHY read timed out (phy %d, reg %d, val 0x%08x)\n", phy, reg, val); val = 0; - goto done; } - DELAY(5); - val = CSR_READ_4(sc, BGE_MI_COMM); - -done: - if (autopoll & BGE_MIMODE_AUTOPOLL) { - BGE_SETBIT(sc, BGE_MI_MODE, BGE_MIMODE_AUTOPOLL); - DELAY(40); + /* Restore the autopoll bit if necessary. */ + if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { + CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); + DELAY(80); } if (val & BGE_MICOMM_READFAIL) @@ -829,7 +821,6 @@ static int bge_miibus_writereg(device_t dev, int phy, int reg, int val) { struct bge_softc *sc; - uint32_t autopoll; int i; sc = device_get_softc(dev); @@ -838,11 +829,11 @@ bge_miibus_writereg(device_t dev, int ph (reg == BRGPHY_MII_1000CTL || reg == BRGPHY_MII_AUXCTL)) return (0); - /* Reading with autopolling on may trigger PCI errors */ - autopoll = CSR_READ_4(sc, BGE_MI_MODE); - if (autopoll & BGE_MIMODE_AUTOPOLL) { - BGE_CLRBIT(sc, BGE_MI_MODE, BGE_MIMODE_AUTOPOLL); - DELAY(40); + /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ + if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { + CSR_WRITE_4(sc, BGE_MI_MODE, + sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); + DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY | @@ -857,17 +848,16 @@ bge_miibus_writereg(device_t dev, int ph } } - if (i == BGE_TIMEOUT) { + /* Restore the autopoll bit if necessary. 
*/ + if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { + CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); + DELAY(80); + } + + if (i == BGE_TIMEOUT) device_printf(sc->bge_dev, "PHY write timed out (phy %d, reg %d, val %d)\n", phy, reg, val); - return (0); - } - - if (autopoll & BGE_MIMODE_AUTOPOLL) { - BGE_SETBIT(sc, BGE_MI_MODE, BGE_MIMODE_AUTOPOLL); - DELAY(40); - } return (0); } @@ -1918,11 +1908,17 @@ bge_blockinit(struct bge_softc *sc) BGE_MACSTAT_LINK_CHANGED); CSR_WRITE_4(sc, BGE_MI_STS, 0); - /* Enable PHY auto polling (for MII/GMII only) */ + /* + * Enable attention when the link has changed state for + * devices that use auto polling. + */ if (sc->bge_flags & BGE_FLAG_TBI) { CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK); } else { - BGE_SETBIT(sc, BGE_MI_MODE, BGE_MIMODE_AUTOPOLL | (10 << 16)); + if (sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) { + CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); + DELAY(80); + } if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) CSR_WRITE_4(sc, BGE_MAC_EVT_ENB, @@ -2502,6 +2498,9 @@ bge_attach(device_t dev) sc->bge_asicrev = BGE_ASICREV(sc->bge_chipid); sc->bge_chiprev = BGE_CHIPREV(sc->bge_chipid); + /* Set default PHY address. */ + sc->bge_phy_addr = 1; + /* * Don't enable Ethernet@WireSpeed for the 5700, 5906, or the * 5705 A0 and A1 chips. @@ -2575,6 +2574,20 @@ bge_attach(device_t dev) sc->bge_phy_flags |= BGE_PHY_BER_BUG; } + /* Identify the chips that use an CPMU. */ + if (sc->bge_asicrev == BGE_ASICREV_BCM5784 || + sc->bge_asicrev == BGE_ASICREV_BCM5761 || + sc->bge_asicrev == BGE_ASICREV_BCM5785 || + sc->bge_asicrev == BGE_ASICREV_BCM57780) + sc->bge_flags |= BGE_FLAG_CPMU_PRESENT; + if ((sc->bge_flags & BGE_FLAG_CPMU_PRESENT) != 0) + sc->bge_mi_mode = BGE_MIMODE_500KHZ_CONST; + else + sc->bge_mi_mode = BGE_MIMODE_BASE; + /* Enable auto polling for BCM570[0-5]. 
*/ + if (BGE_IS_5700_FAMILY(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5705) + sc->bge_mi_mode |= BGE_MIMODE_AUTOPOLL; + /* * All controllers that are not 5755 or higher have 4GB * boundary DMA bug. @@ -4944,7 +4957,7 @@ bge_link_upd(struct bge_softc *sc) if_printf(sc->bge_ifp, "link DOWN\n"); if_link_state_change(sc->bge_ifp, LINK_STATE_DOWN); } - } else if (CSR_READ_4(sc, BGE_MI_MODE) & BGE_MIMODE_AUTOPOLL) { + } else if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { /* * Some broken BCM chips have BGE_STATFLAG_LINKSTATE_CHANGED bit * in status word always set. Workaround this bug by reading @@ -4972,9 +4985,17 @@ bge_link_upd(struct bge_softc *sc) } } else { /* - * Discard link events for MII/GMII controllers - * if MI auto-polling is disabled. + * For controllers that call mii_tick, we have to poll + * link status. */ + mii = device_get_softc(sc->bge_miibus); + mii_pollstat(mii); + if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && + IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { + bge_miibus_statchg(sc->bge_dev); + sc->bge_link = 1; + } else + sc->bge_link = 0; } /* Clear the attention. 
*/ Modified: stable/7/sys/dev/bge/if_bgereg.h ============================================================================== --- stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:35:08 2010 (r214704) +++ stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:36:54 2010 (r214705) @@ -863,9 +863,12 @@ #define BGE_MISTS_LINK 0x00000001 #define BGE_MISTS_10MBPS 0x00000002 +#define BGE_MIMODE_CLK_10MHZ 0x00000001 #define BGE_MIMODE_SHORTPREAMBLE 0x00000002 #define BGE_MIMODE_AUTOPOLL 0x00000010 #define BGE_MIMODE_CLKCNT 0x001F0000 +#define BGE_MIMODE_500KHZ_CONST 0x00008000 +#define BGE_MIMODE_BASE 0x000C0000 /* @@ -1221,6 +1224,51 @@ /* Receive List Selector Status register */ #define BGE_RXLSSTAT_ERROR 0x00000004 +#define BGE_CPMU_CTRL 0x3600 +#define BGE_CPMU_LSPD_10MB_CLK 0x3604 +#define BGE_CPMU_LSPD_1000MB_CLK 0x360C +#define BGE_CPMU_LNK_AWARE_PWRMD 0x3610 +#define BGE_CPMU_HST_ACC 0x361C +#define BGE_CPMU_CLCK_STAT 0x3630 +#define BGE_CPMU_MUTEX_REQ 0x365C +#define BGE_CPMU_MUTEX_GNT 0x3660 +#define BGE_CPMU_PHY_STRAP 0x3664 + +/* Central Power Management Unit (CPMU) register */ +#define BGE_CPMU_CTRL_LINK_IDLE_MODE 0x00000200 +#define BGE_CPMU_CTRL_LINK_AWARE_MODE 0x00000400 +#define BGE_CPMU_CTRL_LINK_SPEED_MODE 0x00004000 +#define BGE_CPMU_CTRL_GPHY_10MB_RXONLY 0x00010000 + +/* Link Speed 10MB/No Link Power Mode Clock Policy register */ +#define BGE_CPMU_LSPD_10MB_MACCLK_MASK 0x001F0000 +#define BGE_CPMU_LSPD_10MB_MACCLK_6_25 0x00130000 + +/* Link Speed 1000MB Power Mode Clock Policy register */ +#define BGE_CPMU_LSPD_1000MB_MACCLK_62_5 0x00000000 +#define BGE_CPMU_LSPD_1000MB_MACCLK_12_5 0x00110000 +#define BGE_CPMU_LSPD_1000MB_MACCLK_MASK 0x001F0000 + +/* Link Aware Power Mode Clock Policy register */ +#define BGE_CPMU_LNK_AWARE_MACCLK_MASK 0x001F0000 +#define BGE_CPMU_LNK_AWARE_MACCLK_6_25 0x00130000 + +#define BGE_CPMU_HST_ACC_MACCLK_MASK 0x001F0000 +#define BGE_CPMU_HST_ACC_MACCLK_6_25 0x00130000 + +/* CPMU Clock Status register */ +#define 
BGE_CPMU_CLCK_STAT_MAC_CLCK_MASK 0x001F0000 +#define BGE_CPMU_CLCK_STAT_MAC_CLCK_62_5 0x00000000 +#define BGE_CPMU_CLCK_STAT_MAC_CLCK_12_5 0x00110000 +#define BGE_CPMU_CLCK_STAT_MAC_CLCK_6_25 0x00130000 + +/* CPMU Mutex Request register */ +#define BGE_CPMU_MUTEX_REQ_DRIVER 0x00001000 +#define BGE_CPMU_MUTEX_GNT_DRIVER 0x00001000 + +/* CPMU GPHY Strap register */ +#define BGE_CPMU_PHY_STRAP_IS_SERDES 0x00000020 + /* * Mbuf Cluster Free registers (has nothing to do with BSD mbufs) */ @@ -2665,6 +2713,7 @@ struct bge_softc { #define BGE_FLAG_JUMBO 0x00000002 #define BGE_FLAG_EADDR 0x00000008 #define BGE_FLAG_MII_SERDES 0x00000010 +#define BGE_FLAG_CPMU_PRESENT 0x00000020 #define BGE_FLAG_MSI 0x00000100 #define BGE_FLAG_PCIX 0x00000200 #define BGE_FLAG_PCIE 0x00000400 @@ -2707,7 +2756,9 @@ struct bge_softc { uint32_t bge_rx_max_coal_bds; uint32_t bge_tx_max_coal_bds; uint32_t bge_tx_buf_ratio; + uint32_t bge_mi_mode; int bge_if_flags; + int bge_phy_addr; int bge_txcnt; int bge_link; /* link state */ int bge_link_evt; /* pending link event */ From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 23:43:27 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id B34FF1065697; Tue, 2 Nov 2010 23:43:27 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 9F9CC8FC08; Tue, 2 Nov 2010 23:43:27 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2NhRdI098761; Tue, 2 Nov 2010 23:43:27 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2NhRtJ098759; Tue, 2 Nov 2010 23:43:27 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022343.oA2NhRtJ098759@svn.freebsd.org> From: Pyun YongHyeon Date: Tue, 2 Nov 2010 
23:43:27 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214707 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 23:43:27 -0000 Author: yongari Date: Tue Nov 2 23:43:27 2010 New Revision: 214707 URL: http://svn.freebsd.org/changeset/base/214707 Log: MFC r213495,213742: r213495: Add more comments to rings supported by the controller. Different versions of controller support different number of ring control blocks such that adjust code a bit to access known number of send/receive ring control blocks. Previously bge(4) blindly accessed 16 send/receive RCBs. Also move initializing standard receive producer ring producer index, jumbo receive producer ring producer index and mini receive producer ring producer index to the end of each receive producer ring initialization. Do not assume mini receive producer ring is available only when controller has jumbo frame capability, instead explicitly check ASIC version BCM5700 to disable mini receive producer ring. Additionally always enable send ring 0 regardless of controller versions. Previously bge(4) didn't enable send ring 0 if controller is BGE_IS_5705_PLUS. Because bge(4) needs at least 1 send ring to send frames, I have no idea how it would have worked so far. Submitted by: davidch r213742: Fix a regression introduced in r213495. r213495 disabled mini receive producer ring only for BCM5700. It was believed that BCM5700 with external SSRAM is the only controller that supports mini ring but it seems all BCM570[0-4] require the mini receive producer ring to be disabled.
Otherwise, it caused unexpected RX DMA error or watchdog timeouts. Reported by: marius, Steve Kargl troutmask dot apl dot washington dot edu> Tested by: marius, Steve Kargl troutmask dot apl dot washington dot edu> Modified: stable/7/sys/dev/bge/if_bge.c Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:41:43 2010 (r214706) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:43:27 2010 (r214707) @@ -1092,9 +1092,10 @@ bge_init_rx_ring_jumbo(struct bge_softc sc->bge_jumbo = 0; + /* Enable the jumbo receive producer ring. */ rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; - rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, - BGE_RCB_FLAG_USE_EXT_RX_BD); + rcb->bge_maxlen_flags = + BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_USE_EXT_RX_BD); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, BGE_JUMBO_RX_RING_CNT - 1); @@ -1473,7 +1474,7 @@ bge_blockinit(struct bge_softc *sc) bus_size_t vrcb; bge_hostaddr taddr; uint32_t val; - int i; + int i, limit; /* * Initialize the memory window pointer register so that @@ -1553,7 +1554,38 @@ bge_blockinit(struct bge_softc *sc) return (ENXIO); } - /* Initialize the standard RX ring control block */ + /* + * Summary of rings supported by the controller: + * + * Standard Receive Producer Ring + * - This ring is used to feed receive buffers for "standard" + * sized frames (typically 1536 bytes) to the controller. + * + * Jumbo Receive Producer Ring + * - This ring is used to feed receive buffers for jumbo sized + * frames (i.e. anything bigger than the "standard" frames) + * to the controller. 
+ * + * Mini Receive Producer Ring + * - This ring is used to feed receive buffers for "mini" + * sized frames to the controller. + * - This feature required external memory for the controller + * but was never used in a production system. Should always + * be disabled. + * + * Receive Return Ring + * - After the controller has placed an incoming frame into a + * receive buffer that buffer is moved into a receive return + * ring. The driver is then responsible to passing the + * buffer up to the stack. Many versions of the controller + * support multiple RR rings. + * + * Send Ring + * - This ring is used for outgoing frames. Many versions of + * the controller support multiple send rings. + */ + + /* Initialize the standard receive producer ring control block. */ rcb = &sc->bge_ldata.bge_info.bge_std_rx_rcb; rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_std_ring_paddr); @@ -1561,28 +1593,45 @@ bge_blockinit(struct bge_softc *sc) BGE_ADDR_HI(sc->bge_ldata.bge_rx_std_ring_paddr); bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREREAD); - if (BGE_IS_5705_PLUS(sc)) + if (BGE_IS_5705_PLUS(sc)) { + /* + * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32) + * Bits 15-2 : Reserved (should be 0) + * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled + * Bit 0 : Reserved + */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0); - else + } else { + /* + * Ring size is always XXX entries + * Bits 31-16: Maximum RX frame size + * Bits 15-2 : Reserved (should be 0) + * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled + * Bit 0 : Reserved + */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN, 0); + } rcb->bge_nicaddr = BGE_STD_RX_RINGS; + /* Write the standard receive producer ring control block. 
*/ CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); - CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr); + /* Reset the standard receive producer ring producer index. */ + bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0); + /* - * Initialize the jumbo RX ring control block - * We set the 'ring disabled' bit in the flags - * field until we're actually ready to start + * Initialize the jumbo RX producer ring control + * block. We set the 'ring disabled' bit in the + * flags field until we're actually ready to start * using this ring (i.e. once we set the MTU * high enough to require it). */ if (BGE_IS_JUMBO_CAPABLE(sc)) { rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; - + /* Get the jumbo receive producer ring RCB parameters. */ rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_jumbo_ring_paddr); rcb->bge_hostaddr.bge_addr_hi = @@ -1597,20 +1646,31 @@ bge_blockinit(struct bge_softc *sc) rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); - + /* Program the jumbo receive producer ring RCB parameters. */ CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr); + /* Reset the jumbo receive producer ring producer index. */ + bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0); + } - /* Set up dummy disabled mini ring RCB */ + /* Disable the mini receive producer ring RCB. */ + if (BGE_IS_5700_FAMILY(sc)) { rcb = &sc->bge_ldata.bge_info.bge_mini_rx_rcb; rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED); CSR_WRITE_4(sc, BGE_RX_MINI_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); + /* Reset the mini receive producer ring producer index. 
*/ + bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0); } /* + * The BD ring replenish thresholds control how often the + * hardware fetches new BD's from the producer rings in host + * memory. Setting the value too low on a busy system can + * starve the hardware and recue the throughpout. + * * Set the BD ring replentish thresholds. The recommended * values are 1/8th the number of descriptors allocated to * each ring. @@ -1632,61 +1692,67 @@ bge_blockinit(struct bge_softc *sc) BGE_JUMBO_RX_RING_CNT/8); /* - * Disable all unused send rings by setting the 'ring disabled' - * bit in the flags field of all the TX send ring control blocks. - * These are located in NIC memory. - */ + * Disable all send rings by setting the 'ring disabled' bit + * in the flags field of all the TX send ring control blocks, + * located in NIC memory. + */ + if (!BGE_IS_5705_PLUS(sc)) + /* 5700 to 5704 had 16 send rings. */ + limit = BGE_TX_RINGS_EXTSSRAM_MAX; + else + limit = 1; vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; - for (i = 0; i < BGE_TX_RINGS_EXTSSRAM_MAX; i++) { + for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED)); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); vrcb += sizeof(struct bge_rcb); } - /* Configure TX RCB 0 (we use only the first ring) */ + /* Configure send ring RCB 0 (we use only the first ring) */ vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_tx_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_NIC_TXRING_ADDR(0, BGE_TX_RING_CNT)); - if (!(BGE_IS_5705_PLUS(sc))) - RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, - BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0)); + RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, + BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0)); - /* Disable all unused RX return rings */ + /* + * Disable all receive return rings by setting the + 
* 'ring diabled' bit in the flags field of all the receive + * return ring control blocks, located in NIC memory. + */ + if (!BGE_IS_5705_PLUS(sc)) + limit = BGE_RX_RINGS_MAX; + else if (sc->bge_asicrev == BGE_ASICREV_BCM5755) + limit = 4; + else + limit = 1; + /* Disable all receive return rings. */ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; - for (i = 0; i < BGE_RX_RINGS_MAX; i++) { + for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, - BGE_RCB_MAXLEN_FLAGS(sc->bge_return_ring_cnt, - BGE_RCB_FLAG_RING_DISABLED)); + BGE_RCB_FLAG_RING_DISABLED); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); bge_writembx(sc, BGE_MBX_RX_CONS0_LO + (i * (sizeof(uint64_t))), 0); vrcb += sizeof(struct bge_rcb); } - /* Initialize RX ring indexes */ - bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0); - if (BGE_IS_JUMBO_CAPABLE(sc)) - bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0); - if (sc->bge_asicrev == BGE_ASICREV_BCM5700) - bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0); - /* - * Set up RX return ring 0 - * Note that the NIC address for RX return rings is 0x00000000. - * The return rings live entirely within the host, so the - * nicaddr field in the RCB isn't used. + * Set up receive return ring 0. Note that the NIC address + * for RX return rings is 0x0. The return rings live entirely + * within the host, so the nicaddr field in the RCB isn't used. 
*/ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_rx_return_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); - RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0x00000000); + RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(sc->bge_return_ring_cnt, 0)); From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 23:49:48 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id C2C11106564A; Tue, 2 Nov 2010 23:49:48 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id B035A8FC15; Tue, 2 Nov 2010 23:49:48 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2Nnmbe099173; Tue, 2 Nov 2010 23:49:48 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2NnmEx099171; Tue, 2 Nov 2010 23:49:48 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022349.oA2NnmEx099171@svn.freebsd.org> From: Pyun YongHyeon Date: Tue, 2 Nov 2010 23:49:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214711 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 23:49:48 -0000 Author: yongari Date: Tue Nov 2 23:49:48 2010 
New Revision: 214711 URL: http://svn.freebsd.org/changeset/base/214711 Log: MFC r213522,213587,213711: r213522: Fix a long standing bug which regarded some revisions of controller as 5788. This caused the BGE_MISC_LOCAL_CTL register to be used to generate link state change interrupt for non-5788 controllers. The interrupt handler may or may not detect link state attention as status block wouldn't be updated when an interrupt was generated with BGE_MISC_LOCAL_CTL register. All controllers except 5700 and 5788 should use host coalescing mode register to trigger an interrupt. r213587: Do not blindly UP the interface when interface's MTU is changed. If driver is not running there is no need to up the interface. While I'm here hold driver lock before modifying MTU as it is referenced in RX handler. r213711: The IFF_DRV_RUNNING flag is set at the end of bge_init_locked. But before setting the flag, interrupt was already enabled such that interrupt handler could be run before setting IFF_DRV_RUNNING flag. This can lose initial link state change interrupt which in turn makes bge(4) think that it still does not have valid link. Fix this race by protecting the taskqueue with a driver lock. While I'm here move reenabling interrupt code after handling of link state change.
Reviewed by: davidch Modified: stable/7/sys/dev/bge/if_bge.c Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:48:08 2010 (r214710) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:49:48 2010 (r214711) @@ -2665,14 +2665,12 @@ bge_attach(device_t dev) if (BGE_IS_5755_PLUS(sc) == 0) sc->bge_flags |= BGE_FLAG_4G_BNDRY_BUG; - /* - * We could possibly check for BCOM_DEVICEID_BCM5788 in bge_probe() - * but I do not know the DEVICEID for the 5788M. - */ - misccfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID; - if (misccfg == BGE_MISCCFG_BOARD_ID_5788 || - misccfg == BGE_MISCCFG_BOARD_ID_5788M) - sc->bge_flags |= BGE_FLAG_5788; + if (sc->bge_asicrev == BGE_ASICREV_BCM5705) { + misccfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID; + if (misccfg == BGE_MISCCFG_BOARD_ID_5788 || + misccfg == BGE_MISCCFG_BOARD_ID_5788M) + sc->bge_flags |= BGE_FLAG_5788; + } /* * Some controllers seem to require a special firmware to use @@ -3634,8 +3632,11 @@ bge_intr_task(void *arg, int pending) sc = (struct bge_softc *)arg; ifp = sc->bge_ifp; - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + BGE_LOCK(sc); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + BGE_UNLOCK(sc); return; + } /* Get updated status block. */ bus_dmamap_sync(sc->bge_cdata.bge_status_tag, @@ -3650,26 +3651,27 @@ bge_intr_task(void *arg, int pending) bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) != 0) + bge_link_upd(sc); + /* Let controller work. 
*/ bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); - if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) != 0) { - BGE_LOCK(sc); - bge_link_upd(sc); - BGE_UNLOCK(sc); - } - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + if (ifp->if_drv_flags & IFF_DRV_RUNNING && + sc->bge_rx_saved_considx != rx_prod) { /* Check RX return ring producer/consumer. */ + BGE_UNLOCK(sc); bge_rxeof(sc, rx_prod, 0); + BGE_LOCK(sc); } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - BGE_LOCK(sc); /* Check TX ring producer/consumer. */ bge_txeof(sc, tx_cons); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) bge_start_locked(ifp); - BGE_UNLOCK(sc); } + BGE_UNLOCK(sc); } static void @@ -4640,6 +4642,7 @@ bge_ioctl(struct ifnet *ifp, u_long comm switch (command) { case SIOCSIFMTU: + BGE_LOCK(sc); if (ifr->ifr_mtu < ETHERMIN || ((BGE_IS_JUMBO_CAPABLE(sc)) && ifr->ifr_mtu > BGE_JUMBO_MTU) || @@ -4648,9 +4651,12 @@ bge_ioctl(struct ifnet *ifp, u_long comm error = EINVAL; else if (ifp->if_mtu != ifr->ifr_mtu) { ifp->if_mtu = ifr->ifr_mtu; - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - bge_init(sc); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + bge_init_locked(sc); + } } + BGE_UNLOCK(sc); break; case SIOCSIFFLAGS: BGE_LOCK(sc); From owner-svn-src-stable-7@FreeBSD.ORG Tue Nov 2 23:56:35 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 7E207106566B; Tue, 2 Nov 2010 23:56:35 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 6B9A38FC17; Tue, 2 Nov 2010 23:56:35 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA2NuZuA099490; Tue, 2 Nov 2010 23:56:35 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA2NuZso099487; Tue, 2 
Nov 2010 23:56:35 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011022356.oA2NuZso099487@svn.freebsd.org> From: Pyun YongHyeon Date: Tue, 2 Nov 2010 23:56:35 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214713 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2010 23:56:35 -0000 Author: yongari Date: Tue Nov 2 23:56:35 2010 New Revision: 214713 URL: http://svn.freebsd.org/changeset/base/214713 Log: MFC r213747,213808,214216: r213747: Protect bge(4) from accessing invalid NIC internal memory regions on BCM5906. Tested by: Buganini < buganini <> gmail dot com > r213808: Add more checks for resolved link speed in bge_miibus_statchg(). Link UP state could be reported first before actual completion of auto-negotiation. This change makes bge(4) reprogram BGE_MAC_MODE, BGE_TX_MODE and BGE_RX_MODE register only after controller got a valid link. r214216: Enable TX MAC state machine lockup fix for both BCM5755 or higher and BCM5906. Publicly available data sheet just says it may happen due to corrupted TxMbuf. 
Modified: stable/7/sys/dev/bge/if_bge.c stable/7/sys/dev/bge/if_bgereg.h Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:54:59 2010 (r214712) +++ stable/7/sys/dev/bge/if_bge.c Tue Nov 2 23:56:35 2010 (r214713) @@ -550,6 +550,10 @@ bge_readmem_ind(struct bge_softc *sc, in device_t dev; uint32_t val; + if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && + off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) + return (0); + dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); @@ -563,6 +567,10 @@ bge_writemem_ind(struct bge_softc *sc, i { device_t dev; + if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && + off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) + return; + dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); @@ -870,6 +878,29 @@ bge_miibus_statchg(device_t dev) sc = device_get_softc(dev); mii = device_get_softc(sc->bge_miibus); + if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == + (IFM_ACTIVE | IFM_AVALID)) { + switch (IFM_SUBTYPE(mii->mii_media_active)) { + case IFM_10_T: + case IFM_100_TX: + sc->bge_link = 1; + break; + case IFM_1000_T: + case IFM_1000_SX: + case IFM_2500_SX: + if (sc->bge_asicrev != BGE_ASICREV_BCM5906) + sc->bge_link = 1; + else + sc->bge_link = 0; + break; + default: + sc->bge_link = 0; + break; + } + } else + sc->bge_link = 0; + if (sc->bge_link == 0) + return; BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_PORTMODE); if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T || IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX) @@ -4335,6 +4366,7 @@ bge_init_locked(struct bge_softc *sc) { struct ifnet *ifp; uint16_t *m; + uint32_t mode; BGE_LOCK_ASSERT(sc); @@ -4440,8 +4472,12 @@ 
bge_init_locked(struct bge_softc *sc) /* Init TX ring. */ bge_init_tx_ring(sc); + /* Enable TX MAC state machine lockup fix. */ + mode = CSR_READ_4(sc, BGE_TX_MODE); + if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906) + mode |= BGE_TXMODE_MBUF_LOCKUP_FIX; /* Turn on transmitter. */ - BGE_SETBIT(sc, BGE_TX_MODE, BGE_TXMODE_ENABLE); + CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE); /* Turn on receiver. */ BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE); @@ -5062,12 +5098,7 @@ bge_link_upd(struct bge_softc *sc) */ mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); - if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && - IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { - bge_miibus_statchg(sc->bge_dev); - sc->bge_link = 1; - } else - sc->bge_link = 0; + bge_miibus_statchg(sc->bge_dev); } /* Clear the attention. */ Modified: stable/7/sys/dev/bge/if_bgereg.h ============================================================================== --- stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:54:59 2010 (r214712) +++ stable/7/sys/dev/bge/if_bgereg.h Tue Nov 2 23:56:35 2010 (r214713) @@ -765,6 +765,7 @@ #define BGE_TXMODE_FLOWCTL_ENABLE 0x00000010 #define BGE_TXMODE_BIGBACKOFF_ENABLE 0x00000020 #define BGE_TXMODE_LONGPAUSE_ENABLE 0x00000040 +#define BGE_TXMODE_MBUF_LOCKUP_FIX 0x00000100 /* Transmit MAC status register */ #define BGE_TXSTAT_RX_XOFFED 0x00000001 From owner-svn-src-stable-7@FreeBSD.ORG Wed Nov 3 00:13:22 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 69DAF106564A; Wed, 3 Nov 2010 00:13:22 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 56D928FC0A; Wed, 3 Nov 2010 00:13:22 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP 
id oA30DMSF000291; Wed, 3 Nov 2010 00:13:22 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA30DMkr000288; Wed, 3 Nov 2010 00:13:22 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011030013.oA30DMkr000288@svn.freebsd.org> From: Pyun YongHyeon Date: Wed, 3 Nov 2010 00:13:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214715 - stable/7/sys/dev/bge X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 03 Nov 2010 00:13:22 -0000 Author: yongari Date: Wed Nov 3 00:13:22 2010 New Revision: 214715 URL: http://svn.freebsd.org/changeset/base/214715 Log: MFC r214087,214219,214251,214292: r214087: Add workaround for BCM5906 controller silicon bug. If device receive two back-to-back send BDs with less than or equal to 8 total bytes then the device may hang. The two back-to-back send BDs must be in the same frame for this failure to occur. Thanks to davidch for detailed errata information. Reviewed by: davidch r214219: Add workaround for BCM5906 A1 controller silicon bug. When auto-negotiation results in half-duplex operation, excess collision on the ethernet link may cause internal chip delays that may result in subsequent valid frames being dropped due to insufficient receive buffer resources. The workaround is to choose de-pipeline method as a flow control decision for SDI. De-pipeline method allows only 1 data in TxMbuf at a time such that a request to RDMA from SDI is made only when TxMbuf is empty. Thanks for david for providing detailed errata information. 
r214251: Apply the same workaround for SDI flow control used on BCM5906 A1 to BCM6906 A0/A2. This should fix a long standing BCM5906 A2 lockup issues. Data sheet explicitly mentions BCM5906 A0, A1 and A2 use de-pipelined mode on these revisions. Special thanks to Buganini who tried all combinations of experimental patches for more than 10 days. Tested by: Buganini gmail dot com > r214292: Use bge_chipid to compare controller ids. r214251 incorrectly used bge_chiprev. Reported by: Buganini gmail dot com > Modified: stable/7/sys/dev/bge/if_bge.c stable/7/sys/dev/bge/if_bgereg.h Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/bge/if_bge.c ============================================================================== --- stable/7/sys/dev/bge/if_bge.c Wed Nov 3 00:03:26 2010 (r214714) +++ stable/7/sys/dev/bge/if_bge.c Wed Nov 3 00:13:22 2010 (r214715) @@ -374,6 +374,7 @@ static void bge_tick(void *); static void bge_stats_clear_regs(struct bge_softc *); static void bge_stats_update(struct bge_softc *); static void bge_stats_update_regs(struct bge_softc *); +static struct mbuf *bge_check_short_dma(struct mbuf *); static struct mbuf *bge_setup_tso(struct bge_softc *, struct mbuf *, uint16_t *); static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *); @@ -1696,6 +1697,14 @@ bge_blockinit(struct bge_softc *sc) bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0); } + /* Choose de-pipeline mode for BCM5906 A0, A1 and A2. 
*/ + if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { + if (sc->bge_chipid == BGE_CHIPID_BCM5906_A0 || + sc->bge_chipid == BGE_CHIPID_BCM5906_A1 || + sc->bge_chipid == BGE_CHIPID_BCM5906_A2) + CSR_WRITE_4(sc, BGE_ISO_PKT_TX, + (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2); + } /* * The BD ring replenish thresholds control how often the * hardware fetches new BD's from the producer rings in host @@ -2632,12 +2641,14 @@ bge_attach(device_t dev) case BGE_ASICREV_BCM5780: case BGE_ASICREV_BCM5714: sc->bge_flags |= BGE_FLAG_5714_FAMILY /* | BGE_FLAG_JUMBO */; - /* FALLTHRU */ + /* FALLTHROUGH */ case BGE_ASICREV_BCM5750: case BGE_ASICREV_BCM5752: case BGE_ASICREV_BCM5906: sc->bge_flags |= BGE_FLAG_575X_PLUS; - /* FALLTHRU */ + if (sc->bge_asicrev == BGE_ASICREV_BCM5906) + sc->bge_flags |= BGE_FLAG_SHORT_DMA_BUG; + /* FALLTHROUGH */ case BGE_ASICREV_BCM5705: sc->bge_flags |= BGE_FLAG_5705_PLUS; break; @@ -4060,6 +4071,39 @@ bge_cksum_pad(struct mbuf *m) } static struct mbuf * +bge_check_short_dma(struct mbuf *m) +{ + struct mbuf *n; + int found; + + /* + * If device receive two back-to-back send BDs with less than + * or equal to 8 total bytes then the device may hang. The two + * back-to-back send BDs must in the same frame for this failure + * to occur. Scan mbuf chains and see whether two back-to-back + * send BDs are there. If this is the case, allocate new mbuf + * and copy the frame to workaround the silicon bug. 
+ */ + for (n = m, found = 0; n != NULL; n = n->m_next) { + if (n->m_len < 8) { + found++; + if (found > 1) + break; + continue; + } + found = 0; + } + + if (found > 1) { + n = m_defrag(m, M_DONTWAIT); + if (n == NULL) + m_freem(m); + } else + n = m; + return (n); +} + +static struct mbuf * bge_setup_tso(struct bge_softc *sc, struct mbuf *m, uint16_t *mss) { struct ip *ip; @@ -4132,6 +4176,13 @@ bge_encap(struct bge_softc *sc, struct m csum_flags = 0; mss = 0; vlan_tag = 0; + if ((sc->bge_flags & BGE_FLAG_SHORT_DMA_BUG) != 0 && + m->m_next != NULL) { + *m_head = bge_check_short_dma(m); + if (*m_head == NULL) + return (ENOBUFS); + m = *m_head; + } if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { *m_head = m = bge_setup_tso(sc, m, &mss); if (*m_head == NULL) Modified: stable/7/sys/dev/bge/if_bgereg.h ============================================================================== --- stable/7/sys/dev/bge/if_bgereg.h Wed Nov 3 00:03:26 2010 (r214714) +++ stable/7/sys/dev/bge/if_bgereg.h Wed Nov 3 00:13:22 2010 (r214715) @@ -306,6 +306,7 @@ #define BGE_CHIPID_BCM5787_A0 0xb000 #define BGE_CHIPID_BCM5787_A1 0xb001 #define BGE_CHIPID_BCM5787_A2 0xb002 +#define BGE_CHIPID_BCM5906_A0 0xc000 #define BGE_CHIPID_BCM5906_A1 0xc001 #define BGE_CHIPID_BCM5906_A2 0xc002 #define BGE_CHIPID_BCM57780_A0 0x57780000 @@ -880,6 +881,7 @@ #define BGE_SDI_STATS_CTL 0x0C08 #define BGE_SDI_STATS_ENABLE_MASK 0x0C0C #define BGE_SDI_STATS_INCREMENT_MASK 0x0C10 +#define BGE_ISO_PKT_TX 0x0C20 #define BGE_LOCSTATS_COS0 0x0C80 #define BGE_LOCSTATS_COS1 0x0C84 #define BGE_LOCSTATS_COS2 0x0C88 @@ -2728,6 +2730,7 @@ struct bge_softc { #define BGE_FLAG_40BIT_BUG 0x01000000 #define BGE_FLAG_4G_BNDRY_BUG 0x02000000 #define BGE_FLAG_RX_ALIGNBUG 0x04000000 +#define BGE_FLAG_SHORT_DMA_BUG 0x08000000 uint32_t bge_phy_flags; #define BGE_PHY_WIRESPEED 0x00000001 #define BGE_PHY_ADC_BUG 0x00000002 From owner-svn-src-stable-7@FreeBSD.ORG Wed Nov 3 01:26:12 2010 Return-Path: Delivered-To: 
svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 23B4C106566C; Wed, 3 Nov 2010 01:26:12 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id EAFCE8FC0C; Wed, 3 Nov 2010 01:26:11 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA31QBC6002502; Wed, 3 Nov 2010 01:26:11 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA31QB2V002500; Wed, 3 Nov 2010 01:26:11 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011030126.oA31QB2V002500@svn.freebsd.org> From: Pyun YongHyeon Date: Wed, 3 Nov 2010 01:26:11 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214718 - stable/7/sys/dev/fxp X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 03 Nov 2010 01:26:12 -0000 Author: yongari Date: Wed Nov 3 01:26:11 2010 New Revision: 214718 URL: http://svn.freebsd.org/changeset/base/214718 Log: MFC r214302: Add TSO support over VLAN for i82550/i82551. Controller requires VLAN hardware tagging to make TSO work over VLAN. So if VLAN hardware tagging is disabled explicitly clear TSO over VLAN. While I'm here allow disabling VLAN TX checksum offloading. 
Tested by: Liudas < liudasb <> centras dot lt > Modified: stable/7/sys/dev/fxp/if_fxp.c Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/dev/fxp/if_fxp.c ============================================================================== --- stable/7/sys/dev/fxp/if_fxp.c Wed Nov 3 01:24:33 2010 (r214717) +++ stable/7/sys/dev/fxp/if_fxp.c Wed Nov 3 01:26:11 2010 (r214718) @@ -859,9 +859,9 @@ fxp_attach(device_t dev) ifp->if_capenable |= IFCAP_VLAN_MTU; /* the hw bits already set */ if ((sc->flags & FXP_FLAG_EXT_RFA) != 0) { ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | - IFCAP_VLAN_HWCSUM; + IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | - IFCAP_VLAN_HWCSUM; + IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; } /* @@ -2855,10 +2855,19 @@ fxp_ioctl(struct ifnet *ifp, u_long comm if (ifp->if_flags & IFF_UP) reinit++; } + if ((mask & IFCAP_VLAN_HWCSUM) != 0 && + (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0) + ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; + if ((mask & IFCAP_VLAN_HWTSO) != 0 && + (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) + ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - reinit++; + if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) + ifp->if_capenable &= + ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); + reinit++; } if (reinit > 0 && ifp->if_flags & IFF_UP) fxp_init_body(sc); From owner-svn-src-stable-7@FreeBSD.ORG Wed Nov 3 01:28:57 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 79C6C1065670; Wed, 3 Nov 2010 01:28:57 +0000 (UTC) (envelope-from yongari@FreeBSD.org) Received: from svn.freebsd.org 
(svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 4CE9E8FC0A; Wed, 3 Nov 2010 01:28:57 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA31Sv5R002697; Wed, 3 Nov 2010 01:28:57 GMT (envelope-from yongari@svn.freebsd.org) Received: (from yongari@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA31SvA8002695; Wed, 3 Nov 2010 01:28:57 GMT (envelope-from yongari@svn.freebsd.org) Message-Id: <201011030128.oA31SvA8002695@svn.freebsd.org> From: Pyun YongHyeon Date: Wed, 3 Nov 2010 01:28:57 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214721 - stable/7/share/man/man4 X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 03 Nov 2010 01:28:57 -0000 Author: yongari Date: Wed Nov 3 01:28:57 2010 New Revision: 214721 URL: http://svn.freebsd.org/changeset/base/214721 Log: MFC r214432: Use shorten model name and add RTL8168, RTL8111 to the list of supported device. re(4) had been supported all variants of RTL8168, RTL8111 and RTL810x. I think this change will cover all controllers supported by re(4). 
Modified: stable/7/share/man/man4/re.4 Directory Properties: stable/7/share/man/man4/ (props changed) Modified: stable/7/share/man/man4/re.4 ============================================================================== --- stable/7/share/man/man4/re.4 Wed Nov 3 01:28:09 2010 (r214720) +++ stable/7/share/man/man4/re.4 Wed Nov 3 01:28:57 2010 (r214721) @@ -30,12 +30,12 @@ .\" .\" $FreeBSD$ .\" -.Dd February 9, 2009 +.Dd October 27, 2010 .Dt RE 4 .Os .Sh NAME .Nm re -.Nd "RealTek 8139C+/8169/816xS/811xS/8101E PCI/PCIe Ethernet adapter driver" +.Nd "RealTek 8139C+/8169/816xS/811xS/8168/810xE/8111 PCI/PCIe Ethernet adapter driver" .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your @@ -55,13 +55,13 @@ if_re_load="YES" The .Nm driver provides support for various NICs based on the RealTek RTL8139C+, -RTL8169, RTL8169S, RTL8110S, RTL8168S, RTL8111S and RTL8101E PCI and +RTL8169, RTL816xS, RTL811xS, RTL8168, RTL810xE and RTL8111 PCI and PCIe Ethernet controllers. .Pp -NICs based on the 8139C+ and 8101E are capable of 10 and 100Mbps speeds +NICs based on the 8139C+ and 810xE are capable of 10 and 100Mbps speeds over CAT5 cable. -NICs based on the 8169, 816xS and 811xS are capable of 10, 100 and -1000Mbps operation. +NICs based on the 8169, 816xS, 811xS, 8168 and 8111 are capable of 10, 100 +and 1000Mbps operation. .Pp All NICs supported by the .Nm @@ -72,8 +72,8 @@ capable of TCP large send (TCP segmentat .Pp The 8139C+ is a single-chip solution combining both a 10/100 MAC and PHY. The 8169 is a 10/100/1000 MAC only, requiring a GMII or TBI external PHY. -The 8169S and 8110S are single-chip devices containing both a 10/100/1000 -MAC and 10/100/1000 copper PHY. +The 816xS, 811xS, 8168 and 8111 are single-chip devices containing both a +10/100/1000 MAC and 10/100/1000 copper PHY. Standalone 10/100/1000 cards are available in both 32-bit PCI and 64-bit PCI models. 
The 8110S is designed for @@ -144,8 +144,8 @@ For more information on configuring this .Sh HARDWARE The .Nm -driver supports RealTek RTL8139C+, RTL8169, RTL816xS, RTL811xS, -and RTL8101E based Fast Ethernet and Gigabit Ethernet adapters including: +driver supports RealTek RTL8139C+, RTL8169, RTL816xS, RTL811xS, RTL8168, +RTL810xE and RTL8111 based Fast Ethernet and Gigabit Ethernet adapters including: .Pp .Bl -bullet -compact .It From owner-svn-src-stable-7@FreeBSD.ORG Wed Nov 3 01:32:45 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 022B210657C7; Wed, 3 Nov 2010 01:32:45 +0000 (UTC) (envelope-from edwin@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id C933C8FC12; Wed, 3 Nov 2010 01:32:44 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA31Wig4002954; Wed, 3 Nov 2010 01:32:44 GMT (envelope-from edwin@svn.freebsd.org) Received: (from edwin@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA31WiM2002952; Wed, 3 Nov 2010 01:32:44 GMT (envelope-from edwin@svn.freebsd.org) Message-Id: <201011030132.oA31WiM2002952@svn.freebsd.org> From: Edwin Groothuis Date: Wed, 3 Nov 2010 01:32:44 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214724 - stable/7/share/zoneinfo X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 03 Nov 2010 01:32:45 -0000 Author: edwin Date: Wed Nov 3 01:32:44 2010 New 
Revision: 214724 URL: http://svn.freebsd.org/changeset/base/214724 Log: MFC of r214722, tzdata2010o: - Fiji ends DST three weeks earlier in 2011. Modified: stable/7/share/zoneinfo/australasia Directory Properties: stable/7/share/zoneinfo/ (props changed) Modified: stable/7/share/zoneinfo/australasia ============================================================================== --- stable/7/share/zoneinfo/australasia Wed Nov 3 01:32:39 2010 (r214723) +++ stable/7/share/zoneinfo/australasia Wed Nov 3 01:32:44 2010 (r214724) @@ -1,5 +1,5 @@ #
-# @(#)australasia	8.18
+# @(#)australasia	8.20
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
@@ -284,13 +284,26 @@ Zone	Indian/Cocos	6:27:40	-	LMT	1900
 # http://www.timeanddate.com/news/time/fiji-dst-ends-march-2010.html
 # 
 
+# From Alexander Krivenyshev (2010-10-24):
+# According to Radio Fiji and Fiji Times online, Fiji will end DST 3 
+# weeks earlier than expected - on March 6, 2011, not March 27, 2011...
+# Here is confirmation from Government of the Republic of the Fiji Islands, 
+# Ministry of Information (fiji.gov.fj) web site:
+# 
+# http://www.fiji.gov.fj/index.php?option=com_content&view=article&id=2608:daylight-savings&catid=71:press-releases&Itemid=155
+# 
+# or
+# 
+# http://www.worldtimezone.com/dst_news/dst_news_fiji04.html
+# 
+
 # Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
 Rule	Fiji	1998	1999	-	Nov	Sun>=1	2:00	1:00	S
 Rule	Fiji	1999	2000	-	Feb	lastSun	3:00	0	-
 Rule	Fiji	2009	only	-	Nov	29	2:00	1:00	S
 Rule	Fiji	2010	only	-	Mar	lastSun	3:00	0	-
 Rule	Fiji	2010	only	-	Oct	24	2:00	1:00	S
-Rule	Fiji	2011	only	-	Mar	lastSun 3:00	0	-
+Rule	Fiji	2011	only	-	Mar	Sun>=1	3:00	0	-
 # Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
 Zone	Pacific/Fiji	11:53:40 -	LMT	1915 Oct 26	# Suva
 			12:00	Fiji	FJ%sT	# Fiji Time
@@ -487,11 +500,21 @@ Zone Pacific/Pago_Pago	 12:37:12 -	LMT	1
 # http://www.parliament.gov.ws/documents/acts/Daylight%20Saving%20Act%20%202009%20%28English%29%20-%20Final%207-7-091.pdf
 # 
 
+# From Raymond Hughes (2010-10-07):
+# Please see
+# 
+# http://www.mcil.gov.ws
+# ,
+# the Ministry of Commerce, Industry and Labour (sideframe) "Last Sunday
+# September 2010 (26/09/10) - adjust clocks forward from 12:00 midnight
+# to 01:00am and First Sunday April 2011 (03/04/11) - adjust clocks
+# backwards from 1:00am to 12:00am"
+
 Zone Pacific/Apia	 12:33:04 -	LMT	1879 Jul  5
 			-11:26:56 -	LMT	1911
 			-11:30	-	SAMT	1950		# Samoa Time
 			-11:00	-	WST	2010 Sep 26
-			-11:00	1:00	WSDT	2011 Apr 3
+			-11:00	1:00	WSDT	2011 Apr 3 1:00
 			-11:00	-	WST
 
 # Solomon Is

From owner-svn-src-stable-7@FreeBSD.ORG  Wed Nov  3 10:12:13 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 98D171065670;
	Wed,  3 Nov 2010 10:12:13 +0000 (UTC)
	(envelope-from edwin@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 864028FC20;
	Wed,  3 Nov 2010 10:12:13 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA3ACDME015466;
	Wed, 3 Nov 2010 10:12:13 GMT (envelope-from edwin@svn.freebsd.org)
Received: (from edwin@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA3ACDeW015464;
	Wed, 3 Nov 2010 10:12:13 GMT (envelope-from edwin@svn.freebsd.org)
Message-Id: <201011031012.oA3ACDeW015464@svn.freebsd.org>
From: Edwin Groothuis 
Date: Wed, 3 Nov 2010 10:12:13 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214730 - stable/7/usr.bin/uudecode
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Wed, 03 Nov 2010 10:12:13 -0000

Author: edwin
Date: Wed Nov  3 10:12:13 2010
New Revision: 214730
URL: http://svn.freebsd.org/changeset/base/214730

Log:
  MFC of r214002, r214010
  
  - Stylify of uudecode(1)
    Part of PR bin/124739.
  
  - "b64decode -r" did not handle arbitrary breaks in base64 encoded
    data. White space should be accepted anywhere in a base64 encoded
    stream, not just after every chunk (4 characters).
  
    Test-scenario:
  
    VmVsb2NpdHkgUmV3YXJkcw==
  
    and
  
    VmVsb2NpdHkgUmV3YXJkcw
    ==
  
    should both produce "Velocity Rewards"
  
  PR:             bin/124739
  Submitted by:   Mark Andrews 

Modified:
  stable/7/usr.bin/uudecode/uudecode.c
Directory Properties:
  stable/7/usr.bin/uudecode/   (props changed)

Modified: stable/7/usr.bin/uudecode/uudecode.c
==============================================================================
--- stable/7/usr.bin/uudecode/uudecode.c	Wed Nov  3 10:10:34 2010	(r214729)
+++ stable/7/usr.bin/uudecode/uudecode.c	Wed Nov  3 10:12:13 2010	(r214730)
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +88,7 @@ main(int argc, char *argv[])
 		base64 = 1;
 
 	while ((ch = getopt(argc, argv, "cimo:prs")) != -1) {
-		switch(ch) {
+		switch (ch) {
 		case 'c':
 			if (oflag || rflag)
 				usage();
@@ -125,10 +126,10 @@ main(int argc, char *argv[])
 			usage();
 		}
 	}
-        argc -= optind;
-        argv += optind;
+	argc -= optind;
+	argv += optind;
 
-	if (*argv) {
+	if (*argv != NULL) {
 		rval = 0;
 		do {
 			infp = fopen(infile = *argv, "r");
@@ -184,7 +185,7 @@ decode2(void)
 	void *handle;
 	struct passwd *pw;
 	struct stat st;
-	char buf[MAXPATHLEN+1];
+	char buf[MAXPATHLEN + 1];
 
 	base64 = 0;
 	/* search for header line */
@@ -259,7 +260,7 @@ decode2(void)
 	if (pflag || strcmp(outfile, "/dev/stdout") == 0)
 		outfp = stdout;
 	else {
-		flags = O_WRONLY|O_CREAT|O_EXCL;
+		flags = O_WRONLY | O_CREAT | O_EXCL;
 		if (lstat(outfile, &st) == 0) {
 			if (iflag) {
 				warnc(EEXIST, "%s: %s", infile, outfile);
@@ -305,6 +306,7 @@ decode2(void)
 static int
 getline(char *buf, size_t size)
 {
+
 	if (fgets(buf, size, infp) != NULL)
 		return (2);
 	if (rflag)
@@ -341,17 +343,19 @@ uu_decode(void)
 	/* for each input line */
 	for (;;) {
 		switch (getline(buf, sizeof(buf))) {
-		case 0: return (0);
-		case 1: return (1);
+		case 0:
+			return (0);
+		case 1:
+			return (1);
 		}
 
-#define	DEC(c)	(((c) - ' ') & 077)		/* single character decode */
-#define IS_DEC(c) ( (((c) - ' ') >= 0) && (((c) - ' ') <= 077 + 1) )
+#define	DEC(c)		(((c) - ' ') & 077)	/* single character decode */
+#define IS_DEC(c)	 ( (((c) - ' ') >= 0) && (((c) - ' ') <= 077 + 1) )
 
 #define OUT_OF_RANGE do {						\
 	warnx("%s: %s: character out of range: [%d-%d]",		\
 	    infile, outfile, 1 + ' ', 077 + ' ' + 1);			\
-        return (1);							\
+	return (1);							\
 } while (0)
 
 		/*
@@ -364,8 +368,8 @@ uu_decode(void)
 		for (++p; i > 0; p += 4, i -= 3)
 			if (i >= 3) {
 				if (!(IS_DEC(*p) && IS_DEC(*(p + 1)) &&
-				     IS_DEC(*(p + 2)) && IS_DEC(*(p + 3))))
-                                	OUT_OF_RANGE;
+				    IS_DEC(*(p + 2)) && IS_DEC(*(p + 3))))
+					OUT_OF_RANGE;
 
 				ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4;
 				putc(ch, outfp);
@@ -373,8 +377,7 @@ uu_decode(void)
 				putc(ch, outfp);
 				ch = DEC(p[2]) << 6 | DEC(p[3]);
 				putc(ch, outfp);
-			}
-			else {
+			} else {
 				if (i >= 1) {
 					if (!(IS_DEC(*p) && IS_DEC(*(p + 1))))
 	                                	OUT_OF_RANGE;
@@ -383,56 +386,85 @@ uu_decode(void)
 				}
 				if (i >= 2) {
 					if (!(IS_DEC(*(p + 1)) &&
-						IS_DEC(*(p + 2))))
-		                                OUT_OF_RANGE;
+					    IS_DEC(*(p + 2))))
+						OUT_OF_RANGE;
 
 					ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2;
 					putc(ch, outfp);
 				}
 				if (i >= 3) {
 					if (!(IS_DEC(*(p + 2)) &&
-						IS_DEC(*(p + 3))))
-		                                OUT_OF_RANGE;
+					    IS_DEC(*(p + 3))))
+						OUT_OF_RANGE;
 					ch = DEC(p[2]) << 6 | DEC(p[3]);
 					putc(ch, outfp);
 				}
 			}
 	}
 	switch (getline(buf, sizeof(buf))) {
-	case 0:  return (0);
-	case 1:  return (1);
-	default: return (checkend(buf, "end", "no \"end\" line"));
+	case 0:
+		return (0);
+	case 1:
+		return (1);
+	default:
+		return (checkend(buf, "end", "no \"end\" line"));
 	}
 }
 
 static int
 base64_decode(void)
 {
-	int n;
-	char inbuf[MAXPATHLEN+1];
+	int n, count, count4;
+	char inbuf[MAXPATHLEN + 1], *p;
 	unsigned char outbuf[MAXPATHLEN * 4];
+	char leftover[MAXPATHLEN + 1];
 
+	leftover[0] = '\0';
 	for (;;) {
-		switch (getline(inbuf, sizeof(inbuf))) {
-		case 0: return (0);
-		case 1: return (1);
+		strcpy(inbuf, leftover);
+		switch (getline(inbuf + strlen(inbuf),
+		    sizeof(inbuf) - strlen(inbuf))) {
+		case 0:
+			return (0);
+		case 1:
+			return (1);
 		}
+
+		count = 0;
+		count4 = -1;
+		p = inbuf;
+		while (*p != '\0') {
+			/*
+			 * Base64 encoded strings have the following
+			 * characters in them: A-Z, a-z, 0-9 and +, / and =
+			 */
+			if (isalnum(*p) || *p == '+' || *p == '/' || *p == '=')
+				count++;
+			if (count % 4 == 0)
+				count4 = p - inbuf;
+			p++;
+		}
+
+		strcpy(leftover, inbuf + count4 + 1);
+		inbuf[count4 + 1] = 0;
+
 		n = b64_pton(inbuf, outbuf, sizeof(outbuf));
+
 		if (n < 0)
 			break;
 		fwrite(outbuf, 1, n, outfp);
 	}
-	return (checkend(inbuf, "====",
-		    "error decoding base64 input stream"));
+	return (checkend(inbuf, "====", "error decoding base64 input stream"));
 }
 
 static void
 usage(void)
 {
+
 	(void)fprintf(stderr,
-"usage: uudecode [-cimprs] [file ...]\n"
-"       uudecode [-i] -o output_file [file]\n"
-"       b64decode [-cimprs] [file ...]\n"
-"       b64decode [-i] -o output_file [file]\n");
+	    "usage: uudecode [-cimprs] [file ...]\n"
+	    "       uudecode [-i] -o output_file [file]\n"
+	    "       b64decode [-cimprs] [file ...]\n"
+	    "       b64decode [-i] -o output_file [file]\n");
 	exit(1);
 }

From owner-svn-src-stable-7@FreeBSD.ORG  Wed Nov  3 10:24:57 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id D7C50106564A;
	Wed,  3 Nov 2010 10:24:57 +0000 (UTC) (envelope-from ed@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id C63578FC0A;
	Wed,  3 Nov 2010 10:24:57 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA3AOvWL015884;
	Wed, 3 Nov 2010 10:24:57 GMT (envelope-from ed@svn.freebsd.org)
Received: (from ed@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA3AOvXL015882;
	Wed, 3 Nov 2010 10:24:57 GMT (envelope-from ed@svn.freebsd.org)
Message-Id: <201011031024.oA3AOvXL015882@svn.freebsd.org>
From: Ed Schouten 
Date: Wed, 3 Nov 2010 10:24:57 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214732 - stable/7/usr.bin/truss
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Wed, 03 Nov 2010 10:24:57 -0000

Author: ed
Date: Wed Nov  3 10:24:57 2010
New Revision: 214732
URL: http://svn.freebsd.org/changeset/base/214732

Log:
  MFC r214105:
  
    Remove setpgid() call before executing child process.
  
    Using a separate process group here is bad, since (for example) job
    control in the TTY layer prevents interaction with the TTY, causing the
    child process to hang.

Modified:
  stable/7/usr.bin/truss/setup.c
Directory Properties:
  stable/7/usr.bin/truss/   (props changed)

Modified: stable/7/usr.bin/truss/setup.c
==============================================================================
--- stable/7/usr.bin/truss/setup.c	Wed Nov  3 10:23:06 2010	(r214731)
+++ stable/7/usr.bin/truss/setup.c	Wed Nov  3 10:24:57 2010	(r214732)
@@ -78,7 +78,6 @@ setup_and_wait(char *command[])
 	}
 	if (pid == 0) {	/* Child */
 		ptrace(PT_TRACE_ME, 0, 0, 0);
-		setpgid (0, 0); 
 		execvp(command[0], command);
 		err(1, "execvp %s", command[0]);
 	}

From owner-svn-src-stable-7@FreeBSD.ORG  Wed Nov  3 15:25:40 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id E8CD4106564A;
	Wed,  3 Nov 2010 15:25:40 +0000 (UTC) (envelope-from jhb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id D5B3B8FC0A;
	Wed,  3 Nov 2010 15:25:40 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA3FPeGw024950;
	Wed, 3 Nov 2010 15:25:40 GMT (envelope-from jhb@svn.freebsd.org)
Received: (from jhb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA3FPeqb024942;
	Wed, 3 Nov 2010 15:25:40 GMT (envelope-from jhb@svn.freebsd.org)
Message-Id: <201011031525.oA3FPeqb024942@svn.freebsd.org>
From: John Baldwin 
Date: Wed, 3 Nov 2010 15:25:40 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214743 - stable/7/usr.sbin/mfiutil
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Wed, 03 Nov 2010 15:25:41 -0000

Author: jhb
Date: Wed Nov  3 15:25:40 2010
New Revision: 214743
URL: http://svn.freebsd.org/changeset/base/214743

Log:
  MFC 213672,213674,214396:
  - Report subcommand handler errors in mfiutil so that tools that
    invoke the utilities can robustly report errors.
  - Fix compile with -DDEBUG by using the correct mfi_pd_ref union definition
    in mfireg.h.
  - Save errno values before calling warn(3) so that errors are correctly
    reported.
  - Use powerof2() from <sys/param.h> rather than a copy and paste version.

Modified:
  stable/7/usr.sbin/mfiutil/mfi_config.c
  stable/7/usr.sbin/mfiutil/mfi_drive.c
  stable/7/usr.sbin/mfiutil/mfi_evt.c
  stable/7/usr.sbin/mfiutil/mfi_flash.c
  stable/7/usr.sbin/mfiutil/mfi_patrol.c
  stable/7/usr.sbin/mfiutil/mfi_show.c
  stable/7/usr.sbin/mfiutil/mfi_volume.c
  stable/7/usr.sbin/mfiutil/mfiutil.c
Directory Properties:
  stable/7/usr.sbin/mfiutil/   (props changed)
  stable/7/usr.sbin/mfiutil/mfiutil.8   (props changed)

Modified: stable/7/usr.sbin/mfiutil/mfi_config.c
==============================================================================
--- stable/7/usr.sbin/mfiutil/mfi_config.c	Wed Nov  3 15:25:30 2010	(r214742)
+++ stable/7/usr.sbin/mfiutil/mfi_config.c	Wed Nov  3 15:25:40 2010	(r214743)
@@ -29,12 +29,12 @@
  * $FreeBSD$
  */
 
-#include 
+#include 
 #ifdef DEBUG
 #include 
 #endif
-#include 
 #include 
+#include 
 #include 
 #ifdef DEBUG
 #include 
@@ -52,8 +52,6 @@ static void	dump_config(int fd, struct m
 static int	add_spare(int ac, char **av);
 static int	remove_spare(int ac, char **av);
 
-#define powerof2(x)    ((((x)-1)&(x))==0)
-
 static long
 dehumanize(const char *value)
 {
@@ -151,13 +149,14 @@ static int
 clear_config(int ac, char **av)
 {
 	struct mfi_ld_list list;
-	int ch, fd;
+	int ch, error, fd;
 	u_int i;
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (!mfi_reconfig_supported()) {
@@ -167,8 +166,9 @@ clear_config(int ac, char **av)
 	}
 
 	if (mfi_ld_get_list(fd, &list, NULL) < 0) {
+		error = errno;
 		warn("Failed to get volume list");
-		return (errno);
+		return (error);
 	}
 
 	for (i = 0; i < list.ld_count; i++) {
@@ -189,8 +189,9 @@ clear_config(int ac, char **av)
 	}
 
 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_CLEAR, NULL, 0, NULL, 0, NULL) < 0) {
+		error = errno;
 		warn("Failed to clear configuration");
-		return (errno);
+		return (error);
 	}
 
 	printf("mfi%d: Configuration cleared\n", mfi_unit);
@@ -335,8 +336,9 @@ parse_array(int fd, int raid_type, char 
 			return (error);
 
 		if (mfi_pd_get_info(fd, device_id, pinfo, NULL) < 0) {
+			error = errno;
 			warn("Failed to fetch drive info for drive %s", cp);
-			return (errno);
+			return (error);
 		}
 
 		if (pinfo->fw_state != MFI_PD_STATE_UNCONFIGURED_GOOD) {
@@ -548,8 +550,9 @@ create_volume(int ac, char **av)
 	
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (!mfi_reconfig_supported()) {
@@ -660,8 +663,9 @@ create_volume(int ac, char **av)
 	 * array and volume identifiers.
 	 */
 	if (mfi_config_read(fd, &config) < 0) {
+		error = errno;
 		warn("Failed to read configuration");
-		return (errno);
+		return (error);
 	}
 	p = (char *)config->array;
 	state.array_ref = 0xffff;
@@ -745,14 +749,14 @@ create_volume(int ac, char **av)
 #ifdef DEBUG
 	if (dump)
 		dump_config(fd, config);
-	else
 #endif
 
 	/* Send the new config to the controller. */
 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_ADD, config, config_size,
 	    NULL, 0, NULL) < 0) {
+		error = errno;
 		warn("Failed to add volume");
-		return (errno);
+		return (error);
 	}
 
 	/* Clean up. */
@@ -774,7 +778,7 @@ static int
 delete_volume(int ac, char **av)
 {
 	struct mfi_ld_info info;
-	int fd;
+	int error, fd;
 	uint8_t target_id, mbox[4];
 
 	/*
@@ -799,8 +803,9 @@ delete_volume(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (!mfi_reconfig_supported()) {
@@ -810,13 +815,15 @@ delete_volume(int ac, char **av)
 	}
 
 	if (mfi_lookup_volume(fd, av[1], &target_id) < 0) {
+		error = errno;
 		warn("Invalid volume %s", av[1]);
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_ld_get_info(fd, target_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to get info for volume %d", target_id);
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_volume_busy(fd, target_id)) {
@@ -828,8 +835,9 @@ delete_volume(int ac, char **av)
 	mbox_store_ldref(mbox, &info.ld_config.properties.ld);
 	if (mfi_dcmd_command(fd, MFI_DCMD_LD_DELETE, NULL, 0, mbox,
 	    sizeof(mbox), NULL) < 0) {
+		error = errno;
 		warn("Failed to delete volume");
-		return (errno);
+		return (error);
 	}
 
 	close(fd);
@@ -858,8 +866,9 @@ add_spare(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, av[1], &device_id);
@@ -867,8 +876,9 @@ add_spare(int ac, char **av)
 		return (error);
 
 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to fetch drive info");
-		return (errno);
+		return (error);
 	}
 
 	if (info.fw_state != MFI_PD_STATE_UNCONFIGURED_GOOD) {
@@ -878,14 +888,16 @@ add_spare(int ac, char **av)
 
 	if (ac > 2) {
 		if (mfi_lookup_volume(fd, av[2], &target_id) < 0) {
+			error = errno;
 			warn("Invalid volume %s", av[2]);
-			return (errno);
+			return (error);
 		}
 	}
 
 	if (mfi_config_read(fd, &config) < 0) {
+		error = errno;
 		warn("Failed to read configuration");
-		return (errno);
+		return (error);
 	}
 
 	spare = malloc(sizeof(struct mfi_spare) + sizeof(uint16_t) *
@@ -939,8 +951,9 @@ add_spare(int ac, char **av)
 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_MAKE_SPARE, spare,
 	    sizeof(struct mfi_spare) + sizeof(uint16_t) * spare->array_count,
 	    NULL, 0, NULL) < 0) {
+		error = errno;
 		warn("Failed to assign spare");
-		return (errno);
+		return (error);
 	}
 
 	close(fd);
@@ -964,8 +977,9 @@ remove_spare(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, av[1], &device_id);
@@ -974,8 +988,9 @@ remove_spare(int ac, char **av)
 
 	/* Get the info for this drive. */
 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to fetch info for drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 
 	if (info.fw_state != MFI_PD_STATE_HOT_SPARE) {
@@ -986,8 +1001,9 @@ remove_spare(int ac, char **av)
 	mbox_store_pdref(mbox, &info.ref);
 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_REMOVE_SPARE, NULL, 0, mbox,
 	    sizeof(mbox), NULL) < 0) {
+		error = errno;
 		warn("Failed to delete spare");
-		return (errno);
+		return (error);
 	}
 
 	close(fd);
@@ -1024,7 +1040,7 @@ dump_config(int fd, struct mfi_config_da
 		    ar->num_drives);
 		printf("      size = %ju\n", (uintmax_t)ar->size);
 		for (j = 0; j < ar->num_drives; j++) {
-			device_id = ar->pd[j].ref.device_id;
+			device_id = ar->pd[j].ref.v.device_id;
 			if (device_id == 0xffff)
 				printf("        drive MISSING\n");
 			else {
@@ -1080,7 +1096,7 @@ dump_config(int fd, struct mfi_config_da
 		sp = (struct mfi_spare *)p;
 		printf("    %s spare %u ",
 		    sp->spare_type & MFI_SPARE_DEDICATED ? "dedicated" :
-		    "global", sp->ref.device_id);
+		    "global", sp->ref.v.device_id);
 		printf("%s", mfi_pdstate(MFI_PD_STATE_HOT_SPARE));
 		printf(" backs:\n");
 		for (j = 0; j < sp->array_count; j++)
@@ -1093,7 +1109,7 @@ static int
 debug_config(int ac, char **av)
 {
 	struct mfi_config_data *config;
-	int fd;
+	int error, fd;
 
 	if (ac != 1) {
 		warnx("debug: extra arguments");
@@ -1102,14 +1118,16 @@ debug_config(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	/* Get the config from the controller. */
 	if (mfi_config_read(fd, &config) < 0) {
+		error = errno;
 		warn("Failed to get config");
-		return (errno);
+		return (error);
 	}
 
 	/* Dump out the configuration. */
@@ -1127,7 +1145,7 @@ dump(int ac, char **av)
 	struct mfi_config_data *config;
 	char buf[64];
 	size_t len;
-	int fd;
+	int error, fd;
 
 	if (ac != 1) {
 		warnx("dump: extra arguments");
@@ -1136,23 +1154,26 @@ dump(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	/* Get the stashed copy of the last dcmd from the driver. */
 	snprintf(buf, sizeof(buf), "dev.mfi.%d.debug_command", mfi_unit);
 	if (sysctlbyname(buf, NULL, &len, NULL, 0) < 0) {
+		error = errno;
 		warn("Failed to read debug command");
-		if (errno == ENOENT)
-			errno = EOPNOTSUPP;
-		return (errno);
+		if (error == ENOENT)
+			error = EOPNOTSUPP;
+		return (error);
 	}
 
 	config = malloc(len);
 	if (sysctlbyname(buf, config, &len, NULL, 0) < 0) {
+		error = errno;
 		warn("Failed to read debug command");
-		return (errno);
+		return (error);
 	}
 	dump_config(fd, config);
 	free(config);

Modified: stable/7/usr.sbin/mfiutil/mfi_drive.c
==============================================================================
--- stable/7/usr.sbin/mfiutil/mfi_drive.c	Wed Nov  3 15:25:30 2010	(r214742)
+++ stable/7/usr.sbin/mfiutil/mfi_drive.c	Wed Nov  3 15:25:40 2010	(r214743)
@@ -79,10 +79,11 @@ int
 mfi_lookup_drive(int fd, char *drive, uint16_t *device_id)
 {
 	struct mfi_pd_list *list;
-	uint8_t encl, slot;
 	long val;
+	int error;
 	u_int i;
 	char *cp;
+	uint8_t encl, slot;
 
 	/* Look for a raw device id first. */
 	val = strtol(drive, &cp, 0);
@@ -118,8 +119,9 @@ mfi_lookup_drive(int fd, char *drive, ui
 		slot = val;
 
 		if (mfi_pd_get_list(fd, &list, NULL) < 0) {
+			error = errno;
 			warn("Failed to fetch drive list");
-			return (errno);
+			return (error);
 		}
 
 		for (i = 0; i < list->count; i++) {
@@ -302,8 +304,9 @@ drive_set_state(char *drive, uint16_t ne
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, drive, &device_id);
@@ -312,8 +315,9 @@ drive_set_state(char *drive, uint16_t ne
 
 	/* Get the info for this drive. */
 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to fetch info for drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 
 	/* Try to change the state. */
@@ -327,9 +331,10 @@ drive_set_state(char *drive, uint16_t ne
 	mbox[5] = new_state >> 8;
 	if (mfi_dcmd_command(fd, MFI_DCMD_PD_STATE_SET, NULL, 0, mbox, 6,
 	    NULL) < 0) {
+		error = errno;
 		warn("Failed to set drive %u to %s", device_id,
 		    mfi_pdstate(new_state));
-		return (errno);
+		return (error);
 	}
 
 	close(fd);
@@ -395,8 +400,9 @@ start_rebuild(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, av[1], &device_id);
@@ -405,13 +411,14 @@ start_rebuild(int ac, char **av)
 
 	/* Get the info for this drive. */
 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to fetch info for drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 
 	/* Check the state, must be REBUILD. */
 	if (info.fw_state != MFI_PD_STATE_REBUILD) {
-		warn("Drive %d is not in the REBUILD state", device_id);
+		warnx("Drive %d is not in the REBUILD state", device_id);
 		return (EINVAL);
 	}
 
@@ -419,8 +426,9 @@ start_rebuild(int ac, char **av)
 	mbox_store_pdref(&mbox[0], &info.ref);
 	if (mfi_dcmd_command(fd, MFI_DCMD_PD_REBUILD_START, NULL, 0, mbox, 4,
 	    NULL) < 0) {
+		error = errno;
 		warn("Failed to start rebuild on drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 	close(fd);
 
@@ -444,8 +452,9 @@ abort_rebuild(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, av[1], &device_id);
@@ -454,8 +463,9 @@ abort_rebuild(int ac, char **av)
 
 	/* Get the info for this drive. */
 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to fetch info for drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 
 	/* Check the state, must be REBUILD. */
@@ -468,8 +478,9 @@ abort_rebuild(int ac, char **av)
 	mbox_store_pdref(&mbox[0], &info.ref);
 	if (mfi_dcmd_command(fd, MFI_DCMD_PD_REBUILD_ABORT, NULL, 0, mbox, 4,
 	    NULL) < 0) {
+		error = errno;
 		warn("Failed to abort rebuild on drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 	close(fd);
 
@@ -492,8 +503,9 @@ drive_progress(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, av[1], &device_id);
@@ -502,8 +514,9 @@ drive_progress(int ac, char **av)
 
 	/* Get the info for this drive. */
 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to fetch info for drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 	close(fd);
 
@@ -551,8 +564,9 @@ drive_clear(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, av[1], &device_id);
@@ -561,16 +575,18 @@ drive_clear(int ac, char **av)
 
 	/* Get the info for this drive. */
 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to fetch info for drive %u", device_id);
-		return (errno);
+		return (error);
 	}
 
 	mbox_store_pdref(&mbox[0], &info.ref);
 	if (mfi_dcmd_command(fd, opcode, NULL, 0, mbox, 4, NULL) < 0) {
+		error = errno;
 		warn("Failed to %s clear on drive %u",
 		    opcode == MFI_DCMD_PD_CLEAR_START ? "start" : "stop",
 		    device_id);
-		return (errno);
+		return (error);
 	}
 
 	close(fd);
@@ -604,8 +620,9 @@ drive_locate(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	error = mfi_lookup_drive(fd, av[1], &device_id);
@@ -617,10 +634,11 @@ drive_locate(int ac, char **av)
 	mbox[2] = 0;
 	mbox[3] = 0;
 	if (mfi_dcmd_command(fd, opcode, NULL, 0, mbox, 4, NULL) < 0) {
+		error = errno;
 		warn("Failed to %s locate on drive %u",
 		    opcode == MFI_DCMD_PD_LOCATE_START ? "start" : "stop",
 		    device_id);
-		return (errno);
+		return (error);
 	}
 	close(fd);
 

Modified: stable/7/usr.sbin/mfiutil/mfi_evt.c
==============================================================================
--- stable/7/usr.sbin/mfiutil/mfi_evt.c	Wed Nov  3 15:25:30 2010	(r214742)
+++ stable/7/usr.sbin/mfiutil/mfi_evt.c	Wed Nov  3 15:25:40 2010	(r214743)
@@ -32,7 +32,6 @@
 #include 
 #include 
 #include 
-//#include 
 #include 
 #include 
 #include 
@@ -67,7 +66,7 @@ static int
 show_logstate(int ac, char **av)
 {
 	struct mfi_evt_log_state info;
-	int fd;
+	int error, fd;
 
 	if (ac != 1) {
 		warnx("show logstate: extra arguments");
@@ -76,13 +75,15 @@ show_logstate(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_event_get_info(fd, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to get event log info");
-		return (errno);
+		return (error);
 	}
 
 	printf("mfi%d Event Log Sequence Numbers:\n", mfi_unit);
@@ -536,18 +537,20 @@ show_events(int ac, char **av)
 	ssize_t size;
 	uint32_t seq, start, stop;
 	uint8_t status;
-	int ch, fd, num_events, verbose;
+	int ch, error, fd, num_events, verbose;
 	u_int i;
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_event_get_info(fd, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to get event log info");
-		return (errno);
+		return (error);
 	}
 
 	/* Default settings. */
@@ -565,14 +568,16 @@ show_events(int ac, char **av)
 		switch (ch) {
 		case 'c':
 			if (parse_class(optarg, &filter.members.class) < 0) {
+				error = errno;
 				warn("Error parsing event class");
-				return (errno);
+				return (error);
 			}
 			break;
 		case 'l':
 			if (parse_locale(optarg, &filter.members.locale) < 0) {
+				error = errno;
 				warn("Error parsing event locale");
-				return (errno);
+				return (error);
 			}
 			break;
 		case 'n':
@@ -608,20 +613,23 @@ show_events(int ac, char **av)
 		return (EINVAL);
 	}
 	if (ac > 0 && parse_seq(&info, av[0], &start) < 0) {
+		error = errno;
 		warn("Error parsing starting sequence number");
-		return (errno);
+		return (error);
 	}
 	if (ac > 1 && parse_seq(&info, av[1], &stop) < 0) {
+		error = errno;
 		warn("Error parsing ending sequence number");
-		return (errno);
+		return (error);
 	}
 
 	list = malloc(size);
 	for (seq = start;;) {
 		if (mfi_get_events(fd, list, num_events, filter, seq,
 		    &status) < 0) {
+			error = errno;
 			warn("Failed to fetch events");
-			return (errno);
+			return (error);
 		}
 		if (status == MFI_STAT_NOT_FOUND) {
 			if (seq == start)

Modified: stable/7/usr.sbin/mfiutil/mfi_flash.c
==============================================================================
--- stable/7/usr.sbin/mfiutil/mfi_flash.c	Wed Nov  3 15:25:30 2010	(r214742)
+++ stable/7/usr.sbin/mfiutil/mfi_flash.c	Wed Nov  3 15:25:40 2010	(r214743)
@@ -72,16 +72,18 @@ display_firmware(struct mfi_info_compone
 	    fw_time_width, comp->build_time);
 }
 
-static void
+static int
 display_pending_firmware(int fd)
 {
 	struct mfi_ctrl_info info;
 	struct mfi_info_component header;
+	int error;
 	u_int i;
 
 	if (mfi_ctrl_get_info(fd, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to get controller info");
-		return;
+		return (error);
 	}
 
 	printf("mfi%d Pending Firmware Images:\n", mfi_unit);
@@ -97,6 +99,8 @@ display_pending_firmware(int fd)
 	display_firmware(&header);
 	for (i = 0; i < info.pending_image_component_count; i++)
 		display_firmware(&info.pending_image_component[i]);
+
+	return (0);
 }
 
 static void
@@ -117,7 +121,7 @@ flash_adapter(int ac, char **av)
 	size_t nread;
 	char *buf;
 	struct stat sb;
-	int fd, flash;
+	int error, fd, flash;
 	uint8_t mbox[4], status;
 
 	if (ac != 2) {
@@ -127,13 +131,15 @@ flash_adapter(int ac, char **av)
 
 	flash = open(av[1], O_RDONLY);
 	if (flash < 0) {
+		error = errno;
 		warn("flash: Failed to open %s", av[1]);
-		return (errno);
+		return (error);
 	}
 
 	if (fstat(flash, &sb) < 0) {
+		error = errno;
 		warn("fstat(%s)", av[1]);
-		return (errno);
+		return (error);
 	}
 	if (sb.st_size % 1024 != 0 || sb.st_size > 0x7fffffff) {
 		warnx("Invalid flash file size");
@@ -142,8 +148,9 @@ flash_adapter(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	/* First, ask the firmware to allocate space for the flash file. */
@@ -190,10 +197,10 @@ flash_adapter(int ac, char **av)
 		return (ENXIO);
 	}
 	printf("finished\n");
-	display_pending_firmware(fd);
+	error = display_pending_firmware(fd);
 
 	close(fd);
 
-	return (0);
+	return (error);
 }
 MFI_COMMAND(top, flash, flash_adapter);

Modified: stable/7/usr.sbin/mfiutil/mfi_patrol.c
==============================================================================
--- stable/7/usr.sbin/mfiutil/mfi_patrol.c	Wed Nov  3 15:25:30 2010	(r214742)
+++ stable/7/usr.sbin/mfiutil/mfi_patrol.c	Wed Nov  3 15:25:40 2010	(r214743)
@@ -62,11 +62,13 @@ mfi_get_time(int fd, uint32_t *at)
 static int
 patrol_get_props(int fd, struct mfi_pr_properties *prop)
 {
+	int error;
 
 	if (mfi_dcmd_command(fd, MFI_DCMD_PR_GET_PROPERTIES, prop,
 	    sizeof(*prop), NULL, 0, NULL) < 0) {
+		error = errno;
 		warn("Failed to get patrol read properties");
-		return (-1);
+		return (error);
 	}
 	return (0);
 }
@@ -81,19 +83,21 @@ show_patrol(int ac, char **av)
 	char label[16];
 	time_t now;
 	uint32_t at;
-	int fd;
+	int error, fd;
 	u_int i;
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	time(&now);
 	mfi_get_time(fd, &at);
-	if (patrol_get_props(fd, &prop) < 0)
-		return (errno);
+	error = patrol_get_props(fd, &prop);
+	if (error)
+		return (error);
 	printf("Operation Mode: ");
 	switch (prop.op_mode) {
 	case MFI_PR_OPMODE_AUTO:
@@ -122,8 +126,9 @@ show_patrol(int ac, char **av)
 
 	if (mfi_dcmd_command(fd, MFI_DCMD_PR_GET_STATUS, &status,
 	    sizeof(status), NULL, 0, NULL) < 0) {
+		error = errno;
 		warn("Failed to get patrol read properties");
-		return (errno);
+		return (error);
 	}
 	printf("Runs Completed: %u\n", status.num_iteration);
 	printf("Current State: ");
@@ -146,8 +151,9 @@ show_patrol(int ac, char **av)
 	}
 	if (status.state == MFI_PR_STATE_ACTIVE) {
 		if (mfi_pd_get_list(fd, &list, NULL) < 0) {
+			error = errno;
 			warn("Failed to get drive list");
-			return (errno);
+			return (error);
 		}
 
 		for (i = 0; i < list->count; i++) {
@@ -156,9 +162,10 @@ show_patrol(int ac, char **av)
 
 			if (mfi_pd_get_info(fd, list->addr[i].device_id, &info,
 			    NULL) < 0) {
+				error = errno;
 				warn("Failed to fetch info for drive %u",
 				    list->addr[i].device_id);
-				return (errno);
+				return (error);
 			}
 			if (info.prog_info.active & MFI_PD_PROGRESS_PATROL) {
 				snprintf(label, sizeof(label), "    Drive %u",
@@ -178,18 +185,20 @@ MFI_COMMAND(show, patrol, show_patrol);
 static int
 start_patrol(int ac, char **av)
 {
-	int fd;
+	int error, fd;
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_dcmd_command(fd, MFI_DCMD_PR_START, NULL, 0, NULL, 0, NULL) <
 	    0) {
+		error = errno;
 		warn("Failed to start patrol read");
-		return (errno);
+		return (error);
 	}
 
 	close(fd);
@@ -201,18 +210,20 @@ MFI_COMMAND(start, patrol, start_patrol)
 static int
 stop_patrol(int ac, char **av)
 {
-	int fd;
+	int error, fd;
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_dcmd_command(fd, MFI_DCMD_PR_STOP, NULL, 0, NULL, 0, NULL) <
 	    0) {
+		error = errno;
 		warn("Failed to stop patrol read");
-		return (errno);
+		return (error);
 	}
 
 	close(fd);
@@ -227,10 +238,10 @@ patrol_config(int ac, char **av)
 	struct mfi_pr_properties prop;
 	long val;
 	time_t now;
+	int error, fd;
 	uint32_t at, next_exec, exec_freq;
 	char *cp;
 	uint8_t op_mode;
-	int fd;
 
 	exec_freq = 0;	/* GCC too stupid */
 	next_exec = 0;
@@ -272,12 +283,14 @@ patrol_config(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
-	if (patrol_get_props(fd, &prop) < 0)
-		return (errno);
+	error = patrol_get_props(fd, &prop);
+	if (error)
+		return (error);
 	prop.op_mode = op_mode;
 	if (op_mode == MFI_PR_OPMODE_AUTO) {
 		if (ac > 2)
@@ -294,8 +307,9 @@ patrol_config(int ac, char **av)
 	}
 	if (mfi_dcmd_command(fd, MFI_DCMD_PR_SET_PROPERTIES, &prop,
 	    sizeof(prop), NULL, 0, NULL) < 0) {
+		error = errno;
 		warn("Failed to set patrol read properties");
-		return (errno);
+		return (error);
 	}
 
 	close(fd);

Modified: stable/7/usr.sbin/mfiutil/mfi_show.c
==============================================================================
--- stable/7/usr.sbin/mfiutil/mfi_show.c	Wed Nov  3 15:25:30 2010	(r214742)
+++ stable/7/usr.sbin/mfiutil/mfi_show.c	Wed Nov  3 15:25:40 2010	(r214743)
@@ -54,7 +54,7 @@ show_adapter(int ac, char **av)
 {
 	struct mfi_ctrl_info info;
 	char stripe[5];
-	int fd, comma;
+	int error, fd, comma;
 
 	if (ac != 1) {
 		warnx("show adapter: extra arguments");
@@ -63,13 +63,15 @@ show_adapter(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_ctrl_get_info(fd, &info, NULL) < 0) {
+		error = errno;
 		warn("Failed to get controller info");
-		return (errno);
+		return (error);
 	}
 	printf("mfi%d Adapter:\n", mfi_unit);
 	printf("    Product Name: %.80s\n", info.product_name);
@@ -137,7 +139,7 @@ show_battery(int ac, char **av)
 	struct mfi_bbu_capacity_info cap;
 	struct mfi_bbu_design_info design;
 	uint8_t status;
-	int fd;
+	int error, fd;
 
 	if (ac != 1) {
 		warnx("show battery: extra arguments");
@@ -146,8 +148,9 @@ show_battery(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_dcmd_command(fd, MFI_DCMD_BBU_GET_CAPACITY_INFO, &cap,
@@ -156,14 +159,16 @@ show_battery(int ac, char **av)
 			printf("mfi%d: No battery present\n", mfi_unit);
 			return (0);
 		}
+		error = errno;
 		warn("Failed to get capacity info");
-		return (errno);
+		return (error);
 	}
 
 	if (mfi_dcmd_command(fd, MFI_DCMD_BBU_GET_DESIGN_INFO, &design,
 	    sizeof(design), NULL, 0, NULL) < 0) {
+		error = errno;
 		warn("Failed to get design info");
-		return (errno);
+		return (error);
 	}
 
 	printf("mfi%d: Battery State:\n", mfi_unit);
@@ -242,7 +247,7 @@ show_config(int ac, char **av)
 	struct mfi_pd_info pinfo;
 	uint16_t device_id;
 	char *p;
-	int fd, i, j;
+	int error, fd, i, j;
 
 	if (ac != 1) {
 		warnx("show config: extra arguments");
@@ -251,14 +256,16 @@ show_config(int ac, char **av)
 
 	fd = mfi_open(mfi_unit);
 	if (fd < 0) {
+		error = errno;
 		warn("mfi_open");
-		return (errno);
+		return (error);
 	}
 
 	/* Get the config from the controller. */
 	if (mfi_config_read(fd, &config) < 0) {
+		error = errno;
 		warn("Failed to get config");
-		return (errno);
+		return (error);
 	}
 
 	/* Dump out the configuration. */
@@ -337,8 +344,8 @@ show_volumes(int ac, char **av)
 {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

From owner-svn-src-stable-7@FreeBSD.ORG  Wed Nov  3 15:31:11 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 424BD106566B;
	Wed,  3 Nov 2010 15:31:11 +0000 (UTC) (envelope-from jhb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 15F6B8FC0A;
	Wed,  3 Nov 2010 15:31:11 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA3FVAMD025164;
	Wed, 3 Nov 2010 15:31:10 GMT (envelope-from jhb@svn.freebsd.org)
Received: (from jhb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA3FVAQN025163;
	Wed, 3 Nov 2010 15:31:10 GMT (envelope-from jhb@svn.freebsd.org)
Message-Id: <201011031531.oA3FVAQN025163@svn.freebsd.org>
From: John Baldwin 
Date: Wed, 3 Nov 2010 15:31:10 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214744 - in stable: 7/usr.sbin/mfiutil
	8/usr.sbin/mfiutil
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Wed, 03 Nov 2010 15:31:11 -0000

Author: jhb
Date: Wed Nov  3 15:31:10 2010
New Revision: 214744
URL: http://svn.freebsd.org/changeset/base/214744

Log:
  Trim unneeded mergeinfo from a file.

Modified:
Directory Properties:
  stable/7/usr.sbin/mfiutil/mfiutil.8   (props changed)

Changes in other areas also in this revision:
Modified:
Directory Properties:
  stable/8/usr.sbin/mfiutil/mfiutil.8   (props changed)

From owner-svn-src-stable-7@FreeBSD.ORG  Thu Nov  4 17:07:13 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id B77A3106566B;
	Thu,  4 Nov 2010 17:07:13 +0000 (UTC) (envelope-from jhb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id A51FE8FC15;
	Thu,  4 Nov 2010 17:07:13 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA4H7DE9066382;
	Thu, 4 Nov 2010 17:07:13 GMT (envelope-from jhb@svn.freebsd.org)
Received: (from jhb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA4H7Dni066378;
	Thu, 4 Nov 2010 17:07:13 GMT (envelope-from jhb@svn.freebsd.org)
Message-Id: <201011041707.oA4H7Dni066378@svn.freebsd.org>
From: John Baldwin 
Date: Thu, 4 Nov 2010 17:07:13 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214785 - in stable/7/sys: conf x86 x86/pci
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Thu, 04 Nov 2010 17:07:13 -0000

Author: jhb
Date: Thu Nov  4 17:07:13 2010
New Revision: 214785
URL: http://svn.freebsd.org/changeset/base/214785

Log:
  MFC 211820,211821,212292:
  Intel QPI chipsets actually provide extra "non-core" PCI buses that
  provide PCI devices for various hardware such as memory controllers,
  etc.  for each socket.  These PCI buses are not enumerated via ACPI
  however.  Add qpi(4) pseudo bus and Host-PCI bridge drivers to
  enumerate these buses.  Currently the driver uses the CPU ID to
  determine the bridges' presence.

Added:
  stable/7/sys/x86/
  stable/7/sys/x86/pci/
     - copied from r211820, head/sys/x86/pci/
Modified:
  stable/7/sys/conf/files.amd64
  stable/7/sys/conf/files.i386
  stable/7/sys/x86/pci/qpi.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/conf/files.amd64
==============================================================================
--- stable/7/sys/conf/files.amd64	Thu Nov  4 17:06:54 2010	(r214784)
+++ stable/7/sys/conf/files.amd64	Thu Nov  4 17:07:13 2010	(r214785)
@@ -277,3 +277,7 @@ i386/cpufreq/hwpstate.c		optional	cpufre
 i386/cpufreq/p4tcc.c		optional	cpufreq
 #
 libkern/memset.c		standard
+#
+# x86 shared code between IA32, AMD64 and PC98 architectures
+#
+x86/pci/qpi.c			standard

Modified: stable/7/sys/conf/files.i386
==============================================================================
--- stable/7/sys/conf/files.i386	Thu Nov  4 17:06:54 2010	(r214784)
+++ stable/7/sys/conf/files.i386	Thu Nov  4 17:07:13 2010	(r214785)
@@ -463,3 +463,7 @@ i386/xbox/xbox.c		optional	xbox
 i386/xbox/xboxfb.c		optional	xboxfb
 dev/fb/boot_font.c		optional	xboxfb
 i386/xbox/pic16l.s		optional	xbox
+#
+# x86 shared code between IA32, AMD64 and PC98 architectures
+#
+x86/pci/qpi.c			standard

Modified: stable/7/sys/x86/pci/qpi.c
==============================================================================
--- head/sys/x86/pci/qpi.c	Wed Aug 25 19:12:05 2010	(r211820)
+++ stable/7/sys/x86/pci/qpi.c	Thu Nov  4 17:07:13 2010	(r214785)
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include "pcib_if.h"
@@ -62,7 +63,8 @@ qpi_identify(driver_t *driver, device_t 
 {
 
         /* Check CPUID to ensure this is an i7 CPU of some sort. */
-        if (!(cpu_vendor_id == CPU_VENDOR_INTEL && CPUID_TO_FAMILY(cpu_id) &&
+        if (!(cpu_vendor_id == CPU_VENDOR_INTEL &&
+	    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 	    (CPUID_TO_MODEL(cpu_id) == 0x1a || CPUID_TO_MODEL(cpu_id) == 0x2c)))
                 return;
 
@@ -83,31 +85,62 @@ qpi_probe(device_t dev)
 	return (BUS_PROBE_SPECIFIC);
 }
 
+/*
+ * Look for a PCI bus with the specified bus address.  If one is found,
+ * add a pcib device and return 0.  Otherwise, return an error code.
+ */
 static int
-qpi_attach(device_t dev)
+qpi_probe_pcib(device_t dev, int bus)
 {
 	struct qpi_device *qdev;
 	device_t child;
+	uint32_t devid;
 
 	/*
-	 * Add two Host-PCI bridge devices, one for PCI bus 254 and
-	 * one for PCI bus 255.
+	 * If a PCI bus already exists for this bus number, then
+	 * fail.
 	 */
-	child = BUS_ADD_CHILD(dev, 0, "pcib", -1);
-	if (child == NULL)
-		panic("%s: failed to add pci bus 254",
-		    device_get_nameunit(dev));
-	qdev = malloc(sizeof(struct qpi_device), M_QPI, M_WAITOK);
-	qdev->qd_pcibus = 254;
-	device_set_ivars(child, qdev);
+	if (pci_find_bsf(bus, 0, 0) != NULL)
+		return (EEXIST);
+
+	/*
+	 * Attempt to read the device id for device 0, function 0 on
+	 * the bus.  A value of 0xffffffff means that the bus is not
+	 * present.
+	 */
+	devid = pci_cfgregread(bus, 0, 0, PCIR_DEVVENDOR, 4);
+	if (devid == 0xffffffff)
+		return (ENOENT);
+
+	if ((devid & 0xffff) != 0x8086) {
+		device_printf(dev,
+		    "Device at pci%d.0.0 has non-Intel vendor 0x%x\n", bus,
+		    devid & 0xffff);
+		return (ENXIO);
+	}
 
 	child = BUS_ADD_CHILD(dev, 0, "pcib", -1);
 	if (child == NULL)
-		panic("%s: failed to add pci bus 255",
-		    device_get_nameunit(dev));
+		panic("%s: failed to add pci bus %d", device_get_nameunit(dev),
+		    bus);
 	qdev = malloc(sizeof(struct qpi_device), M_QPI, M_WAITOK);
-	qdev->qd_pcibus = 255;
+	qdev->qd_pcibus = bus;
 	device_set_ivars(child, qdev);
+	return (0);
+}
+
+static int
+qpi_attach(device_t dev)
+{
+	int bus;
+
+	/*
+	 * Each processor socket has a dedicated PCI bus counting down from
+	 * 255.  We keep probing buses until one fails.
+	 */
+	for (bus = 255;; bus--)
+		if (qpi_probe_pcib(dev, bus) != 0)
+			break;
 
 	return (bus_generic_attach(dev));
 }

From owner-svn-src-stable-7@FreeBSD.ORG  Thu Nov  4 17:13:30 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id B32AD1065675;
	Thu,  4 Nov 2010 17:13:30 +0000 (UTC) (envelope-from jhb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 8744E8FC18;
	Thu,  4 Nov 2010 17:13:30 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA4HDU2c066614;
	Thu, 4 Nov 2010 17:13:30 GMT (envelope-from jhb@svn.freebsd.org)
Received: (from jhb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA4HDUpA066611;
	Thu, 4 Nov 2010 17:13:30 GMT (envelope-from jhb@svn.freebsd.org)
Message-Id: <201011041713.oA4HDUpA066611@svn.freebsd.org>
From: John Baldwin 
Date: Thu, 4 Nov 2010 17:13:30 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214787 - stable/7/sys/dev/pci
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Thu, 04 Nov 2010 17:13:30 -0000

Author: jhb
Date: Thu Nov  4 17:13:30 2010
New Revision: 214787
URL: http://svn.freebsd.org/changeset/base/214787

Log:
  MFC 214203:
  - Add a new PCI quirk to whitelist an old chipset that doesn't support
    PCI-express or PCI-X capabilities if we are running in a virtual machine.
  - Whitelist the Intel 82440 chipset used by QEMU.

Modified:
  stable/7/sys/dev/pci/pci.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/dev/pci/pci.c
==============================================================================
--- stable/7/sys/dev/pci/pci.c	Thu Nov  4 17:12:29 2010	(r214786)
+++ stable/7/sys/dev/pci/pci.c	Thu Nov  4 17:13:30 2010	(r214787)
@@ -178,6 +178,7 @@ struct pci_quirk {
 	int	type;
 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
+#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
 	int	arg1;
 	int	arg2;
 };
@@ -214,6 +215,12 @@ struct pci_quirk pci_quirks[] = {
 	 */
 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 
+	/*
+	 * Some virtualization environments emulate an older chipset
+	 * but support MSI just fine.  QEMU uses the Intel 82440.
+	 */
+	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
+
 	{ 0 }
 };
 
@@ -1771,6 +1778,23 @@ pci_msi_device_blacklisted(device_t dev)
 }
 
 /*
+ * Returns true if a specified chipset supports MSI when it is
+ * emulated hardware in a virtual machine.
+ */
+static int
+pci_msi_vm_chipset(device_t dev)
+{
+	struct pci_quirk *q;
+
+	for (q = &pci_quirks[0]; q->devid; q++) {
+		if (q->devid == pci_get_devid(dev) &&
+		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
+			return (1);
+	}
+	return (0);
+}
+
+/*
  * Determine if MSI is blacklisted globally on this sytem.  Currently,
  * we just check for blacklisted chipsets as represented by the
  * host-PCI bridge at device 0:0:0.  In the future, it may become
@@ -1786,8 +1810,14 @@ pci_msi_blacklisted(void)
 		return (0);
 
 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
-	if (!(pcie_chipset || pcix_chipset))
+	if (!(pcie_chipset || pcix_chipset)) {
+		if (vm_guest != VM_GUEST_NO) {
+			dev = pci_find_bsf(0, 0, 0);
+			if (dev != NULL)
+				return (pci_msi_vm_chipset(dev) == 0);
+		}
 		return (1);
+	}
 
 	dev = pci_find_bsf(0, 0, 0);
 	if (dev != NULL)

From owner-svn-src-stable-7@FreeBSD.ORG  Thu Nov  4 17:23:07 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 4F1D61065673;
	Thu,  4 Nov 2010 17:23:07 +0000 (UTC) (envelope-from jhb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 3DB1F8FC16;
	Thu,  4 Nov 2010 17:23:07 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA4HN72E066963;
	Thu, 4 Nov 2010 17:23:07 GMT (envelope-from jhb@svn.freebsd.org)
Received: (from jhb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA4HN7ji066961;
	Thu, 4 Nov 2010 17:23:07 GMT (envelope-from jhb@svn.freebsd.org)
Message-Id: <201011041723.oA4HN7ji066961@svn.freebsd.org>
From: John Baldwin 
Date: Thu, 4 Nov 2010 17:23:07 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214790 - stable/7/sys/kern
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Thu, 04 Nov 2010 17:23:07 -0000

Author: jhb
Date: Thu Nov  4 17:23:06 2010
New Revision: 214790
URL: http://svn.freebsd.org/changeset/base/214790

Log:
  MFC 214449:
  Set bootverbose directly in mi_startup() rather than via a SYSINIT.  This
  ensures 'bootverbose' is in a valid state for all SYSINITs.

Modified:
  stable/7/sys/kern/init_main.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/kern/init_main.c
==============================================================================
--- stable/7/sys/kern/init_main.c	Thu Nov  4 17:22:49 2010	(r214789)
+++ stable/7/sys/kern/init_main.c	Thu Nov  4 17:23:06 2010	(r214790)
@@ -178,6 +178,9 @@ mi_startup(void)
 	int verbose;
 #endif
 
+	if (boothowto & RB_VERBOSE)
+		bootverbose++;
+
 	if (sysinit == NULL) {
 		sysinit = SET_BEGIN(sysinit_set);
 		sysinit_end = SET_LIMIT(sysinit_set);
@@ -325,15 +328,6 @@ SYSINIT(diagwarn2, SI_SUB_RUN_SCHEDULER,
     print_caddr_t, diag_warn);
 #endif
 
-static void
-set_boot_verbose(void *data __unused)
-{
-
-	if (boothowto & RB_VERBOSE)
-		bootverbose++;
-}
-SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL);
-
 struct sysentvec null_sysvec = {
 	.sv_size	= 0,
 	.sv_table	= NULL,

From owner-svn-src-stable-7@FreeBSD.ORG  Thu Nov  4 17:25:41 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 44AC010656C3;
	Thu,  4 Nov 2010 17:25:41 +0000 (UTC) (envelope-from jhb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 32C788FC0A;
	Thu,  4 Nov 2010 17:25:41 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA4HPfLP067134;
	Thu, 4 Nov 2010 17:25:41 GMT (envelope-from jhb@svn.freebsd.org)
Received: (from jhb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA4HPfct067132;
	Thu, 4 Nov 2010 17:25:41 GMT (envelope-from jhb@svn.freebsd.org)
Message-Id: <201011041725.oA4HPfct067132@svn.freebsd.org>
From: John Baldwin 
Date: Thu, 4 Nov 2010 17:25:41 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214792 - stable/7/usr.bin/kdump
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Thu, 04 Nov 2010 17:25:41 -0000

Author: jhb
Date: Thu Nov  4 17:25:40 2010
New Revision: 214792
URL: http://svn.freebsd.org/changeset/base/214792

Log:
  MFC 214625: Correct the abbreviations for general I/O and signal traces.

Modified:
  stable/7/usr.bin/kdump/kdump.1
Directory Properties:
  stable/7/usr.bin/kdump/   (props changed)

Modified: stable/7/usr.bin/kdump/kdump.1
==============================================================================
--- stable/7/usr.bin/kdump/kdump.1	Thu Nov  4 17:25:31 2010	(r214791)
+++ stable/7/usr.bin/kdump/kdump.1	Thu Nov  4 17:25:40 2010	(r214792)
@@ -162,13 +162,13 @@ Seven bytes were written by the
 system call, so 7 is the return value.
 .Pp
 The possible operations are:
-.Bl -column -offset indent ".Li GENIO" ".No data from user process"
+.Bl -column -offset indent ".Li CALL" ".No data from user process"
 .It Sy Name Ta Sy Operation Ta Sy Fourth field
 .It Li CALL Ta enter syscall Ta syscall name and arguments
 .It Li RET Ta return from syscall Ta syscall name and return value
 .It Li NAMI Ta file name lookup Ta path to file
-.It Li GENIO Ta general I/O Ta fd, read/write, number of bytes
-.It Li SIG Ta signal Ta signal name, handler, mask, code
+.It Li GIO Ta general I/O Ta fd, read/write, number of bytes
+.It Li PSIG Ta signal Ta signal name, handler, mask, code
 .It Li CSW Ta context switch Ta stop/resume user/kernel
 .It Li USER Ta data from user process Ta the data
 .It Li STRU Ta various syscalls Ta structure

From owner-svn-src-stable-7@FreeBSD.ORG  Thu Nov  4 21:50:19 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id A7CAB1065693;
	Thu,  4 Nov 2010 21:50:19 +0000 (UTC)
	(envelope-from dougb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 923548FC1B;
	Thu,  4 Nov 2010 21:50:19 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA4LoJVM074229;
	Thu, 4 Nov 2010 21:50:19 GMT (envelope-from dougb@svn.freebsd.org)
Received: (from dougb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA4LoJH8074225;
	Thu, 4 Nov 2010 21:50:19 GMT (envelope-from dougb@svn.freebsd.org)
Message-Id: <201011042150.oA4LoJH8074225@svn.freebsd.org>
From: Doug Barton 
Date: Thu, 4 Nov 2010 21:50:19 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214812 - in stable/7/contrib/bind9: . bin/named
	doc/arm doc/draft doc/rfc lib/dns lib/dns/include/dns
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Thu, 04 Nov 2010 21:50:19 -0000

Author: dougb
Date: Thu Nov  4 21:50:19 2010
New Revision: 214812
URL: http://svn.freebsd.org/changeset/base/214812

Log:
  MFV version 9.4-ESV-R3
  
  This version contains several fixes for DNSSEC and DLV, as well as
  fixes relevant to any resolving name server.

Added:
  stable/7/contrib/bind9/doc/draft/draft-ietf-behave-dns64-10.txt
     - copied unchanged from r214805, vendor/bind9/dist-9.4/doc/draft/draft-ietf-behave-dns64-10.txt
  stable/7/contrib/bind9/doc/draft/draft-ietf-dnsext-dnssec-registry-fixes-06.txt
     - copied unchanged from r214805, vendor/bind9/dist-9.4/doc/draft/draft-ietf-dnsext-dnssec-registry-fixes-06.txt
  stable/7/contrib/bind9/doc/draft/draft-ietf-dnsop-dnssec-key-timing-00.txt
     - copied unchanged from r214805, vendor/bind9/dist-9.4/doc/draft/draft-ietf-dnsop-dnssec-key-timing-00.txt
  stable/7/contrib/bind9/doc/draft/draft-mekking-dnsop-auto-cpsync-00.txt
     - copied unchanged from r214805, vendor/bind9/dist-9.4/doc/draft/draft-mekking-dnsop-auto-cpsync-00.txt
  stable/7/contrib/bind9/doc/draft/draft-yao-dnsext-bname-04.txt
     - copied unchanged from r214805, vendor/bind9/dist-9.4/doc/draft/draft-yao-dnsext-bname-04.txt
  stable/7/contrib/bind9/doc/rfc/rfc5933.txt
     - copied unchanged from r214805, vendor/bind9/dist-9.4/doc/rfc/rfc5933.txt
Deleted:
  stable/7/contrib/bind9/doc/draft/draft-ietf-behave-dns64-09.txt
  stable/7/contrib/bind9/doc/draft/draft-ietf-dnsext-dnssec-gost-07.txt
Modified:
  stable/7/contrib/bind9/CHANGES
  stable/7/contrib/bind9/bin/named/query.c
  stable/7/contrib/bind9/doc/arm/isc-logo.eps
  stable/7/contrib/bind9/doc/arm/isc-logo.pdf
  stable/7/contrib/bind9/doc/rfc/index
  stable/7/contrib/bind9/lib/dns/api
  stable/7/contrib/bind9/lib/dns/include/dns/ncache.h
  stable/7/contrib/bind9/lib/dns/include/dns/types.h
  stable/7/contrib/bind9/lib/dns/ncache.c
  stable/7/contrib/bind9/lib/dns/resolver.c
  stable/7/contrib/bind9/lib/dns/validator.c
  stable/7/contrib/bind9/version
Directory Properties:
  stable/7/contrib/bind9/   (props changed)

Modified: stable/7/contrib/bind9/CHANGES
==============================================================================
--- stable/7/contrib/bind9/CHANGES	Thu Nov  4 21:48:39 2010	(r214811)
+++ stable/7/contrib/bind9/CHANGES	Thu Nov  4 21:50:19 2010	(r214812)
@@ -1,3 +1,32 @@
+	--- 9.4-ESV-R3 released ---
+
+2925.	[bug]		Named failed to accept uncachable negative responses
+			from insecure zones. [RT# 21555]
+
+2921.	[bug]		The resolver could attempt to destroy a fetch context
+			too soon.  [RT #19878]
+
+2904.   [bug]           When using DLV, sub-zones of the zones in the DLV,
+			could be incorrectly marked as insecure instead of
+			secure leading to negative proofs failing.  This was
+			a unintended outcome from change 2890. [RT# 21392]
+
+2900.	[bug]	 	The placeholder negative caching element was not
+			properly constructed triggering a INSIST in 
+			dns_ncache_towire(). [RT #21346]
+
+2890.	[bug]		Handle the introduction of new trusted-keys and
+			DS, DLV RRsets better. [RT #21097]
+
+2869.	[bug]		Fix arguments to dns_keytable_findnextkeynode() call.
+			[RT #20877]
+
+2678.	[func]		Treat DS queries as if "minimal-response yes;"
+			was set. [RT #20258]
+
+2427.	[func]		Treat DNSKEY queries as if "minimal-response yes;"
+			was set. [RT #18528]
+
 	--- 9.4-ESV-R2 released ---
 
 2876.	[bug]		Named could return SERVFAIL for negative responses

Modified: stable/7/contrib/bind9/bin/named/query.c
==============================================================================
--- stable/7/contrib/bind9/bin/named/query.c	Thu Nov  4 21:48:39 2010	(r214811)
+++ stable/7/contrib/bind9/bin/named/query.c	Thu Nov  4 21:50:19 2010	(r214812)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2004-2009  Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
  * Copyright (C) 1999-2003  Internet Software Consortium.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: query.c,v 1.257.18.53 2009/12/30 08:55:48 jinmei Exp $ */
+/* $Id: query.c,v 1.257.18.55 2010/07/03 23:45:26 tbox Exp $ */
 
 /*! \file */
 
@@ -4654,6 +4654,13 @@ ns_query_start(ns_client_t *client) {
 	}
 
 	/*
+	 * Turn on minimal response for DNSKEY and DS queries.
+	 */
+	if (qtype == dns_rdatatype_dnskey || qtype == dns_rdatatype_ds)
+		client->query.attributes |= (NS_QUERYATTR_NOAUTHORITY |
+					     NS_QUERYATTR_NOADDITIONAL);
+
+	/*
 	 * If the client has requested that DNSSEC checking be disabled,
 	 * allow lookups to return pending data and instruct the resolver
 	 * to return data before validation has completed.

Modified: stable/7/contrib/bind9/doc/arm/isc-logo.eps
==============================================================================
Binary file (source and/or target). No diff available.

Modified: stable/7/contrib/bind9/doc/arm/isc-logo.pdf
==============================================================================
Binary file (source and/or target). No diff available.

Copied: stable/7/contrib/bind9/doc/draft/draft-ietf-behave-dns64-10.txt (from r214805, vendor/bind9/dist-9.4/doc/draft/draft-ietf-behave-dns64-10.txt)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/7/contrib/bind9/doc/draft/draft-ietf-behave-dns64-10.txt	Thu Nov  4 21:50:19 2010	(r214812, copy of r214805, vendor/bind9/dist-9.4/doc/draft/draft-ietf-behave-dns64-10.txt)
@@ -0,0 +1,1736 @@
+
+
+
+BEHAVE WG                                                     M. Bagnulo
+Internet-Draft                                                      UC3M
+Intended status: Standards Track                             A. Sullivan
+Expires: January 6, 2011                                        Shinkuro
+                                                             P. Matthews
+                                                          Alcatel-Lucent
+                                                          I. van Beijnum
+                                                          IMDEA Networks
+                                                            July 5, 2010
+
+
+DNS64: DNS extensions for Network Address Translation from IPv6 Clients
+                            to IPv4 Servers
+                       draft-ietf-behave-dns64-10
+
+Abstract
+
+   DNS64 is a mechanism for synthesizing AAAA records from A records.
+   DNS64 is used with an IPv6/IPv4 translator to enable client-server
+   communication between an IPv6-only client and an IPv4-only server,
+   without requiring any changes to either the IPv6 or the IPv4 node,
+   for the class of applications that work through NATs.  This document
+   specifies DNS64, and provides suggestions on how it should be
+   deployed in conjunction with IPv6/IPv4 translators.
+
+Status of this Memo
+
+   This Internet-Draft is submitted in full conformance with the
+   provisions of BCP 78 and BCP 79.
+
+   Internet-Drafts are working documents of the Internet Engineering
+   Task Force (IETF).  Note that other groups may also distribute
+   working documents as Internet-Drafts.  The list of current Internet-
+   Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+   Internet-Drafts are draft documents valid for a maximum of six months
+   and may be updated, replaced, or obsoleted by other documents at any
+   time.  It is inappropriate to use Internet-Drafts as reference
+   material or to cite them other than as "work in progress."
+
+   This Internet-Draft will expire on January 6, 2011.
+
+Copyright Notice
+
+   Copyright (c) 2010 IETF Trust and the persons identified as the
+   document authors.  All rights reserved.
+
+   This document is subject to BCP 78 and the IETF Trust's Legal
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 1]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   Provisions Relating to IETF Documents
+   (http://trustee.ietf.org/license-info) in effect on the date of
+   publication of this document.  Please review these documents
+   carefully, as they describe your rights and restrictions with respect
+   to this document.  Code Components extracted from this document must
+   include Simplified BSD License text as described in Section 4.e of
+   the Trust Legal Provisions and are provided without warranty as
+   described in the Simplified BSD License.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 2]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+Table of Contents
+
+   1.  Introduction . . . . . . . . . . . . . . . . . . . . . . . . .  5
+   2.  Overview . . . . . . . . . . . . . . . . . . . . . . . . . . .  5
+   3.  Background to DNS64-DNSSEC interaction . . . . . . . . . . . .  8
+   4.  Terminology  . . . . . . . . . . . . . . . . . . . . . . . . .  9
+   5.  DNS64 Normative Specification  . . . . . . . . . . . . . . . . 10
+     5.1.  Resolving AAAA queries and the answer section  . . . . . . 11
+       5.1.1.  The answer when there is AAAA data available . . . . . 11
+       5.1.2.  The answer when there is an error  . . . . . . . . . . 11
+       5.1.3.  Dealing with timeouts  . . . . . . . . . . . . . . . . 12
+       5.1.4.  Special exclusion set for AAAA records . . . . . . . . 12
+       5.1.5.  Dealing with CNAME and DNAME . . . . . . . . . . . . . 12
+       5.1.6.  Data for the answer when performing synthesis  . . . . 13
+       5.1.7.  Performing the synthesis . . . . . . . . . . . . . . . 13
+       5.1.8.  Querying in parallel . . . . . . . . . . . . . . . . . 14
+     5.2.  Generation of the IPv6 representations of IPv4
+           addresses  . . . . . . . . . . . . . . . . . . . . . . . . 14
+     5.3.  Handling other Resource Records and the Additional
+           Section  . . . . . . . . . . . . . . . . . . . . . . . . . 15
+       5.3.1.  PTR Resource Record  . . . . . . . . . . . . . . . . . 15
+       5.3.2.  Handling the additional section  . . . . . . . . . . . 16
+       5.3.3.  Other Resource Records . . . . . . . . . . . . . . . . 17
+     5.4.  Assembling a synthesized response to a AAAA query  . . . . 17
+     5.5.  DNSSEC processing: DNS64 in recursive resolver mode  . . . 17
+   6.  Deployment notes . . . . . . . . . . . . . . . . . . . . . . . 18
+     6.1.  DNS resolvers and DNS64  . . . . . . . . . . . . . . . . . 19
+     6.2.  DNSSEC validators and DNS64  . . . . . . . . . . . . . . . 19
+     6.3.  DNS64 and multihomed and dual-stack hosts  . . . . . . . . 19
+       6.3.1.  IPv6 multihomed hosts  . . . . . . . . . . . . . . . . 19
+       6.3.2.  Accidental dual-stack DNS64 use  . . . . . . . . . . . 20
+       6.3.3.  Intentional dual-stack DNS64 use . . . . . . . . . . . 20
+   7.  Deployment scenarios and examples  . . . . . . . . . . . . . . 21
+     7.1.  Example of An-IPv6-network-to-IPv4-Internet setup with
+           DNS64 in DNS server mode . . . . . . . . . . . . . . . . . 22
+     7.2.  An example of an-IPv6-network-to-IPv4-Internet setup
+           with DNS64 in stub-resolver mode . . . . . . . . . . . . . 23
+     7.3.  Example of IPv6-Internet-to-an-IPv4-network setup
+           DNS64 in DNS server mode . . . . . . . . . . . . . . . . . 24
+   8.  Security Considerations  . . . . . . . . . . . . . . . . . . . 27
+   9.  IANA Considerations  . . . . . . . . . . . . . . . . . . . . . 27
+   10. Contributors . . . . . . . . . . . . . . . . . . . . . . . . . 27
+   11. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 27
+   12. References . . . . . . . . . . . . . . . . . . . . . . . . . . 28
+     12.1. Normative References . . . . . . . . . . . . . . . . . . . 28
+     12.2. Informative References . . . . . . . . . . . . . . . . . . 28
+   Appendix A.  Motivations and Implications of synthesizing AAAA
+                Resource Records when real AAAA Resource Records
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 3]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+                exist . . . . . . . . . . . . . . . . . . . . . . . . 29
+   Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 31
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 4]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+1.  Introduction
+
+   This document specifies DNS64, a mechanism that is part of the
+   toolbox for IPv6-IPv4 transition and co-existence.  DNS64, used
+   together with an IPv6/IPv4 translator such as stateful NAT64
+   [I-D.ietf-behave-v6v4-xlate-stateful], allows an IPv6-only client to
+   initiate communications by name to an IPv4-only server.
+
+   DNS64 is a mechanism for synthesizing AAAA resource records (RRs)
+   from A RRs.  A synthetic AAAA RR created by the DNS64 from an
+   original A RR contains the same owner name of the original A RR but
+   it contains an IPv6 address instead of an IPv4 address.  The IPv6
+   address is an IPv6 representation of the IPv4 address contained in
+   the original A RR.  The IPv6 representation of the IPv4 address is
+   algorithmically generated from the IPv4 address returned in the A RR
+   and a set of parameters configured in the DNS64 (typically, an IPv6
+   prefix used by IPv6 representations of IPv4 addresses and optionally
+   other parameters).
+
+   Together with an IPv6/IPv4 translator, these two mechanisms allow an
+   IPv6-only client to initiate communications to an IPv4-only server
+   using the FQDN of the server.
+
+   These mechanisms are expected to play a critical role in the IPv4-
+   IPv6 transition and co-existence.  Due to IPv4 address depletion, it
+   is likely that in the future, many IPv6-only clients will want to
+   connect to IPv4-only servers.  In the typical case, the approach only
+   requires the deployment of IPv6/IPv4 translators that connect an
+   IPv6-only network to an IPv4-only network, along with the deployment
+   of one or more DNS64-enabled name servers.  However, some advanced
+   features require performing the DNS64 function directly in the end-
+   hosts themselves.
+
+
+2.  Overview
+
+   This section provides a non-normative introduction to the DNS64
+   mechanism.
+
+   We assume that we have one or more IPv6/IPv4 translator boxes
+   connecting an IPv4 network and an IPv6 network.  The IPv6/IPv4
+   translator device provides translation services between the two
+   networks enabling communication between IPv4-only hosts and IPv6-only
+   hosts.  (NOTE: By IPv6-only hosts we mean hosts running IPv6-only
+   applications, hosts that can only use IPv6, as well as cases where
+   only IPv6 connectivity is available to the client.  By IPv4-only
+   servers we mean servers running IPv4-only applications, servers that
+   can only use IPv4, as well as cases where only IPv4 connectivity is
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 5]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   available to the server).  Each IPv6/IPv4 translator used in
+   conjunction with DNS64 must allow communications initiated from the
+   IPv6-only host to the IPv4-only host.
+
+   To allow an IPv6 initiator to do a standard AAAA RR DNS lookup to
+   learn the address of the responder, DNS64 is used to synthesize a
+   AAAA record from an A record containing a real IPv4 address of the
+   responder, whenever the DNS64 cannot retrieve a AAAA record for the
+   queried name.  The DNS64 service appears as a regular DNS server or
+   resolver to the IPv6 initiator.  The DNS64 receives a AAAA DNS query
+   generated by the IPv6 initiator.  It first attempts a resolution for
+   the requested AAAA records.  If there are no AAAA records available
+   for the target node (which is the normal case when the target node is
+   an IPv4-only node), DNS64 performs a query for A records.  For each A
+   record discovered, DNS64 creates a synthetic AAAA RR from the
+   information retrieved in the A RR.
+
+   The owner name of a synthetic AAAA RR is the same as that of the
+   original A RR, but an IPv6 representation of the IPv4 address
+   contained in the original A RR is included in the AAAA RR.  The IPv6
+   representation of the IPv4 address is algorithmically generated from
+   the IPv4 address and additional parameters configured in the DNS64.
+   Among those parameters configured in the DNS64, there is at least one
+   IPv6 prefix.  If not explicitly mentioned, all prefixes are treated
+   equally and the operations described in this document are performed
+   using the prefixes available.  So as to be general, we will call any
+   of these prefixes Pref64::/n, and describe the operations made with
+   the generic prefix Pref64::/n.  The IPv6 addresses representing IPv4
+   addresses included in the AAAA RR synthesized by the DNS64 contain
+   Pref64::/n and they also embed the original IPv4 address.
+
+   The same algorithm and the same Pref64::/n prefix(es) must be
+   configured both in the DNS64 device and the IPv6/IPv4 translator(s),
+   so that both can algorithmically generate the same IPv6
+   representation for a given IPv4 address.  In addition, it is required
+   that IPv6 packets addressed to an IPv6 destination address that
+   contains the Pref64::/n be delivered to an IPv6/IPv4 translator that
+   has that particular Pref64::/n configured, so they can be translated
+   into IPv4 packets.
+
+   Once the DNS64 has synthesized the AAAA RRs, the synthetic AAAA RRs
+   are passed back to the IPv6 initiator, which will initiate an IPv6
+   communication with the IPv6 address associated with the IPv4
+   receiver.  The packet will be routed to an IPv6/IPv4 translator which
+   will forward it to the IPv4 network.
+
+   In general, the only shared state between the DNS64 and the IPv6/IPv4
+   translator is the Pref64::/n and an optional set of static
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 6]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   parameters.  The Pref64::/n and the set of static parameters must be
+   configured to be the same on both; there is no communication between
+   the DNS64 device and IPv6/IPv4 translator functions.  The mechanism
+   to be used for configuring the parameters of the DNS64 is beyond the
+   scope of this memo.
+
+   The prefixes to be used as Pref64::/n and their applicability are
+   discussed in [I-D.ietf-behave-address-format].  There are two types
+   of prefixes that can be used as Pref64::/n.
+
+      The Pref64::/n can be the Well-Known Prefix 64:FF9B::/96 reserved
+      by [I-D.ietf-behave-address-format] for the purpose of
+      representing IPv4 addresses in IPv6 address space.
+
+      The Pref64::/n can be a Network-Specific Prefix (NSP).  An NSP is
+      an IPv6 prefix assigned by an organization to create IPv6
+      representations of IPv4 addresses.
+
+   The main difference in the nature of the two types of prefixes is
+   that the NSP is a locally assigned prefix that is under control of
+   the organization that is providing the translation services, while
+   the Well-Known Prefix is a prefix that has a global meaning since it
+   has been assigned for the specific purpose of representing IPv4
+   addresses in IPv6 address space.
+
+   The DNS64 function can be performed in any of three places.  The
+   terms below are more formally defined in Section 4.
+
+   The first option is to locate the DNS64 function in authoritative
+   servers for a zone.  In this case, the authoritative server provides
+   synthetic AAAA RRs for an IPv4-only host in its zone.  This is one
+   type of DNS64 server.
+
+   Another option is to locate the DNS64 function in recursive name
+   servers serving end hosts.  In this case, when an IPv6-only host
+   queries the name server for AAAA RRs for an IPv4-only host, the name
+   server can perform the synthesis of AAAA RRs and pass them back to
+   the IPv6-only initiator.  The main advantage of this mode is that
+   current IPv6 nodes can use this mechanism without requiring any
+   modification.  This mode is called "DNS64 in DNS recursive resolver
+   mode".  This is a second type of DNS64 server, and it is also one
+   type of DNS64 resolver.
+
+   The last option is to place the DNS64 function in the end hosts,
+   coupled to the local (stub) resolver.  In this case, the stub
+   resolver will try to obtain (real) AAAA RRs and in case they are not
+   available, the DNS64 function will synthesize AAAA RRs for internal
+   usage.  This mode is compatible with some advanced functions like
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 7]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   DNSSEC validation in the end host.  The main drawback of this mode is
+   its deployability, since it requires changes in the end hosts.  This
+   mode is called "DNS64 in stub-resolver mode".  This is the second
+   type of DNS64 resolver.
+
+
+3.  Background to DNS64-DNSSEC interaction
+
+   DNSSEC ([RFC4033], [RFC4034], [RFC4035]) presents a special challenge
+   for DNS64, because DNSSEC is designed to detect changes to DNS
+   answers, and DNS64 may alter answers coming from an authoritative
+   server.
+
+   A recursive resolver can be security-aware or security-oblivious.
+   Moreover, a security-aware recursive resolver can be validating or
+   non-validating, according to operator policy.  In the cases below,
+   the recursive resolver is also performing DNS64, and has a local
+   policy to validate.  We call this general case vDNS64, but in all the
+   cases below the DNS64 functionality should be assumed needed.
+
+   DNSSEC includes some signaling bits that offer some indicators of
+   what the query originator understands.
+
+   If a query arrives at a vDNS64 device with the "DNSSEC OK" (DO) bit
+   set, the query originator is signaling that it understands DNSSEC.
+   The DO bit does not indicate that the query originator will validate
+   the response.  It only means that the query originator can understand
+   responses containing DNSSEC data.  Conversely, if the DO bit is
+   clear, that is evidence that the querying agent is not aware of
+   DNSSEC.
+
+   If a query arrives at a vDNS64 device with the "Checking Disabled"
+   (CD) bit set, it is an indication that the querying agent wants all
+   the validation data so it can do checking itself.  By local policy,
+   vDNS64 could still validate, but it must return all data to the
+   querying agent anyway.
+
+   Here are the possible cases:
+
+   1.  A DNS64 (DNSSEC-aware or DNSSEC-oblivious) receives a query with
+       the DO bit clear.  In this case, DNSSEC is not a concern, because
+       the querying agent does not understand DNSSEC responses.
+
+   2.  A security-oblivious DNS64 receives a query with the DO bit set,
+       and the CD bit clear or set.  This is just like the case of a
+       non-DNS64 case: the server doesn't support it, so the querying
+       agent is out of luck.
+
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 8]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   3.  A security-aware and non-validating DNS64 receives a query with
+       the DO bit set and the CD bit clear.  Such a resolver is not
+       validating responses, likely due to local policy (see [RFC4035],
+       section 4.2).  For that reason, this case amounts to the same as
+       the previous case, and no validation happens.
+
+   4.  A security-aware and non-validating DNS64 receives a query with
+       the DO bit set and the CD bit set.  In this case, the resolver is
+       supposed to pass on all the data it gets to the query initiator
+       (see section 3.2.2 of [RFC4035]).  This case will not work with
+       DNS64, unless the validating resolver is prepared to do DNS64
+       itself.  If the DNS64 server modifies the record, the client will
+       get the data back and try to validate it, and the data will be
+       invalid as far as the client is concerned.
+
+   5.  A security-aware and validating DNS64 node receives a query with
+       the DO bit clear and CD clear.  In this case, the resolver
+       validates the data.  If it fails, it returns RCODE 2 (Server
+       failure); otherwise, it returns the answer.  This is the ideal
+       case for vDNS64.  The resolver validates the data, and then
+       synthesizes the new record and passes that to the client.  The
+       client, which is presumably not validating (else it should have
+       set DO and CD), cannot tell that DNS64 is involved.
+
+   6.  A security-aware and validating DNS64 node receives a query with
+       the DO bit set and CD clear.  This works like the previous case,
+       except that the resolver should also set the "Authentic Data"
+       (AD) bit on the response.
+
+   7.  A security-aware and validating DNS64 node receives a query with
+       the DO bit set and CD set.  This is effectively the same as the
+       case where a security-aware and non-validating recursive resolver
+       receives a similar query, and the same thing will happen: the
+       downstream validator will mark the data as invalid if DNS64 has
+       performed synthesis.  The node needs to do DNS64 itself, or else
+       communication will fail.
+
+
+4.  Terminology
+
+   This section provides definitions for the special terms used in the
+   document.
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in RFC 2119 [RFC2119].
+
+
+
+
+
+Bagnulo, et al.          Expires January 6, 2011                [Page 9]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   Authoritative server:  A DNS server that can answer authoritatively a
+      given DNS question.
+
+   DNS64:  A logical function that synthesizes DNS resource records (e.g.,
+      AAAA records containing IPv6 addresses) from DNS resource records
+      actually contained in the DNS (e.g., A records containing IPv4
+      addresses).
+
+   DNS64 recursor:  A recursive resolver that provides the DNS64
+      functionality as part of its operation.  This is the same thing as
+      "DNS64 in recursive resolver mode".
+
+   DNS64 resolver:  Any resolver (stub resolver or recursive resolver)
+      that provides the DNS64 function.
+
+   DNS64 server:  Any server providing the DNS64 function.
+
+   Recursive resolver:  A DNS server that accepts requests from one
+      resolver, and asks another server (of some description) for the
+      answer on behalf of the first resolver.
+
+   Synthetic RR:  A DNS resource record (RR) that is not contained in
+      any zone data file, but has been synthesized from other RRs.  An
+      example is a synthetic AAAA record created from an A record.
+
+   IPv6/IPv4 translator:  A device that translates IPv6 packets to IPv4
+      packets and vice-versa.  It is only required that the
+      communication initiated from the IPv6 side be supported.
+
+   For a detailed understanding of this document, the reader should also
+   be familiar with DNS terminology from [RFC1034], [RFC1035] and
+   current NAT terminology from [RFC4787].  Some parts of this document
+   assume familiarity with the terminology of the DNS security
+   extensions outlined in [RFC4035].  It is worth emphasizing that while
+   DNS64 is a logical function separate from the DNS, it is nevertheless
+   closely associated with that protocol.  It depends on the DNS
+   protocol, and some behavior of DNS64 will interact with regular DNS
+   responses.
+
+
+5.  DNS64 Normative Specification
+
+   DNS64 is a logical function that synthesizes AAAA records from A
+   records.  The DNS64 function may be implemented in a stub resolver,
+   in a recursive resolver, or in an authoritative name server.  It
+   works within those DNS functions, and appears on the network as
+   though it were a "plain" DNS resolver or name server conforming to
+   [RFC1034] and [RFC1035].
+
+
+
+Bagnulo, et al.          Expires January 6, 2011               [Page 10]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   The implementation SHOULD support mapping of separate IPv4 address
+   ranges to separate IPv6 prefixes for AAAA record synthesis.  This
+   allows handling of special use IPv4 addresses [RFC5735].
+
+   DNS64 also responds to PTR queries involving addresses containing any
+   of the IPv6 prefixes it uses for synthesis of AAAA RRs.
+
+5.1.  Resolving AAAA queries and the answer section
+
+   When the DNS64 receives a query for RRs of type AAAA and class IN, it
+   first attempts to retrieve non-synthetic RRs of this type and class,
+   either by performing a query or, in the case of an authoritative
+   server, by examining its own results.  The query may be answered from
+   a local cache, if one is available.  DNS64 operation for classes
+   other than IN is undefined, and a DNS64 MUST behave as though no
+   DNS64 function is configured.
+
+5.1.1.  The answer when there is AAAA data available
+
+   If the query results in one or more AAAA records in the answer
+   section, the result is returned to the requesting client as per
+   normal DNS semantics, except in the case where any of the AAAA
+   records match a special exclusion set of prefixes, considered in
+   Section 5.1.4.  If there is (non-excluded) AAAA data available, DNS64
+   SHOULD NOT include synthetic AAAA RRs in the response (see Appendix A
+   for an analysis of the motivations for and the implications of not
+   complying with this recommendation).  By default DNS64
+   implementations MUST NOT synthesize AAAA RRs when real AAAA RRs
+   exist.
+
+5.1.2.  The answer when there is an error
+
+   If the query results in a response with RCODE other than 0 (No error
+   condition), then there are two possibilities.  A result with RCODE=3
+   (Name Error) is handled according to normal DNS operation (which is
+   normally to return the error to the client).  This stage is still
+   prior to any synthesis having happened, so a response to be returned
+   to the client does not need any special assembly beyond what would
+   usually happen in DNS operation.
+
+   Any other RCODE is treated as though the RCODE were 0 and the answer
+   section were empty.  This is because of the large number of different
+   responses from deployed name servers when they receive AAAA queries
+   without a AAAA record being available (see [RFC4074]).  Note that
+   this means, for practical purposes, that several different classes of
+   error in the DNS are all treated as though a AAAA record is not
+   available for that owner name.
+
+
+
+
+Bagnulo, et al.          Expires January 6, 2011               [Page 11]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   It is important to note that, as of this writing, some servers
+   respond with RCODE=3 to a AAAA query even if there is an A record
+   available for that owner name.  Those servers are in clear violation
+   of the meaning of RCODE 3, and it is expected that they will decline
+   in use as IPv6 deployment increases.
+
+5.1.3.  Dealing with timeouts
+
+   If the query receives no answer before the timeout (which might be
+   the timeout from every authoritative server, depending on whether the
+   DNS64 is in recursive resolver mode), it is treated as RCODE=2
+   (Server failure).
+
+5.1.4.  Special exclusion set for AAAA records
+
+   Some IPv6 addresses are not actually usable by IPv6-only hosts.  If
+   they are returned to IPv6-only querying agents as AAAA records,
+   therefore, the goal of decreasing the number of failure modes will
+   not be attained.  Examples include AAAA records with addresses in the
+   ::ffff:0:0/96 network, and possibly (depending on the context) AAAA
+   records with the site's Pref64::/n or the Well-Known Prefix (see
+   below for more about the Well-Known Prefix).  A DNS64 implementation
+   SHOULD provide a mechanism to specify IPv6 prefix ranges to be
+   treated as though the AAAA containing them were an empty answer.  An
+   implementation SHOULD include the ::ffff:0:0/96 network in that range by
+   default.  Failure to provide this facility will mean that clients
+   querying the DNS64 function may not be able to communicate with hosts
+   that would be reachable from a dual-stack host.
+
+   When the DNS64 performs its initial AAAA query, if it receives an
+   answer with only AAAA records containing addresses in the excluded
+   range(s), then it MUST treat the answer as though it were an empty
+   answer, and proceed accordingly.  If it receives an answer with at
+   least one AAAA record containing an address outside any of the
+   excluded range(s), then it MAY build an answer section for a response
+   including only the AAAA record(s) that do not contain any of the
+   addresses inside the excluded ranges.  That answer section is used in
+   the assembly of a response as detailed in Section 5.4.
+   Alternatively, it MAY treat the answer as though it were an empty
+   answer, and proceed accordingly.  It MUST NOT return the offending
+   AAAA records as part of a response.
+
+5.1.5.  Dealing with CNAME and DNAME
+
+   If the response contains a CNAME or a DNAME, then the CNAME or DNAME
+   chain is followed until the first terminating A or AAAA record is
+   reached.  This may require the DNS64 to ask for an A record, in case
+   the response to the original AAAA query is a CNAME or DNAME without a
+
+
+
+Bagnulo, et al.          Expires January 6, 2011               [Page 12]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   AAAA record to follow.  The resulting AAAA or A record is treated
+   like any other AAAA or A case, as appropriate.
+
+   When assembling the answer section, any chains of CNAME or DNAME RRs
+   are included as part of the answer along with the synthetic AAAA (if
+   appropriate).
+
+5.1.6.  Data for the answer when performing synthesis
+
+   If the query results in no error but an empty answer section in the
+   response, the DNS64 attempts to retrieve A records for the name in
+   question, either by performing another query or, in the case of an
+   authoritative server, by examining its own results.  If this new A RR
+   query results in an empty answer or in an error, then the empty
+   result or error is used as the basis for the answer returned to the
+   querying client.  If instead the query results in one or more A RRs,
+   the DNS64 synthesizes AAAA RRs based on the A RRs according to the
+   procedure outlined in Section 5.1.7.  The DNS64 returns the
+   synthesized AAAA records in the answer section, removing the A
+   records that form the basis of the synthesis.
+
+5.1.7.  Performing the synthesis
+
+   A synthetic AAAA record is created from an A record as follows:
+
+   o  The NAME field is set to the NAME field from the A record
+
+   o  The TYPE field is set to 28 (AAAA)
+
+   o  The CLASS field is set to the original CLASS field, 1.  Under this
+      specification, DNS64 for any CLASS other than 1 is undefined.
+
+   o  The TTL field is set to the minimum of the TTL of the original A
+      RR and the SOA RR for the queried domain.  (Note that in order to
+      obtain the TTL of the SOA RR, the DNS64 does not need to perform a
+      new query, but it can remember the TTL from the SOA RR in the
+      negative response to the AAAA query.  If the SOA RR was not
+      delivered with the negative response to the AAAA query, then the
+      DNS64 SHOULD use a default value of 600 seconds.  It is possible
+      instead to query explicitly for the SOA RR and use the result of
+      that query, but this will increase query load and time to
+      resolution for little additional benefit.)  This is in keeping
+      with the approach used in negative caching ([RFC2308]).
+
+   o  The RDLENGTH field is set to 16
+
+   o  The RDATA field is set to the IPv6 representation of the IPv4
+      address from the RDATA field of the A record.  The DNS64 SHOULD
+
+
+
+Bagnulo, et al.          Expires January 6, 2011               [Page 13]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+      check each A RR against configured IPv4 address ranges and select
+      the corresponding IPv6 prefix to use in synthesizing the AAAA RR.
+      See Section 5.2 for discussion of the algorithms to be used in
+      effecting the transformation.
+
+5.1.8.  Querying in parallel
+
+   The DNS64 MAY perform the query for the AAAA RR and for the A RR in
+   parallel, in order to minimize the delay.  However, this would result
+   in performing unnecessary A RR queries in the case where no AAAA RR
+   synthesis is required.  A possible trade-off would be to perform them
+   sequentially but with a very short interval between them, so if we
+   obtain a fast reply, we avoid doing the additional query.  (Note that
+   this discussion is relevant only if the DNS64 function needs to
+   perform external queries to fetch the RR.  If the needed RR
+   information is available locally, as in the case of an authoritative
+   server, the issue is no longer relevant.)
+
+5.2.  Generation of the IPv6 representations of IPv4 addresses
+
+   DNS64 supports multiple algorithms for the generation of the IPv6
+   representation of an IPv4 address.  The constraints imposed on the
+   generation algorithms are the following:
+
+      The same algorithm to create an IPv6 address from an IPv4 address
+      MUST be used by both a DNS64 to create the IPv6 address to be
+      returned in the synthetic AAAA RR from the IPv4 address contained
+      in an original A RR, and by an IPv6/IPv4 translator to create the
+      IPv6 address to be included in the source address field of the
+      outgoing IPv6 packets from the IPv4 address included in the source
+      address field of the incoming IPv4 packet.
+
+      The algorithm MUST be reversible; i.e., it MUST be possible to
+      derive the original IPv4 address from the IPv6 representation.
+
+      The input for the algorithm MUST be limited to the IPv4 address,
+      the IPv6 prefix (denoted Pref64::/n) used in the IPv6
+      representations and optionally a set of stable parameters that are
+      configured in the DNS64 and in the NAT64 (such as fixed string to
+      be used as a suffix).
+
+         For each prefix Pref64::/n, n MUST be less than or equal to 96.
+         If one or more Pref64::/n are configured in the DNS64 through
+         any means (such as manually configured, or other automatic
+         means not specified in this document), the default algorithm
+         MUST use these prefixes (and not use the Well-Known Prefix).
+         If no prefix is available, the algorithm MUST use the Well-
+         Known Prefix 64:FF9B::/96 defined in
+
+
+
+Bagnulo, et al.          Expires January 6, 2011               [Page 14]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+         [I-D.ietf-behave-address-format] to represent the IPv4 unicast
+         address range.
+
+      [[anchor8: Note in document: The value 64:FF9B::/96 is proposed as
+      the value for the Well-Known prefix and needs to be confirmed
+      when [I-D.ietf-behave-address-format] is published as RFC.]]
+
+   A DNS64 MUST support the algorithm for generating IPv6
+   representations of IPv4 addresses defined in Section 2 of
+   [I-D.ietf-behave-address-format].  Moreover, the aforementioned
+   algorithm MUST be the default algorithm used by the DNS64.  While the
+   normative description of the algorithm is provided in
+   [I-D.ietf-behave-address-format], a sample description of the
+   algorithm and its application to different scenarios is provided in
+   Section 7 for illustration purposes.
+
+5.3.  Handling other Resource Records and the Additional Section
+
+5.3.1.  PTR Resource Record
+
+   If a DNS64 server receives a PTR query for a record in the IP6.ARPA
+   domain, it MUST strip the IP6.ARPA labels from the QNAME, reverse the
+   address portion of the QNAME according to the encoding scheme
+   outlined in section 2.5 of [RFC3596], and examine the resulting
+   address to see whether its prefix matches any of the locally-
+   configured Pref64::/n.  There are two alternatives for a DNS64 server
+   to respond to such PTR queries.  A DNS64 server MUST provide one of
+   these, and SHOULD NOT provide both at the same time unless different
+   IP6.ARPA zones require answers of different sorts:
+
+   1.  The first option is for the DNS64 server to respond
+       authoritatively for its prefixes.  If the address prefix matches
+       any Pref64::/n used in the site, either an NSP or the Well-Known
+       Prefix (i.e. 64:FF9B::/96), then the DNS64 server MAY answer the
+       query using locally-appropriate RDATA.  The DNS64 server MAY use
+       the same RDATA for all answers.  Note that the requirement is to
+       match any Pref64::/n used at the site, and not merely the
+       locally-configured Pref64::/n.  This is because end clients could
+       ask for a PTR record matching an address received through a
+       different (site-provided) DNS64, and if this strategy is in
+       effect, those queries should never be sent to the global DNS.
+       The advantage of this strategy is that it makes plain to the
+       querying client that the prefix is one operated by the (DNS64)
+       site, and that the answers the client is getting are generated by
+       DNS64.  The disadvantage is that any useful reverse-tree
+       information that might be in the global DNS is unavailable to the
+       clients querying the DNS64.
+
+
+
+
+Bagnulo, et al.          Expires January 6, 2011               [Page 15]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+   2.  The second option is for the DNS64 nameserver to synthesize a
+       CNAME mapping the IP6.ARPA namespace to the corresponding IN-
+       ADDR.ARPA name.  The rest of the response would be the normal DNS
+       processing.  The CNAME can be signed on the fly if need be.  The
+       advantage of this approach is that any useful information in the
+       reverse tree is available to the querying client.  The
+       disadvantage is that it adds additional load to the DNS64
+       (because CNAMEs have to be synthesized for each PTR query that
+       matches the Pref64::/n), and that it may require signing on the
+       fly.  In addition, the generated CNAME could correspond to an
+       unpopulated in-addr.arpa zone, so the CNAME would provide a
+       reference to a non-existent record.
+
+   If the address prefix does not match any Pref64::/n, then the DNS64
+   server MUST process the query as though it were any other query; i.e.
+   a recursive nameserver MUST attempt to resolve the query as though it
+   were any other (non-A/AAAA) query, and an authoritative server MUST
+   respond authoritatively or with a referral, as appropriate.
+
+5.3.2.  Handling the additional section
+
+   DNS64 synthesis MUST NOT be performed on any records in the
+   additional section of synthesized answers.  The DNS64 MUST pass the
+   additional section unchanged.
+
+   It may appear that adding synthetic records to the additional section
+   is desirable, because clients sometimes use the data in the
+   additional section to proceed without having to re-query.  There is
+   in general no promise, however, that the additional section will
+   contain all the relevant records, so any client that depends on the
+   additional section being able to satisfy its needs (i.e. without
+   additional queries) is necessarily broken.  An IPv6-only client that
+   needs a AAAA record, therefore, will send a query for the necessary
+   AAAA record if it is unable to find such a record in the additional
+   section of an answer it is consuming.  For a correctly-functioning
+   client, the effect would be no different if the additional section
+   were empty.
+
+   The alternative, of removing the A records in the additional section
+   and replacing them with synthetic AAAA records, may cause a host
+   behind a NAT64 to query directly a nameserver that is unaware of the
+   NAT64 in question.  The result in this case will be resolution
+   failure anyway, only later in the resolution operation.
+
+   The prohibition on synthetic data in the additional section reduces,
+   but does not eliminate, the possibility of resolution failures due to
+   cached DNS data from behind the DNS64.  See Section 6.
+
+
+
+
+Bagnulo, et al.          Expires January 6, 2011               [Page 16]
+
+Internet-Draft                    DNS64                        July 2010
+
+
+5.3.3.  Other Resource Records
+
+   If the DNS64 is in recursive resolver mode, then considerations
+   outlined in [I-D.ietf-dnsop-default-local-zones] may be relevant.
+
+   All other RRs MUST be returned unchanged.  This includes responses to
+   queries for A RRs.
+
+5.4.  Assembling a synthesized response to a AAAA query
+
+   A DNS64 uses different pieces of data to build the response returned
+   to the querying client.
+
+   The query that is used as the basis for synthesis results either in
+   an error, an answer that can be used as a basis for synthesis, or an
+   empty (authoritative) answer.  If there is an empty answer, then the
+   DNS64 responds to the original querying client with the answer the
+   DNS64 received to the original (initiator's) query.  Otherwise, the
+   response is assembled as follows.
+
+   The header fields are set according to the usual rules for recursive
+   or authoritative servers, depending on the role that the DNS64 is
+   serving.  The question section is copied from the original
+   (initiator's) query.  The answer section is populated according to
+   the rules in Section 5.1.7.  The authority and additional sections
+   are copied from the response to the final query that the DNS64
+   performed, and used as the basis for synthesis.
+
+   The final response from the DNS64 is subject to all the standard DNS
+   rules, including truncation [RFC1035] and EDNS0 handling [RFC2671].
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

From owner-svn-src-stable-7@FreeBSD.ORG  Fri Nov  5 13:45:26 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id D0374106564A;
	Fri,  5 Nov 2010 13:45:26 +0000 (UTC) (envelope-from jhb@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id A33168FC12;
	Fri,  5 Nov 2010 13:45:26 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA5DjQQt098717;
	Fri, 5 Nov 2010 13:45:26 GMT (envelope-from jhb@svn.freebsd.org)
Received: (from jhb@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA5DjQFu098715;
	Fri, 5 Nov 2010 13:45:26 GMT (envelope-from jhb@svn.freebsd.org)
Message-Id: <201011051345.oA5DjQFu098715@svn.freebsd.org>
From: John Baldwin 
Date: Fri, 5 Nov 2010 13:45:26 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214837 - stable/7/share/man/man9
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Fri, 05 Nov 2010 13:45:26 -0000

Author: jhb
Date: Fri Nov  5 13:45:26 2010
New Revision: 214837
URL: http://svn.freebsd.org/changeset/base/214837

Log:
  MFC 214673: Fix a few typos and style nits in the example code.

Modified:
  stable/7/share/man/man9/sysctl_ctx_init.9
Directory Properties:
  stable/7/share/man/man9/   (props changed)

Modified: stable/7/share/man/man9/sysctl_ctx_init.9
==============================================================================
--- stable/7/share/man/man9/sysctl_ctx_init.9	Fri Nov  5 13:45:13 2010	(r214836)
+++ stable/7/share/man/man9/sysctl_ctx_init.9	Fri Nov  5 13:45:26 2010	(r214837)
@@ -188,27 +188,27 @@ This example uses contexts to keep track
 struct sysctl_ctx_list clist;
 struct sysctl_oid *oidp;
 int a_int;
-char *string = "dynamic sysctl";
+const char *string = "dynamic sysctl";
  ...
 
 sysctl_ctx_init(&clist);
-oidp = SYSCTL_ADD_NODE( &clist, SYSCTL_STATIC_CHILDREN(/* tree top */),
-	OID_AUTO, "newtree", CTFLAG_RW, 0, "new top level tree");
-oidp = SYSCTL_ADD_INT( &clist, SYSCTL_CHILDREN(oidp),
+oidp = SYSCTL_ADD_NODE(&clist, SYSCTL_STATIC_CHILDREN(/* tree top */),
+	OID_AUTO, "newtree", CTLFLAG_RW, 0, "new top level tree");
+oidp = SYSCTL_ADD_INT(&clist, SYSCTL_CHILDREN(oidp),
 	OID_AUTO, "newint", CTLFLAG_RW, &a_int, 0, "new int leaf");
  ...
-oidp = SYSCTL_ADD_NODE( &clist, SYSCTL_STATIC_CHILDREN(_debug),
-	OID_AUTO, "newtree", CTFLAG_RW, 0, "new tree under debug");
-oidp = SYSCTL_ADD_STRING( &clist, SYSCTL_CHILDREN(oidp),
-	OID_AUTO, "newstring", CTLFLAG_R, string, 0, "new string leaf");
+oidp = SYSCTL_ADD_NODE(&clist, SYSCTL_STATIC_CHILDREN(_debug),
+	OID_AUTO, "newtree", CTLFLAG_RW, 0, "new tree under debug");
+oidp = SYSCTL_ADD_STRING(&clist, SYSCTL_CHILDREN(oidp),
+	OID_AUTO, "newstring", CTLFLAG_RD, string, 0, "new string leaf");
  ...
 /* Now we can free up the oids */
-if(sysctl_ctx_free(&clist)) {
+if (sysctl_ctx_free(&clist)) {
 	printf("can't free this context - other oids depend on it");
-	return(ENOTEMPTY);
+	return (ENOTEMPTY);
 } else {
-	printf("Success!\\n"):
-	return(0);
+	printf("Success!\\n");
+	return (0);
 }
 .Ed
 .Pp

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 11:09:05 2010
Return-Path: <owner-svn-src-stable-7@FreeBSD.ORG>
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 1A02C106566B;
	Sat,  6 Nov 2010 11:09:05 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 070A98FC14;
	Sat,  6 Nov 2010 11:09:05 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6B94Wa028876;
	Sat, 6 Nov 2010 11:09:04 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6B94ba028873;
	Sat, 6 Nov 2010 11:09:04 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061109.oA6B94ba028873@svn.freebsd.org>
From: Lawrence Stewart <lstewart@FreeBSD.org>
Date: Sat, 6 Nov 2010 11:09:04 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214868 - in stable/7/sys: kern modules modules/alq
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 11:09:05 -0000

Author: lstewart
Date: Sat Nov  6 11:09:04 2010
New Revision: 214868
URL: http://svn.freebsd.org/changeset/base/214868

Log:
  MFC r205959:
  
  Add support for ALQ(9) to be compiled and loaded as a kernel module.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	dwmalone, jeff, rpaulo, rwatson

Added:
  stable/7/sys/modules/alq/
     - copied from r205959, head/sys/modules/alq/
Modified:
  stable/7/sys/kern/kern_alq.c
  stable/7/sys/modules/Makefile
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/kern/kern_alq.c
==============================================================================
--- stable/7/sys/kern/kern_alq.c	Sat Nov  6 10:54:33 2010	(r214867)
+++ stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:09:04 2010	(r214868)
@@ -1,7 +1,13 @@
 /*-
  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2009-2010, The FreeBSD Foundation
  * All rights reserved.
  *
+ * Portions of this software were developed at the Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -182,9 +188,16 @@ ald_daemon(void)
 	ALD_LOCK();
 
 	for (;;) {
-		while ((alq = LIST_FIRST(&ald_active)) == NULL)
+		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
+		    !ald_shutingdown)
 			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
 
+		/* Don't shutdown until all active ALQs are flushed. */
+		if (ald_shutingdown && alq == NULL) {
+			ALD_UNLOCK();
+			break;
+		}
+
 		ALQ_LOCK(alq);
 		ald_deactivate(alq);
 		ALD_UNLOCK();
@@ -194,6 +207,8 @@ ald_daemon(void)
 			wakeup(alq);
 		ALD_LOCK();
 	}
+
+	kproc_exit(0);
 }
 
 static void
@@ -202,14 +217,29 @@ ald_shutdown(void *arg, int howto)
 	struct alq *alq;
 
 	ALD_LOCK();
+
+	/* Ensure no new queues can be created. */
 	ald_shutingdown = 1;
 
+	/* Shutdown all ALQs prior to terminating the ald_daemon. */
 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
 		LIST_REMOVE(alq, aq_link);
 		ALD_UNLOCK();
 		alq_shutdown(alq);
 		ALD_LOCK();
 	}
+
+	/* At this point, all ALQs are flushed and shutdown. */
+
+	/*
+	 * Wake ald_daemon so that it exits. It won't be able to do
+	 * anything until we msleep because we hold the ald_mtx.
+	 */
+	wakeup(&ald_active);
+
+	/* Wait for ald_daemon to exit. */
+	msleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
+
 	ALD_UNLOCK();
 }
 
@@ -517,3 +547,53 @@ alq_close(struct alq *alq)
 	free(alq->aq_entbuf, M_ALD);
 	free(alq, M_ALD);
 }
+
+static int
+alq_load_handler(module_t mod, int what, void *arg)
+{
+	int ret;
+	
+	ret = 0;
+
+	switch (what) {
+	case MOD_LOAD:
+	case MOD_SHUTDOWN:
+		break;
+
+	case MOD_QUIESCE:
+		ALD_LOCK();
+		/* Only allow unload if there are no open queues. */
+		if (LIST_FIRST(&ald_queues) == NULL) {
+			ald_shutingdown = 1;
+			ALD_UNLOCK();
+			ald_shutdown(NULL, 0);
+			mtx_destroy(&ald_mtx);
+		} else {
+			ALD_UNLOCK();
+			ret = EBUSY;
+		}
+		break;
+
+	case MOD_UNLOAD:
+		/* If MOD_QUIESCE failed we must fail here too. */
+		if (ald_shutingdown == 0)
+			ret = EBUSY;
+		break;
+
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+	return (ret);
+}
+
+static moduledata_t alq_mod =
+{
+	"alq",
+	alq_load_handler,
+	NULL
+};
+
+DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY);
+MODULE_VERSION(alq, 1);

Modified: stable/7/sys/modules/Makefile
==============================================================================
--- stable/7/sys/modules/Makefile	Sat Nov  6 10:54:33 2010	(r214867)
+++ stable/7/sys/modules/Makefile	Sat Nov  6 11:09:04 2010	(r214868)
@@ -18,6 +18,7 @@ SUBDIR=	${_3dfx} \
 	aio \
 	alc \
 	ale \
+	alq \
 	${_amd} \
 	${_amdsbwd} \
 	${_amdtemp} \

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 11:17:30 2010
Return-Path: <owner-svn-src-stable-7@FreeBSD.ORG>
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id E810C106564A;
	Sat,  6 Nov 2010 11:17:30 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id BBA9B8FC15;
	Sat,  6 Nov 2010 11:17:30 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6BHUXT029086;
	Sat, 6 Nov 2010 11:17:30 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6BHU1J029084;
	Sat, 6 Nov 2010 11:17:30 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061117.oA6BHU1J029084@svn.freebsd.org>
From: Lawrence Stewart <lstewart@FreeBSD.org>
Date: Sat, 6 Nov 2010 11:17:30 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214869 - stable/7/sys/kern
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 11:17:31 -0000

Author: lstewart
Date: Sat Nov  6 11:17:30 2010
New Revision: 214869
URL: http://svn.freebsd.org/changeset/base/214869

Log:
  MFC r206026:
  
  - Factor code to destroy an ALQ out of alq_close() into a private alq_destroy().
  
  - Use the new alq_destroy() to properly handle a failure case in alq_open().
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	dwmalone, jeff, rpaulo, rwatson (as part of a larger patch)

Modified:
  stable/7/sys/kern/kern_alq.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/kern/kern_alq.c
==============================================================================
--- stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:09:04 2010	(r214868)
+++ stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:17:30 2010	(r214869)
@@ -103,6 +103,7 @@ static void ald_deactivate(struct alq *)
 
 /* Internal queue functions */
 static void alq_shutdown(struct alq *);
+static void alq_destroy(struct alq *);
 static int alq_doio(struct alq *);
 
 
@@ -265,6 +266,18 @@ alq_shutdown(struct alq *alq)
 	crfree(alq->aq_cred);
 }
 
+void
+alq_destroy(struct alq *alq)
+{
+	/* Drain all pending IO. */
+	alq_shutdown(alq);
+
+	mtx_destroy(&alq->aq_mtx);
+	free(alq->aq_first, M_ALD);
+	free(alq->aq_entbuf, M_ALD);
+	free(alq, M_ALD);
+}
+
 /*
  * Flush all pending data to disk.  This operation will block.
  */
@@ -423,8 +436,11 @@ alq_open(struct alq **alqp, const char *
 
 	alp->ae_next = alq->aq_first;
 
-	if ((error = ald_add(alq)) != 0)
+	if ((error = ald_add(alq)) != 0) {
+		alq_destroy(alq);
 		return (error);
+	}
+
 	*alqp = alq;
 
 	return (0);
@@ -530,22 +546,9 @@ alq_flush(struct alq *alq)
 void
 alq_close(struct alq *alq)
 {
-	/*
-	 * If we're already shuting down someone else will flush and close
-	 * the vnode.
-	 */
-	if (ald_rem(alq) != 0)
-		return;
-
-	/*
-	 * Drain all pending IO.
-	 */
-	alq_shutdown(alq);
-
-	mtx_destroy(&alq->aq_mtx);
-	free(alq->aq_first, M_ALD);
-	free(alq->aq_entbuf, M_ALD);
-	free(alq, M_ALD);
+	/* Only flush and destroy alq if not already shutting down. */
+	if (ald_rem(alq) == 0)
+		alq_destroy(alq);
 }
 
 static int

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 11:20:21 2010
Return-Path: <owner-svn-src-stable-7@FreeBSD.ORG>
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 301F51065670;
	Sat,  6 Nov 2010 11:20:21 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 1DE5F8FC1C;
	Sat,  6 Nov 2010 11:20:21 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6BKLax029188;
	Sat, 6 Nov 2010 11:20:21 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6BKLxD029186;
	Sat, 6 Nov 2010 11:20:21 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061120.oA6BKLxD029186@svn.freebsd.org>
From: Lawrence Stewart <lstewart@FreeBSD.org>
Date: Sat, 6 Nov 2010 11:20:20 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214870 - stable/7/sys/kern
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 11:20:21 -0000

Author: lstewart
Date: Sat Nov  6 11:20:20 2010
New Revision: 214870
URL: http://svn.freebsd.org/changeset/base/214870

Log:
  MFC r206027:
  
  According to SLEEP(9), msleep() is deprecated in favour of mtx_sleep().
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	dwmalone, jeff, rpaulo, rwatson (as part of a larger patch)

Modified:
  stable/7/sys/kern/kern_alq.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/kern/kern_alq.c
==============================================================================
--- stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:17:30 2010	(r214869)
+++ stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:20:20 2010	(r214870)
@@ -191,7 +191,7 @@ ald_daemon(void)
 	for (;;) {
 		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
 		    !ald_shutingdown)
-			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
+			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
 
 		/* Don't shutdown until all active ALQs are flushed. */
 		if (ald_shutingdown && alq == NULL) {
@@ -234,12 +234,12 @@ ald_shutdown(void *arg, int howto)
 
 	/*
 	 * Wake ald_daemon so that it exits. It won't be able to do
-	 * anything until we msleep because we hold the ald_mtx.
+	 * anything until we mtx_sleep because we hold the ald_mtx.
 	 */
 	wakeup(&ald_active);
 
 	/* Wait for ald_daemon to exit. */
-	msleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
+	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
 
 	ALD_UNLOCK();
 }

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 11:23:46 2010
Return-Path: <owner-svn-src-stable-7@FreeBSD.ORG>
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 8FE5D106566B;
	Sat,  6 Nov 2010 11:23:46 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 7DD478FC15;
	Sat,  6 Nov 2010 11:23:46 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6BNkg3029315;
	Sat, 6 Nov 2010 11:23:46 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6BNkq6029313;
	Sat, 6 Nov 2010 11:23:46 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061123.oA6BNkq6029313@svn.freebsd.org>
From: Lawrence Stewart <lstewart@FreeBSD.org>
Date: Sat, 6 Nov 2010 11:23:46 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214871 - stable/7/sys/kern
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 11:23:46 -0000

Author: lstewart
Date: Sat Nov  6 11:23:46 2010
New Revision: 214871
URL: http://svn.freebsd.org/changeset/base/214871

Log:
  MFC r206028:
  
  The ALQ should not be considered drained until it has been made inactive.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	dwmalone, jeff, rpaulo, rwatson (as part of a larger patch)

Modified:
  stable/7/sys/kern/kern_alq.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/kern/kern_alq.c
==============================================================================
--- stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:20:20 2010	(r214870)
+++ stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:23:46 2010	(r214871)
@@ -253,7 +253,7 @@ alq_shutdown(struct alq *alq)
 	alq->aq_flags |= AQ_SHUTDOWN;
 
 	/* Drain IO */
-	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
+	while (alq->aq_flags & AQ_ACTIVE) {
 		alq->aq_flags |= AQ_WANTED;
 		ALQ_UNLOCK(alq);
 		tsleep(alq, PWAIT, "aldclose", 0);

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 11:38:40 2010
Return-Path: <owner-svn-src-stable-7@FreeBSD.ORG>
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id DC832106564A;
	Sat,  6 Nov 2010 11:38:40 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id C89B38FC08;
	Sat,  6 Nov 2010 11:38:40 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6Bce7v029654;
	Sat, 6 Nov 2010 11:38:40 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6BceFi029650;
	Sat, 6 Nov 2010 11:38:40 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061138.oA6BceFi029650@svn.freebsd.org>
From: Lawrence Stewart <lstewart@FreeBSD.org>
Date: Sat, 6 Nov 2010 11:38:40 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214872 - in stable/7: share/man/man9 sys/kern sys/sys
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 11:38:41 -0000

Author: lstewart
Date: Sat Nov  6 11:38:40 2010
New Revision: 214872
URL: http://svn.freebsd.org/changeset/base/214872

Log:
  MFC r207223:
  
  - Rework the underlying ALQ storage to be a circular buffer, which amongst other
    things allows variable length messages to be easily supported.
  
  - Extend KPI with alq_writen() and alq_getn() to support variable length
    messages, which is enabled at ALQ creation time depending on the arguments
    passed to alq_open(). Also add variants of alq_open() and alq_post() that
    accept a flags argument. The KPI is still fully backwards compatible and
    shouldn't require any change in ALQ consumers unless they wish to utilise the
    new features.
  
  - Introduce the ALQ_NOACTIVATE and ALQ_ORDERED flags to allow ALQ consumers to
    have more control over IO scheduling and resource acquisition respectively.
  
  - Strengthen invariants checking.
  
  - Document ALQ changes in ALQ(9) man page.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	gnn, jeff, rpaulo, rwatson

Modified:
  stable/7/share/man/man9/alq.9
  stable/7/sys/kern/kern_alq.c
  stable/7/sys/sys/alq.h
Directory Properties:
  stable/7/share/man/   (props changed)
  stable/7/share/man/man1/   (props changed)
  stable/7/share/man/man3/   (props changed)
  stable/7/share/man/man4/   (props changed)
  stable/7/share/man/man5/   (props changed)
  stable/7/share/man/man7/   (props changed)
  stable/7/share/man/man8/   (props changed)
  stable/7/share/man/man9/   (props changed)
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/share/man/man9/alq.9
==============================================================================
--- stable/7/share/man/man9/alq.9	Sat Nov  6 11:23:46 2010	(r214871)
+++ stable/7/share/man/man9/alq.9	Sat Nov  6 11:38:40 2010	(r214872)
@@ -1,7 +1,13 @@
 .\"
 .\" Copyright (c) 2003 Hiten Pandya 
+.\" Copyright (c) 2009-2010 The FreeBSD Foundation
 .\" All rights reserved.
 .\"
+.\" Portions of this software were developed at the Centre for Advanced
+.\" Internet Architectures, Swinburne University of Technology, Melbourne,
+.\" Australia by Lawrence Stewart under sponsorship from the FreeBSD
+.\" Foundation.
+.\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
@@ -25,21 +31,34 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 16, 2003
+.Dd April 26, 2010
 .Dt ALQ 9
 .Os
 .Sh NAME
 .Nm alq ,
+.Nm alq_open_flags ,
 .Nm alq_open ,
+.Nm alq_writen ,
 .Nm alq_write ,
 .Nm alq_flush ,
 .Nm alq_close ,
+.Nm alq_getn ,
 .Nm alq_get ,
+.Nm alq_post_flags ,
 .Nm alq_post
 .Nd Asynchronous Logging Queues
 .Sh SYNOPSIS
 .In sys/alq.h
 .Ft int
+.Fo alq_open_flags
+.Fa "struct alq **app"
+.Fa "const char *file"
+.Fa "struct ucred *cred"
+.Fa "int cmode"
+.Fa "int size"
+.Fa "int flags"
+.Fc
+.Ft int
 .Fo alq_open
 .Fa "struct alq **app"
 .Fa "const char *file"
@@ -49,19 +68,25 @@
 .Fa "int count"
 .Fc
 .Ft int
-.Fn alq_write "struct alq *alq" "void *data" "int waitok"
+.Fn alq_writen "struct alq *alq" "void *data" "int len" "int flags"
+.Ft int
+.Fn alq_write "struct alq *alq" "void *data" "int flags"
 .Ft void
 .Fn alq_flush "struct alq *alq"
 .Ft void
 .Fn alq_close "struct alq *alq"
 .Ft struct ale *
-.Fn alq_get "struct alq *alq" "int waitok"
+.Fn alq_getn "struct alq *alq" "int len" "int flags"
+.Ft struct ale *
+.Fn alq_get "struct alq *alq" "int flags"
+.Ft void
+.Fn alq_post_flags "struct alq *alq" "struct ale *ale" "int flags"
 .Ft void
 .Fn alq_post "struct alq *alq" "struct ale *ale"
 .Sh DESCRIPTION
 The
 .Nm
-facility provides an asynchronous fixed length recording
+facility provides an asynchronous fixed or variable length recording
 mechanism, known as Asynchronous Logging Queues.
 It can record to any
 .Xr vnode 9 ,
@@ -81,26 +106,37 @@ is defined as
 which has the following members:
 .Bd -literal -offset indent
 struct ale {
-	struct ale	*ae_next;	/* Next Entry */
-	char		*ae_data;	/* Entry buffer */
-	int		ae_flags;	/* Entry flags */
+	intptr_t	ae_bytesused;	/* # bytes written to ALE. */
+	char		*ae_data;	/* Write ptr. */
+	int		ae_pad;		/* Unused, compat. */
 };
 .Ed
 .Pp
-The
-.Va ae_flags
-field is for internal use, clients of the
+An
 .Nm
-interface should not modify this field.
-Behaviour is undefined if this field is modified.
+can be created in either fixed or variable length mode.
+A variable length
+.Nm
+accommodates writes of varying length using
+.Fn alq_writen
+and
+.Fn alq_getn .
+A fixed length
+.Nm
+accommodates a fixed number of writes using
+.Fn alq_write
+and
+.Fn alq_get ,
+each of fixed size (set at queue creation time).
+Fixed length mode is deprecated in favour of variable length mode.
 .Sh FUNCTIONS
 The
-.Fn alq_open
-function creates a new logging queue.
+.Fn alq_open_flags
+function creates a new variable length asynchronous logging queue.
 The
 .Fa file
-argument is the name of the file to open for logging; if the file does not
-yet exist,
+argument is the name of the file to open for logging.
+If the file does not yet exist,
 .Fn alq_open
 will attempt to create it.
 The
@@ -112,33 +148,99 @@ as the requested creation mode, to be us
 Consumers of this API may wish to pass
 .Dv ALQ_DEFAULT_CMODE ,
 a default creation mode suitable for most applications.
-The argument
+The
 .Fa cred
-specifies the credentials to use when opening and performing I/O on the file.
-The size of each entry in the queue is determined by
-.Fa size .
+argument specifies the credentials to use when opening and performing I/O on the file.
 The
+.Fa size
+argument sets the size (in bytes) of the underlying queue.
+The ALQ_ORDERED flag may be passed in via
+.Fa flags
+to indicate that the ordering of writer threads waiting for a busy
+.Nm
+to free up resources should be preserved.
+.Pp
+The deprecated
+.Fn alq_open
+function is implemented as a wrapper around
+.Fn alq_open_flags
+to provide backwards compatibility to consumers that have not been updated to
+utilise the newer
+.Fn alq_open_flags
+function.
+It passes all arguments through to
+.Fn alq_open_flags
+untouched except for
+.Fa size
+and
+.Fa count ,
+and sets
+.Fa flags
+to 0.
+To create a variable length mode
+.Nm ,
+the
+.Fa size
+argument should be set to the size (in bytes) of the underlying queue and the
+.Fa count
+argument should be set to 0.
+To create a fixed length mode
+.Nm ,
+the
+.Fa size
+argument should be set to the size (in bytes) of each write and the
 .Fa count
-argument determines the number of items to be stored in the
-asynchronous queue over an approximate period of a disk
-write operation.
+argument should be set to the number of
+.Fa size
+byte chunks to reserve capacity for.
 .Pp
 The
-.Fn alq_write
+.Fn alq_writen
 function writes
+.Fa len
+bytes from
 .Fa data
-to the designated queue,
+to the designated variable length mode queue
 .Fa alq .
-In the event that
-.Fn alq_write
-could not write the entry immediately, and
+If
+.Fn alq_writen
+could not write the entry immediately and
 .Dv ALQ_WAITOK
-is passed to
-.Fa waitok ,
-then
+is set in
+.Fa flags ,
+the function will be allowed to
+.Xr msleep_spin 9
+with the
+.Dq Li alqwnord
+or
+.Dq Li alqwnres
+wait message.
+A write will automatically schedule the queue
+.Fa alq
+to be flushed to disk.
+This behaviour can be controlled by passing ALQ_NOACTIVATE via
+.Fa flags
+to indicate that the write should not schedule
+.Fa alq
+to be flushed to disk.
+.Pp
+The deprecated
+.Fn alq_write
+function is implemented as a wrapper around
+.Fn alq_writen
+to provide backwards compatibility to consumers that have not been updated to
+utilise variable length mode queues.
+The function will write
+.Fa size
+bytes of data (where
+.Fa size
+was specified at queue creation time) from the
+.Fa data
+buffer to the
+.Fa alq .
+Note that it is an error to call
 .Fn alq_write
-will be allowed to
-.Xr tsleep 9 .
+on a variable length mode queue.
 .Pp
 The
 .Fn alq_flush
@@ -146,61 +248,136 @@ function is used for flushing
 .Fa alq
 to the log medium that was passed to
 .Fn alq_open .
+If
+.Fa alq
+has data to flush and is not already in the process of being flushed, the
+function will block doing IO.
+Otherwise, the function will return immediately.
 .Pp
 The
 .Fn alq_close
-function will close the asynchronous logging queue,
-.Fa alq ,
+function will close the asynchronous logging queue
+.Fa alq
 and flush all pending write requests to the log medium.
 It will free all resources that were previously allocated.
 .Pp
 The
-.Fn alq_get
-function returns the next available asynchronous logging entry
-from the queue,
-.Fa alq .
-This function leaves the queue in a locked state, until a subsequent
+.Fn alq_getn
+function returns an asynchronous log entry from
+.Fa alq ,
+initialised to point at a buffer capable of receiving
+.Fa len
+bytes of data.
+This function leaves
+.Fa alq
+in a locked state, until a subsequent
 .Fn alq_post
+or
+.Fn alq_post_flags
 call is made.
-In the event that
-.Fn alq_get
-could not retrieve an entry immediately, it will
-.Xr tsleep 9
+If
+.Fn alq_getn
+could not obtain
+.Fa len
+bytes of buffer immediately and
+.Dv ALQ_WAITOK
+is set in
+.Fa flags ,
+the function will be allowed to
+.Xr msleep_spin 9
 with the
-.Dq Li alqget
+.Dq Li alqgnord
+or
+.Dq Li alqgnres
 wait message.
+The caller can choose to write less than
+.Fa len
+bytes of data to the returned asynchronous log entry by setting the entry's
+ae_bytesused field to the number of bytes actually written.
+This must be done prior to calling
+.Fn alq_post .
 .Pp
-The
-.Fn alq_post
-function schedules the asynchronous logging entry,
-.Fa ale ,
-which is retrieved using the
+The deprecated
 .Fn alq_get
-function,
-for writing to the asynchronous logging queue,
+function is implemented as a wrapper around
+.Fn alq_getn
+to provide backwards compatibility to consumers that have not been updated to
+utilise variable length mode queues.
+The asynchronous log entry returned will be initialised to point at a buffer
+capable of receiving
+.Fa size
+bytes of data (where
+.Fa size
+was specified at queue creation time).
+Note that it is an error to call
+.Fn alq_get
+on a variable length mode queue.
+.Pp
+The
+.Fn alq_post_flags
+function schedules the asynchronous log entry
+.Fa ale
+(obtained from
+.Fn alq_getn
+or
+.Fn alq_get )
+for writing to
 .Fa alq .
-This function leaves the queue,
-.Fa alq ,
+The ALQ_NOACTIVATE flag may be passed in via
+.Fa flags
+to indicate that the queue should not be immediately scheduled to be flushed to
+disk.
+This function leaves
+.Fa alq
 in an unlocked state.
+.Pp
+The
+.Fn alq_post
+function is implemented as a wrapper around
+.Fn alq_post_flags
+to provide backwards compatibility to consumers that have not been updated to
+utilise the newer
+.Fn alq_post_flags
+function.
+It simply passes all arguments through to
+.Fn alq_post_flags
+untouched, and sets
+.Fa flags
+to 0.
 .Sh IMPLEMENTATION NOTES
 The
+.Fn alq_writen
+and
 .Fn alq_write
-function is a wrapper around the
+functions both perform a
+.Xr bcopy 3
+from the supplied
+.Fa data
+buffer into the underlying
+.Nm
+buffer.
+Performance critical code paths may wish to consider using
+.Fn alq_getn
+(variable length queues) or
+.Fn alq_get
+(fixed length queues) to avoid the extra memory copy. Note that a queue
+remains locked between calls to
+.Fn alq_getn
+or
 .Fn alq_get
 and
 .Fn alq_post
-functions; by using these functions separately, a call
-to
-.Fn bcopy
-can be avoided for performance critical code paths.
+or
+.Fn alq_post_flags ,
+so this method of writing to a queue is unsuitable for situations where the
+time between calls may be substantial.
 .Sh LOCKING
-Each asynchronous queue is protected by a spin mutex.
+Each asynchronous logging queue is protected by a spin mutex.
 .Pp
 Functions
-.Fn alq_flush ,
-.Fn alq_open
+.Fn alq_flush
 and
-.Fn alq_post
+.Fn alq_open
 may attempt to acquire an internal sleep mutex, and should
 consequently not be used in contexts where sleeping is
 not allowed.
@@ -214,32 +391,36 @@ if it fails to open
 or else it returns 0.
 .Pp
 The
+.Fn alq_writen
+and
 .Fn alq_write
-function returns
+functions return
 .Er EWOULDBLOCK
 if
 .Dv ALQ_NOWAIT
-was provided as a value to
-.Fa waitok
-and either the queue is full, or when the system is shutting down.
+was set in
+.Fa flags
+and either the queue is full or the system is shutting down.
 .Pp
 The
+.Fn alq_getn
+and
 .Fn alq_get
-function returns
-.Dv NULL ,
+functions return
+.Dv NULL
 if
 .Dv ALQ_NOWAIT
-was provided as a value to
-.Fa waitok
-and either the queue is full, or when the system is shutting down.
+was set in
+.Fa flags
+and either the queue is full or the system is shutting down.
 .Pp
 NOTE: invalid arguments to non-void functions will result in
 undefined behaviour.
 .Sh SEE ALSO
-.Xr syslog 3 ,
-.Xr kthread 9 ,
+.Xr kproc 9 ,
 .Xr ktr 9 ,
-.Xr tsleep 9 ,
+.Xr msleep_spin 9 ,
+.Xr syslog 3 ,
 .Xr vnode 9
 .Sh HISTORY
 The
@@ -250,7 +431,11 @@ Asynchronous Logging Queues (ALQ) facili
 The
 .Nm
 facility was written by
-.An Jeffrey Roberson Aq jeff@FreeBSD.org .
+.An Jeffrey Roberson Aq jeff@FreeBSD.org
+and extended by
+.An Lawrence Stewart Aq lstewart@freebsd.org .
 .Pp
 This manual page was written by
-.An Hiten Pandya Aq hmp@FreeBSD.org .
+.An Hiten Pandya Aq hmp@FreeBSD.org
+and revised by
+.An Lawrence Stewart Aq lstewart@freebsd.org .

Modified: stable/7/sys/kern/kern_alq.c
==============================================================================
--- stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:23:46 2010	(r214871)
+++ stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:38:40 2010	(r214872)
@@ -55,16 +55,23 @@ __FBSDID("$FreeBSD$");
 
 /* Async. Logging Queue */
 struct alq {
+	char	*aq_entbuf;		/* Buffer for stored entries */
 	int	aq_entmax;		/* Max entries */
 	int	aq_entlen;		/* Entry length */
-	char	*aq_entbuf;		/* Buffer for stored entries */
+	int	aq_freebytes;		/* Bytes available in buffer */
+	int	aq_buflen;		/* Total length of our buffer */
+	int	aq_writehead;		/* Location for next write */
+	int	aq_writetail;		/* Flush starts at this location */
+	int	aq_wrapearly;		/* # bytes left blank at end of buf */
 	int	aq_flags;		/* Queue flags */
+	int	aq_waiters;		/* Num threads waiting for resources
+					 * NB: Used as a wait channel so must
+					 * not be first field in the alq struct
+					 */
+	struct	ale	aq_getpost;	/* ALE for use by get/post */
 	struct mtx	aq_mtx;		/* Queue lock */
 	struct vnode	*aq_vp;		/* Open vnode handle */
 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
-	struct ale	*aq_first;	/* First ent */
-	struct ale	*aq_entfree;	/* First free ent */
-	struct ale	*aq_entvalid;	/* First ent valid for writing */
 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
 };
@@ -73,10 +80,14 @@ struct alq {
 #define	AQ_ACTIVE	0x0002		/* on the active list */
 #define	AQ_FLUSHING	0x0004		/* doing IO */
 #define	AQ_SHUTDOWN	0x0008		/* Queue no longer valid */
+#define	AQ_ORDERED	0x0010		/* Queue enforces ordered writes */
+#define	AQ_LEGACY	0x0020		/* Legacy queue (fixed length writes) */
 
 #define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
 #define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
 
+#define HAS_PENDING_DATA(alq) ((alq)->aq_freebytes != (alq)->aq_buflen)
+
 static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
 
 /*
@@ -205,7 +216,7 @@ ald_daemon(void)
 		needwakeup = alq_doio(alq);
 		ALQ_UNLOCK(alq);
 		if (needwakeup)
-			wakeup(alq);
+			wakeup_one(alq);
 		ALD_LOCK();
 	}
 
@@ -252,6 +263,20 @@ alq_shutdown(struct alq *alq)
 	/* Stop any new writers. */
 	alq->aq_flags |= AQ_SHUTDOWN;
 
+	/*
+	 * If the ALQ isn't active but has unwritten data (possible if
+	 * the ALQ_NOACTIVATE flag has been used), explicitly activate the
+	 * ALQ here so that the pending data gets flushed by the ald_daemon.
+	 */
+	if (!(alq->aq_flags & AQ_ACTIVE) && HAS_PENDING_DATA(alq)) {
+		alq->aq_flags |= AQ_ACTIVE;
+		ALQ_UNLOCK(alq);
+		ALD_LOCK();
+		ald_activate(alq);
+		ALD_UNLOCK();
+		ALQ_LOCK(alq);
+	}
+
 	/* Drain IO */
 	while (alq->aq_flags & AQ_ACTIVE) {
 		alq->aq_flags |= AQ_WANTED;
@@ -273,7 +298,6 @@ alq_destroy(struct alq *alq)
 	alq_shutdown(alq);
 
 	mtx_destroy(&alq->aq_mtx);
-	free(alq->aq_first, M_ALD);
 	free(alq->aq_entbuf, M_ALD);
 	free(alq, M_ALD);
 }
@@ -289,46 +313,54 @@ alq_doio(struct alq *alq)
 	struct vnode *vp;
 	struct uio auio;
 	struct iovec aiov[2];
-	struct ale *ale;
-	struct ale *alstart;
 	int totlen;
 	int iov;
 	int vfslocked;
+	int wrapearly;
+
+	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
 
 	vp = alq->aq_vp;
 	td = curthread;
 	totlen = 0;
-	iov = 0;
-
-	alstart = ale = alq->aq_entvalid;
-	alq->aq_entvalid = NULL;
+	iov = 1;
+	wrapearly = alq->aq_wrapearly;
 
 	bzero(&aiov, sizeof(aiov));
 	bzero(&auio, sizeof(auio));
 
-	do {
-		if (aiov[iov].iov_base == NULL)
-			aiov[iov].iov_base = ale->ae_data;
-		aiov[iov].iov_len += alq->aq_entlen;
-		totlen += alq->aq_entlen;
-		/* Check to see if we're wrapping the buffer */
-		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
-			iov++;
-		ale->ae_flags &= ~AE_VALID;
-		ale = ale->ae_next;
-	} while (ale->ae_flags & AE_VALID);
+	/* Start the write from the location of our buffer tail pointer. */
+	aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;
+
+	if (alq->aq_writetail < alq->aq_writehead) {
+		/* Buffer not wrapped. */
+		totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
+	} else if (alq->aq_writehead == 0) {
+		/* Buffer not wrapped (special case to avoid an empty iov). */
+		totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
+		    wrapearly;
+	} else {
+		/*
+		 * Buffer wrapped, requires 2 aiov entries:
+		 * - first is from writetail to end of buffer
+		 * - second is from start of buffer to writehead
+		 */
+		aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
+		    wrapearly;
+		iov++;
+		aiov[1].iov_base = alq->aq_entbuf;
+		aiov[1].iov_len =  alq->aq_writehead;
+		totlen = aiov[0].iov_len + aiov[1].iov_len;
+	}
 
 	alq->aq_flags |= AQ_FLUSHING;
 	ALQ_UNLOCK(alq);
 
-	if (iov == 2 || aiov[iov].iov_base == NULL)
-		iov--;
-
 	auio.uio_iov = &aiov[0];
 	auio.uio_offset = 0;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_rw = UIO_WRITE;
-	auio.uio_iovcnt = iov + 1;
+	auio.uio_iovcnt = iov;
 	auio.uio_resid = totlen;
 	auio.uio_td = td;
 
@@ -353,8 +385,28 @@ alq_doio(struct alq *alq)
 	ALQ_LOCK(alq);
 	alq->aq_flags &= ~AQ_FLUSHING;
 
-	if (alq->aq_entfree == NULL)
-		alq->aq_entfree = alstart;
+	/* Adjust writetail as required, taking into account wrapping. */
+	alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) %
+	    alq->aq_buflen;
+	alq->aq_freebytes += totlen + wrapearly;
+
+	/*
+	 * If we just flushed part of the buffer which wrapped, reset the
+	 * wrapearly indicator.
+	 */
+	if (wrapearly)
+		alq->aq_wrapearly = 0;
+
+	/*
+	 * If we just flushed the buffer completely, reset indexes to 0 to
+	 * minimise buffer wraps.
+	 * This is also required to ensure alq_getn() can't wedge itself.
+	 */
+	if (!HAS_PENDING_DATA(alq))
+		alq->aq_writehead = alq->aq_writetail = 0;
+
+	KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
+	    ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__));
 
 	if (alq->aq_flags & AQ_WANTED) {
 		alq->aq_flags &= ~AQ_WANTED;
@@ -379,27 +431,27 @@ SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, 
 /*
  * Create the queue data structure, allocate the buffer, and open the file.
  */
+
 int
-alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
-    int size, int count)
+alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
+    int size, int flags)
 {
 	struct thread *td;
 	struct nameidata nd;
-	struct ale *ale;
-	struct ale *alp;
 	struct alq *alq;
-	char *bufp;
-	int flags;
+	int oflags;
 	int error;
-	int i, vfslocked;
+	int vfslocked;
+
+	KASSERT((size > 0), ("%s: size <= 0", __func__));
 
 	*alqp = NULL;
 	td = curthread;
 
 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
-	flags = FWRITE | O_NOFOLLOW | O_CREAT;
+	oflags = FWRITE | O_NOFOLLOW | O_CREAT;
 
-	error = vn_open_cred(&nd, &flags, cmode, cred, NULL);
+	error = vn_open_cred(&nd, &oflags, cmode, cred, NULL);
 	if (error)
 		return (error);
 
@@ -410,31 +462,20 @@ alq_open(struct alq **alqp, const char *
 	VFS_UNLOCK_GIANT(vfslocked);
 
 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
-	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
-	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
 	alq->aq_vp = nd.ni_vp;
 	alq->aq_cred = crhold(cred);
-	alq->aq_entmax = count;
-	alq->aq_entlen = size;
-	alq->aq_entfree = alq->aq_first;
 
 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
 
-	bufp = alq->aq_entbuf;
-	ale = alq->aq_first;
-	alp = NULL;
-
-	/* Match up entries with buffers */
-	for (i = 0; i < count; i++) {
-		if (alp)
-			alp->ae_next = ale;
-		ale->ae_data = bufp;
-		alp = ale;
-		ale++;
-		bufp += size;
-	}
-
-	alp->ae_next = alq->aq_first;
+	alq->aq_buflen = size;
+	alq->aq_entmax = 0;
+	alq->aq_entlen = 0;
+
+	alq->aq_freebytes = alq->aq_buflen;
+	alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
+	alq->aq_writehead = alq->aq_writetail = 0;
+	if (flags & ALQ_ORDERED)
+		alq->aq_flags |= AQ_ORDERED;
 
 	if ((error = ald_add(alq)) != 0) {
 		alq_destroy(alq);
@@ -446,79 +487,405 @@ alq_open(struct alq **alqp, const char *
 	return (0);
 }
 
+int
+alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
+    int size, int count)
+{
+	int ret;
+
+	KASSERT((count >= 0), ("%s: count < 0", __func__));
+
+	if (count > 0) {
+		ret = alq_open_flags(alqp, file, cred, cmode, size*count, 0);
+		(*alqp)->aq_flags |= AQ_LEGACY;
+		(*alqp)->aq_entmax = count;
+		(*alqp)->aq_entlen = size;
+	} else
+		ret = alq_open_flags(alqp, file, cred, cmode, size, 0);
+
+	return (ret);
+}
+
+
 /*
  * Copy a new entry into the queue.  If the operation would block either
  * wait or return an error depending on the value of waitok.
  */
 int
-alq_write(struct alq *alq, void *data, int waitok)
+alq_writen(struct alq *alq, void *data, int len, int flags)
 {
-	struct ale *ale;
+	int activate, copy, ret;
+	void *waitchan;
+
+	KASSERT((len > 0 && len <= alq->aq_buflen),
+	    ("%s: len <= 0 || len > aq_buflen", __func__));
 
-	if ((ale = alq_get(alq, waitok)) == NULL)
+	activate = ret = 0;
+	copy = len;
+	waitchan = NULL;
+
+	ALQ_LOCK(alq);
+
+	/*
+	 * Fail to perform the write and return EWOULDBLOCK if:
+	 * - The message is larger than our underlying buffer.
+	 * - The ALQ is being shutdown.
+	 * - There is insufficient free space in our underlying buffer
+	 *   to accept the message and the user can't wait for space.
+	 * - There is insufficient free space in our underlying buffer
+	 *   to accept the message and the alq is inactive due to prior
+	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
+	 */
+	if (len > alq->aq_buflen ||
+	    alq->aq_flags & AQ_SHUTDOWN ||
+	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
+	    HAS_PENDING_DATA(alq))) && alq->aq_freebytes < len)) {
+		ALQ_UNLOCK(alq);
 		return (EWOULDBLOCK);
+	}
 
-	bcopy(data, ale->ae_data, alq->aq_entlen);
-	alq_post(alq, ale);
+	/*
+	 * If we want ordered writes and there is already at least one thread
+	 * waiting for resources to become available, sleep until we're woken.
+	 */
+	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
+		KASSERT(!(flags & ALQ_NOWAIT),
+		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
+		alq->aq_waiters++;
+		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqwnord", 0);
+		alq->aq_waiters--;
+	}
 
-	return (0);
+	/*
+	 * (ALQ_WAITOK && aq_freebytes < len) or aq_freebytes >= len, either
+	 * enter while loop and sleep until we have enough free bytes (former)
+	 * or skip (latter). If AQ_ORDERED is set, only 1 thread at a time will
+	 * be in this loop. Otherwise, multiple threads may be sleeping here
+	 * competing for ALQ resources.
+	 */
+	while (alq->aq_freebytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
+		KASSERT(!(flags & ALQ_NOWAIT),
+		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
+		alq->aq_flags |= AQ_WANTED;
+		alq->aq_waiters++;
+		if (waitchan)
+			wakeup(waitchan);
+		msleep_spin(alq, &alq->aq_mtx, "alqwnres", 0);
+		alq->aq_waiters--;
+
+		/*
+		 * If we're the first thread to wake after an AQ_WANTED wakeup
+		 * but there isn't enough free space for us, we're going to loop
+		 * and sleep again. If there are other threads waiting in this
+		 * loop, schedule a wakeup so that they can see if the space
+		 * they require is available.
+		 */
+		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
+		    alq->aq_freebytes < len && !(alq->aq_flags & AQ_WANTED))
+			waitchan = alq;
+		else
+			waitchan = NULL;
+	}
+
+	/*
+	 * If there are waiters, we need to signal the waiting threads after we
+	 * complete our work. The alq ptr is used as a wait channel for threads
+	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
+	 * are not allowed to concurrently compete for resources in the above
+	 * while loop, so we use a different wait channel in this case.
+	 */
+	if (alq->aq_waiters > 0) {
+		if (alq->aq_flags & AQ_ORDERED)
+			waitchan = &alq->aq_waiters;
+		else
+			waitchan = alq;
+	} else
+		waitchan = NULL;
+
+	/* Bail if we're shutting down. */
+	if (alq->aq_flags & AQ_SHUTDOWN) {
+		ret = EWOULDBLOCK;
+		goto unlock;
+	}
+
+	/*
+	 * If we need to wrap the buffer to accommodate the write,
+	 * we'll need 2 calls to bcopy.
+	 */
+	if ((alq->aq_buflen - alq->aq_writehead) < len)
+		copy = alq->aq_buflen - alq->aq_writehead;
+
+	/* Copy message (or part thereof if wrap required) to the buffer. */
+	bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy);
+	alq->aq_writehead += copy;
+
+	if (alq->aq_writehead >= alq->aq_buflen) {
+		KASSERT((alq->aq_writehead == alq->aq_buflen),
+		    ("%s: alq->aq_writehead (%d) > alq->aq_buflen (%d)",
+		    __func__,
+		    alq->aq_writehead,
+		    alq->aq_buflen));
+		alq->aq_writehead = 0;
+	}
+
+	if (copy != len) {
+		/*
+		 * Wrap the buffer by copying the remainder of our message
+		 * to the start of the buffer and resetting aq_writehead.
+		 */
+		bcopy(((uint8_t *)data)+copy, alq->aq_entbuf, len - copy);
+		alq->aq_writehead = len - copy;
+	}
+
+	KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen),
+	    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__));
+
+	alq->aq_freebytes -= len;
+
+	if (!(alq->aq_flags & AQ_ACTIVE) && !(flags & ALQ_NOACTIVATE)) {
+		alq->aq_flags |= AQ_ACTIVE;
+		activate = 1;
+	}
+
+	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
+
+unlock:
+	ALQ_UNLOCK(alq);
+
+	if (activate) {
+		ALD_LOCK();
+		ald_activate(alq);
+		ALD_UNLOCK();
+	}
+
+	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
+	if (waitchan != NULL)
+		wakeup_one(waitchan);
+
+	return (ret);
 }
 
+int
+alq_write(struct alq *alq, void *data, int flags)
+{
+	/* Should only be called in fixed length message (legacy) mode. */
+	KASSERT((alq->aq_flags & AQ_LEGACY),
+	    ("%s: fixed length write on variable length queue", __func__));
+	return (alq_writen(alq, data, alq->aq_entlen, flags));
+}
+
+/*
+ * Retrieve a pointer for the ALQ to write directly into, avoiding bcopy.
+ */
 struct ale *
-alq_get(struct alq *alq, int waitok)
+alq_getn(struct alq *alq, int len, int flags)
 {
-	struct ale *ale;
-	struct ale *aln;
+	int contigbytes;
+	void *waitchan;
+
+	KASSERT((len > 0 && len <= alq->aq_buflen),
+	    ("%s: len <= 0 || len > alq->aq_buflen", __func__));
 
-	ale = NULL;
+	waitchan = NULL;
 
 	ALQ_LOCK(alq);
 
-	/* Loop until we get an entry or we're shutting down */
-	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 && 
-	    (ale = alq->aq_entfree) == NULL &&
-	    (waitok & ALQ_WAITOK)) {
-		alq->aq_flags |= AQ_WANTED;
+	/*
+	 * Determine the number of free contiguous bytes.
+	 * We ensure elsewhere that if aq_writehead == aq_writetail because
+	 * the buffer is empty, they will both be set to 0 and therefore
+	 * aq_freebytes == aq_buflen and is fully contiguous.
+	 * If they are equal and the buffer is not empty, aq_freebytes will
+	 * be 0 indicating the buffer is full.
+	 */
+	if (alq->aq_writehead <= alq->aq_writetail)
+		contigbytes = alq->aq_freebytes;
+	else {
+		contigbytes = alq->aq_buflen - alq->aq_writehead;
+
+		if (contigbytes < len) {
+			/*
+			 * Insufficient space at end of buffer to handle a
+			 * contiguous write. Wrap early if there's space at
+			 * the beginning. This will leave a hole at the end
+			 * of the buffer which we will have to skip over when
+			 * flushing the buffer to disk.
+			 */
+			if (alq->aq_writetail >= len || flags & ALQ_WAITOK) {
+				/* Keep track of # bytes left blank. */
+				alq->aq_wrapearly = contigbytes;
+				/* Do the wrap and adjust counters. */
+				contigbytes = alq->aq_freebytes =
+				    alq->aq_writetail;
+				alq->aq_writehead = 0;
+			}
+		}
+	}
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 12:41:47 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id D22591065679;
	Sat,  6 Nov 2010 12:41:47 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id C025A8FC0A;
	Sat,  6 Nov 2010 12:41:47 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6Cfl87031304;
	Sat, 6 Nov 2010 12:41:47 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6Cflv9031302;
	Sat, 6 Nov 2010 12:41:47 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061241.oA6Cflv9031302@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 12:41:47 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214874 - stable/7/sys/kern
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 12:41:47 -0000

Author: lstewart
Date: Sat Nov  6 12:41:47 2010
New Revision: 214874
URL: http://svn.freebsd.org/changeset/base/214874

Log:
  The kthread/kproc KPI differs between 7.x and 8.x+ and requires the use of
  kthread_exit() in order to get a wakeup delivered to the "struct proc *" which
  ALQ relies on.
  
  This is an intentional direct commit to the 7-STABLE branch.

Modified:
  stable/7/sys/kern/kern_alq.c

Modified: stable/7/sys/kern/kern_alq.c
==============================================================================
--- stable/7/sys/kern/kern_alq.c	Sat Nov  6 11:38:49 2010	(r214873)
+++ stable/7/sys/kern/kern_alq.c	Sat Nov  6 12:41:47 2010	(r214874)
@@ -220,7 +220,7 @@ ald_daemon(void)
 		ALD_LOCK();
 	}
 
-	kproc_exit(0);
+	kthread_exit(0);
 }
 
 static void

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 13:03:33 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id DC77F106564A;
	Sat,  6 Nov 2010 13:03:33 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id C78718FC0A;
	Sat,  6 Nov 2010 13:03:33 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6D3XtG031858;
	Sat, 6 Nov 2010 13:03:33 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6D3Xh1031853;
	Sat, 6 Nov 2010 13:03:33 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061303.oA6D3Xh1031853@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 13:03:33 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214875 - in stable/7: share/man/man4 sys/modules
	sys/modules/siftr sys/netinet
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 13:03:34 -0000

Author: lstewart
Date: Sat Nov  6 13:03:33 2010
New Revision: 214875
URL: http://svn.freebsd.org/changeset/base/214875

Log:
  MFC r209662,209665:
  
  Import the Statistical Information For TCP Research (SIFTR) kernel module into
  FreeBSD. SIFTR logs a range of statistics on active TCP connections to a log
  file, providing the ability to make highly granular measurements of TCP
  connection state. The tool is aimed at system administrators, developers and
  researchers alike. Please take it for a spin and test it out - the man page
  should have all the information required to get you going.
  
  Many thanks go to the Cisco University Research Program Fund at Community
  Foundation Silicon Valley and the FreeBSD Foundation. Their support of our work
  at the Centre for Advanced Internet Architectures, Swinburne University of
  Technology is greatly appreciated.
  
  The base SIFTR code from r209662 was modified as part of this MFC in order to
  work correctly on FreeBSD 7.
  
  r209980:
  
  Catch up with the rename of DPCPU_SUM to DPCPU_VARSUM.
  
  r209982:
  
  The SIFTR DPCPU statistics struct was not being zeroed between enable/disable
  cycles so the values would accumulate rather than reset for each cycle.
  
  Sponsored by:	Cisco URP (r209662), FreeBSD Foundation
  Reviewed by:	dwmalone, gnn, rpaulo (r209662)
  Tested by:	Many on freebsd-current@ and elsewhere over the years

Added:
  stable/7/share/man/man4/siftr.4
     - copied unchanged from r209662, head/share/man/man4/siftr.4
  stable/7/sys/modules/siftr/
     - copied from r209662, head/sys/modules/siftr/
  stable/7/sys/netinet/siftr.c
     - copied, changed from r209662, head/sys/netinet/siftr.c
Modified:
  stable/7/share/man/man4/Makefile
  stable/7/sys/modules/Makefile
Directory Properties:
  stable/7/share/man/   (props changed)
  stable/7/share/man/man1/   (props changed)
  stable/7/share/man/man3/   (props changed)
  stable/7/share/man/man4/   (props changed)
  stable/7/share/man/man5/   (props changed)
  stable/7/share/man/man7/   (props changed)
  stable/7/share/man/man8/   (props changed)
  stable/7/share/man/man9/   (props changed)
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/share/man/man4/Makefile
==============================================================================
--- stable/7/share/man/man4/Makefile	Sat Nov  6 12:41:47 2010	(r214874)
+++ stable/7/share/man/man4/Makefile	Sat Nov  6 13:03:33 2010	(r214875)
@@ -314,6 +314,8 @@ MAN=	aac.4 \
 	sf.4 \
 	sge.4 \
 	si.4 \
+	siftr.4 \
+	siis.4 \
 	sio.4 \
 	sis.4 \
 	sk.4 \

Copied: stable/7/share/man/man4/siftr.4 (from r209662, head/share/man/man4/siftr.4)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/7/share/man/man4/siftr.4	Sat Nov  6 13:03:33 2010	(r214875, copy of r209662, head/share/man/man4/siftr.4)
@@ -0,0 +1,752 @@
+.\"
+.\" Copyright (c) 2010 The FreeBSD Foundation
+.\" All rights reserved.
+.\"
+.\" Portions of this software were developed at the Centre for Advanced
+.\" Internet Architectures, Swinburne University of Technology, Melbourne,
+.\" Australia by Lawrence Stewart under sponsorship from the FreeBSD
+.\" Foundation.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions, and the following disclaimer,
+.\"    without modification, immediately at the beginning of the file.
+.\" 2. The name of the author may not be used to endorse or promote products
+.\"    derived from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+.\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 23, 2010
+.Dt SIFTR 4
+.Os
+.Sh NAME
+.Nm SIFTR
+.Nd Statistical Information For TCP Research
+.Sh SYNOPSIS
+To load
+.Ns Nm
+as a module at run-time, run the following command as root:
+.Bd -literal -offset indent
+kldload siftr
+.Ed
+.Pp
+Alternatively, to load
+.Ns Nm
+as a module at boot time, add the following line into the
+.Xr loader.conf 5
+file:
+.Bd -literal -offset indent
+siftr_load="YES"
+.Ed
+.Sh DESCRIPTION
+.Nm
+.Ns ( Em S Ns tatistical
+.Em I Ns nformation
+.Em F Ns or
+.Em T Ns CP
+.Em R Ns esearch )
+is a kernel module that logs a range of statistics on active TCP connections to
+a log file.
+It provides the ability to make highly granular measurements of TCP connection
+state, aimed at system administrators, developers and researchers.
+.Ss Compile-time Configuration
+The default operation of
+.Nm
+is to capture IPv4 TCP/IP packets.
+.Nm
+can be configured to support IPv4 and IPv6 by uncommenting:
+.Bd -literal -offset indent
+CFLAGS+=-DSIFTR_IPV6
+.Ed
+.Pp
+in
+.Aq sys/modules/siftr/Makefile
+and recompiling.
+.Pp
+In the IPv4-only (default) mode, standard dotted decimal notation (e.g.
+"136.186.229.95") is used to format IPv4 addresses for logging.
+In IPv6 mode, standard dotted decimal notation is used to format IPv4 addresses,
+and standard colon-separated hex notation (see RFC 4291) is used to format IPv6
+addresses for logging. Note that SIFTR uses uncompressed notation to format IPv6
+addresses.
+For example, the address "fe80::20f:feff:fea2:531b" would be logged as
+"fe80:0:0:0:20f:feff:fea2:531b".
+.Ss Run-time Configuration
+.Nm
+utilises the
+.Xr sysctl 8
+interface to export its configuration variables to user-space.
+The following variables are available:
+.Bl -tag -offset indent
+.It Va net.inet.siftr.enabled
+controls whether the module performs its
+measurements or not.
+By default, the value is set to 0, which means the module
+will not be taking any measurements.
+Having the module loaded with
+.Va net.inet.siftr.enabled
+set to 0 will have no impact on the performance of the network stack, as the
+packet filtering hooks are only inserted when
+.Va net.inet.siftr.enabled
+is set to 1.
+.El
+.Bl -tag -offset indent
+.It Va net.inet.siftr.ppl
+controls how many inbound/outbound packets for a given TCP connection will cause
+a log message to be generated for the connection.
+By default, the value is set to 1, which means the module will log a message for
+every packet of every TCP connection.
+The value can be set to any integer in the range [1,2^32], and can be changed at
+any time, even while the module is enabled.
+.El
+.Bl -tag -offset indent
+.It Va net.inet.siftr.logfile
+controls the path to the file that the module writes its log messages to.
+By default, the file /var/log/siftr.log is used.
+The path can be changed at any time, even while the module is enabled.
+.El
+.Bl -tag -offset indent
+.It Va net.inet.siftr.genhashes
+controls whether a hash is generated for each TCP packet seen by
+.Nm .
+By default, the value is set to 0, which means no hashes are generated.
+The hashes are useful to correlate which TCP packet triggered the generation of
+a particular log message, but calculating them adds additional computational
+overhead into the fast path.
+.El
+.Ss Log Format
+A typical
+.Nm
+log file will contain 3 different types of log message.
+All messages are written in plain ASCII text.
+.Pp
+Note: The
+.Qq \e
+present in the example log messages in this section indicates a
+line continuation and is not part of the actual log message.
+.Pp
+The first type of log message is written to the file when the module is
+enabled and starts collecting data from the running kernel. The text below
+shows an example module enable log. The fields are tab delimited key-value
+pairs which describe some basic information about the system.
+.Bd -literal -offset indent
+enable_time_secs=1238556193    enable_time_usecs=462104 \\
+siftrver=1.2.2    hz=1000    tcp_rtt_scale=32 \\
+sysname=FreeBSD    sysver=604000    ipmode=4
+.Ed
+.Pp
+Field descriptions are as follows:
+.Bl -tag -offset indent
+.It Va enable_time_secs
+time at which the module was enabled, in seconds since the UNIX epoch.
+.El
+.Bl -tag -offset indent
+.It Va enable_time_usecs
+time at which the module was enabled, in microseconds since enable_time_secs.
+.El
+.Bl -tag -offset indent
+.It Va siftrver
+version of
+.Nm .
+.El
+.Bl -tag -offset indent
+.It Va hz
+tick rate of the kernel in ticks per second.
+.El
+.Bl -tag -offset indent
+.It Va tcp_rtt_scale
+smoothed RTT estimate scaling factor.
+.El
+.Bl -tag -offset indent
+.It Va sysname
+operating system name.
+.El
+.Bl -tag -offset indent
+.It Va sysver
+operating system version.
+.El
+.Bl -tag -offset indent
+.It Va ipmode
+IP mode as defined at compile time.
+An ipmode of "4" means IPv6 is not supported and IP addresses are logged in
+regular dotted quad format.
+An ipmode of "6" means IPv6 is supported, and IP addresses are logged in dotted
+quad or hex format, as described in the
+.Qq Compile-time Configuration
+subsection.
+.El
+.Pp
+The second type of log message is written to the file when a data log message
+is generated.
+The text below shows an example data log triggered by an IPv4
+TCP/IP packet.
+The data is CSV formatted.
+.Bd -literal -offset indent
+o,0xbec491a5,1238556193.463551,172.16.7.28,22,172.16.2.5,55931, \\
+1073725440,172312,6144,66560,66608,8,1,4,1448,936,1,996,255, \\
+33304,208,66608,0,208
+.Ed
+.Pp
+Field descriptions are as follows:
+.Bl -tag -offset indent
+.It Va 1
+Direction of packet that triggered the log message.
+Either
+.Qq i
+for in, or
+.Qq o
+for out.
+.El
+.Bl -tag -offset indent
+.It Va 2
+Hash of the packet that triggered the log message.
+.El
+.Bl -tag -offset indent
+.It Va 3
+Time at which the packet that triggered the log message was processed by
+the
+.Xr pfil 9
+hook function, in seconds and microseconds since the UNIX epoch.
+.El
+.Bl -tag -offset indent
+.It Va 4
+The IPv4 or IPv6 address of the local host, in dotted quad (IPv4 packet)
+or colon-separated hex (IPv6 packet) notation.
+.El
+.Bl -tag -offset indent
+.It Va 5
+The TCP port that the local host is communicating via.
+.El
+.Bl -tag -offset indent
+.It Va 6
+The IPv4 or IPv6 address of the foreign host, in dotted quad (IPv4 packet)
+or colon-separated hex (IPv6 packet) notation.
+.El
+.Bl -tag -offset indent
+.It Va 7
+The TCP port that the foreign host is communicating via.
+.El
+.Bl -tag -offset indent
+.It Va 8
+The slow start threshold for the flow, in bytes.
+.El
+.Bl -tag -offset indent
+.It Va 9
+The current congestion window for the flow, in bytes.
+.El
+.Bl -tag -offset indent
+.It Va 10
+The current bandwidth-controlled window for the flow, in bytes.
+.El
+.Bl -tag -offset indent
+.It Va 11
+The current sending window for the flow, in bytes.
+The post scaled value is reported, except during the initial handshake (first
+few packets), during which time the unscaled value is reported.
+.El
+.Bl -tag -offset indent
+.It Va 12
+The current receive window for the flow, in bytes.
+The post scaled value is always reported.
+.El
+.Bl -tag -offset indent
+.It Va 13
+The current window scaling factor for the sending window.
+.El
+.Bl -tag -offset indent
+.It Va 14
+The current window scaling factor for the receiving window.
+.El
+.Bl -tag -offset indent
+.It Va 15
+The current state of the TCP finite state machine, as defined
+in
+.Aq Pa netinet/tcp_fsm.h .
+.El
+.Bl -tag -offset indent
+.It Va 16
+The maximum segment size for the flow, in bytes.
+.El
+.Bl -tag -offset indent
+.It Va 17
+The current smoothed RTT estimate for the flow, in units of TCP_RTT_SCALE * HZ,
+where TCP_RTT_SCALE is a define found in tcp_var.h, and HZ is the kernel's tick
+timer.
+Divide by TCP_RTT_SCALE * HZ to get the RTT in secs. TCP_RTT_SCALE and HZ are
+reported in the enable log message.
+.El
+.Bl -tag -offset indent
+.It Va 18
+SACK enabled indicator. 1 if SACK enabled, 0 otherwise.
+.El
+.Bl -tag -offset indent
+.It Va 19
+The current state of the TCP flags for the flow.
+See
+.Aq Pa netinet/tcp_var.h
+for information about the various flags.
+.El
+.Bl -tag -offset indent
+.It Va 20
+The current retransmission timeout length for the flow, in units of HZ, where HZ
+is the kernel's tick timer.
+Divide by HZ to get the timeout length in seconds. HZ is reported in the
+enable log message.
+.El
+.Bl -tag -offset indent
+.It Va 21
+The current size of the socket send buffer in bytes.
+.El
+.Bl -tag -offset indent
+.It Va 22
+The current number of bytes in the socket send buffer.
+.El
+.Bl -tag -offset indent
+.It Va 23
+The current size of the socket receive buffer in bytes.
+.El
+.Bl -tag -offset indent
+.It Va 24
+The current number of bytes in the socket receive buffer.
+.El
+.Bl -tag -offset indent
+.It Va 25
+The current number of unacknowledged bytes in-flight.
+Bytes acknowledged via SACK are not excluded from this count.
+.El
+.Pp
+The third type of log message is written to the file when the module is disabled
+and ceases collecting data from the running kernel.
+The text below shows an example module disable log.
+The fields are tab delimited key-value pairs which provide statistics about
+operations since the module was most recently enabled.
+.Bd -literal -offset indent
+disable_time_secs=1238556197    disable_time_usecs=933607 \\
+num_inbound_tcp_pkts=356    num_outbound_tcp_pkts=627 \\
+total_tcp_pkts=983    num_inbound_skipped_pkts_malloc=0 \\
+num_outbound_skipped_pkts_malloc=0    num_inbound_skipped_pkts_mtx=0 \\
+num_outbound_skipped_pkts_mtx=0    num_inbound_skipped_pkts_tcb=0 \\
+num_outbound_skipped_pkts_tcb=0    num_inbound_skipped_pkts_icb=0 \\
+num_outbound_skipped_pkts_icb=0    total_skipped_tcp_pkts=0 \\
+flow_list=172.16.7.28;22-172.16.2.5;55931,
+.Ed
+.Pp
+Field descriptions are as follows:
+.Bl -tag -offset indent
+.It Va disable_time_secs
+Time at which the module was disabled, in seconds since the UNIX epoch.
+.El
+.Bl -tag -offset indent
+.It Va disable_time_usecs
+Time at which the module was disabled, in microseconds since disable_time_secs.
+.El
+.Bl -tag -offset indent
+.It Va num_inbound_tcp_pkts
+Number of TCP packets that traversed up the network stack.
+This only includes inbound TCP packets during the periods when
+.Nm
+was enabled.
+.El
+.Bl -tag -offset indent
+.It Va num_outbound_tcp_pkts
+Number of TCP packets that traversed down the network stack.
+This only includes outbound TCP packets during the periods when
+.Nm
+was enabled.
+.El
+.Bl -tag -offset indent
+.It Va total_tcp_pkts
+The summation of num_inbound_tcp_pkts and num_outbound_tcp_pkts.
+.El
+.Bl -tag -offset indent
+.It Va num_inbound_skipped_pkts_malloc
+Number of inbound packets that were not processed because of failed malloc() calls.
+.El
+.Bl -tag -offset indent
+.It Va num_outbound_skipped_pkts_malloc
+Number of outbound packets that were not processed because of failed malloc() calls.
+.El
+.Bl -tag -offset indent
+.It Va num_inbound_skipped_pkts_mtx
+Number of inbound packets that were not processed because of failure to add the
+packet to the packet processing queue.
+.El
+.Bl -tag -offset indent
+.It Va num_outbound_skipped_pkts_mtx
+Number of outbound packets that were not processed because of failure to add the
+packet to the packet processing queue.
+.El
+.Bl -tag -offset indent
+.It Va num_inbound_skipped_pkts_tcb
+Number of inbound packets that were not processed because of failure to find the
+TCP control block associated with the packet.
+.El
+.Bl -tag -offset indent
+.It Va num_outbound_skipped_pkts_tcb
+Number of outbound packets that were not processed because of failure to find
+the TCP control block associated with the packet.
+.El
+.Bl -tag -offset indent
+.It Va num_inbound_skipped_pkts_icb
+Number of inbound packets that were not processed because of failure to find the
+IP control block associated with the packet.
+.El
+.Bl -tag -offset indent
+.It Va num_outbound_skipped_pkts_icb
+Number of outbound packets that were not processed because of failure to find
+the IP control block associated with the packet.
+.El
+.Bl -tag -offset indent
+.It Va total_skipped_tcp_pkts
+The summation of all skipped packet counters.
+.El
+.Bl -tag -offset indent
+.It Va flow_list
+A CSV list of TCP flows that triggered data log messages to be generated since
+the module was loaded.
+Each flow entry in the CSV list is
+formatted as
+.Qq local_ip;local_port-foreign_ip;foreign_port .
+If there are no entries in the list (i.e. no data log messages were generated),
+the value will be blank.
+If there is at least one entry in the list, a trailing comma will always be
+present.
+.El
+.Pp
+The total number of data log messages found in the log file for a module
+enable/disable cycle should equate to total_tcp_pkts - total_skipped_tcp_pkts.
+.Sh IMPLEMENTATION NOTES
+.Nm
+hooks into the network stack using the
+.Xr pfil 9
+interface.
+In its current incarnation, it hooks into the AF_INET/AF_INET6 (IPv4/IPv6)
+.Xr pfil 9
+filtering points, which means it sees packets at the IP layer of the network
+stack.
+This means that TCP packets inbound to the stack are intercepted before
+they have been processed by the TCP layer.
+Packets outbound from the stack are intercepted after they have been processed
+by the TCP layer.
+.Pp
+The diagram below illustrates how
+.Nm
+inserts itself into the stack.
+.Bd -literal -offset indent
+----------------------------------
+           Upper Layers
+----------------------------------
+    ^                       |
+    |                       |
+    |                       |
+    |                       v
+ TCP in                  TCP out
+----------------------------------
+    ^                      |
+    |________     _________|
+            |     |
+            |     v
+           ---------
+           | SIFTR |
+           ---------
+            ^     |
+    ________|     |__________
+    |                       |
+    |                       v
+IPv{4/6} in            IPv{4/6} out
+----------------------------------
+    ^                       |
+    |                       |
+    |                       v
+Layer 2 in             Layer 2 out
+----------------------------------
+          Physical Layer
+----------------------------------
+.Ed
+.Pp
+.Nm
+uses the
+.Xr alq 9
+interface to manage writing data to disk.
+.Pp
+At first glance, you might mistakenly think that
+.Nm
+extracts information from
+individual TCP packets.
+This is not the case.
+.Nm
+uses TCP packet events (inbound and outbound) for each TCP flow originating from
+the system to trigger a dump of the state of the TCP control block for that
+flow.
+With the PPL set to 1, we are in effect sampling each TCP flow's control block
+state as frequently as flow packets enter/leave the system.
+For example, setting PPL to 2 halves the sampling rate i.e. every second flow
+packet (inbound OR outbound) causes a dump of the control block state.
+.Pp
+The distinction between interrogating individual packets vs interrogating the
+control block is important, because
+.Nm
+does not remove the need for packet capturing tools like
+.Xr tcpdump 1 .
+.Nm
+allows you to correlate and observe the cause-and-effect relationship between
+what you see on the wire (captured using a tool like
+.Xr tcpdump 1 Ns )
+and changes in the TCP control block corresponding to the flow of interest.
+It is therefore useful to use
+.Nm
+and a tool like
+.Xr tcpdump 1
+to gather the necessary data to piece together the complete picture.
+Use of either tool on its own will not be able to provide all of the necessary
+data.
+.Pp
+As a result of needing to interrogate the TCP control block, certain packets
+during the lifecycle of a connection are unable to trigger a
+.Nm
+log message.
+The initial handshake takes place without the existence of a control block and
+the final ACK is exchanged when the connection is in the TIMEWAIT state.
+.Pp
+.Nm
+was designed to minimise the delay introduced to packets traversing the network
+stack.
+This design called for a highly optimised and minimal hook function that
+extracted the minimal details necessary whilst holding the packet up, and
+passing these details to another thread for actual processing and logging.
+.Pp
+This multithreaded design does introduce some contention issues when accessing
+the data structure shared between the threads of operation.
+When the hook function tries to place details in the structure, it must first
+acquire an exclusive lock.
+Likewise, when the processing thread tries to read details from the structure,
+it must also acquire an exclusive lock to do so.
+If one thread holds the lock, the other must wait before it can obtain it.
+This does introduce some additional bounded delay into the kernel's packet
+processing code path.
+.Pp
+In some cases (e.g. low memory, connection termination), TCP packets that enter
+the
+.Nm
+.Xr pfil 9
+hook function will not trigger a log message to be generated.
+.Nm
+refers to this outcome as a
+.Qq skipped packet .
+Note that
+.Nm
+always ensures that packets are allowed to continue through the stack, even if
+they could not successfully trigger a data log message.
+.Nm
+will therefore not introduce any packet loss for TCP/IP packets traversing the
+network stack.
+.Ss Important Behaviours
+The behaviour of a log file path change whilst the module is enabled is as
+follows:
+.Bl -enum
+.It
+Attempt to open the new file path for writing.
+If this fails, the path change will fail and the existing path will continue to
+be used.
+.It
+Assuming the new path is valid and opened successfully:
+.Bl -dash
+.It
+Flush all pending log messages to the old file path.
+.It
+Close the old file path.
+.It
+Switch the active log file pointer to point at the new file path.
+.It
+Commence logging to the new file.
+.El
+.El
+.Pp
+During the time between the flush of pending log messages to the old file and
+commencing logging to the new file, new log messages will still be generated and
+buffered.
+As soon as the new file path is ready for writing, the accumulated log messages
+will be written out to the file.
+.Sh EXAMPLES
+To enable the module's operations, run the following command as root:
+sysctl net.inet.siftr.enabled=1
+.Pp
+To change the granularity of log messages such that 1 log message is
+generated for every 10 TCP packets per connection, run the following
+command as root:
+sysctl net.inet.siftr.ppl=10
+.Pp
+To change the log file location to /tmp/siftr.log, run the following
+command as root:
+sysctl net.inet.siftr.logfile=/tmp/siftr.log
+.Sh SEE ALSO
+.Xr alq 9 ,
+.Xr pfil 9 ,
+.Xr sysctl 8 ,
+.Xr tcp 4 ,
+.Xr tcpdump 1
+.Sh ACKNOWLEDGEMENTS
+Development of this software was made possible in part by grants from the
+Cisco University Research Program Fund at Community Foundation Silicon Valley,
+and the FreeBSD Foundation.
+.Sh HISTORY
+.Nm
+first appeared in
+.Fx 9.0 .
+.Pp
+.Nm
+was first released in 2007 by Lawrence Stewart and James Healy whilst working on
+the NewTCP research project at Swinburne University's Centre for Advanced
+Internet Architectures, Melbourne, Australia, which was made possible in part by
+a grant from the Cisco University Research Program Fund at Community Foundation
+Silicon Valley.
+More details are available at:
+.Pp
+http://caia.swin.edu.au/urp/newtcp/
+.Pp
+Work on
+.Nm
+v1.2.x was sponsored by the FreeBSD Foundation as part of
+the
+.Qq Enhancing the FreeBSD TCP Implementation
+project 2008-2009.
+More details are available at:
+.Pp
+http://www.freebsdfoundation.org/
+.Pp
+http://caia.swin.edu.au/freebsd/etcp09/
+.Sh AUTHORS
+.An -nosplit
+.Nm
+was written by
+.An Lawrence Stewart Aq lstewart@FreeBSD.org
+and
+.An James Healy Aq jimmy@deefa.com .
+.Pp
+This manual page was written by
+.An Lawrence Stewart Aq lstewart@FreeBSD.org .
+.Sh BUGS
+Current known limitations and any relevant workarounds are outlined below:
+.Bl -dash
+.It
+The internal queue used to pass information between the threads of operation is
+currently unbounded.
+This allows
+.Nm
+to cope with bursty network traffic, but sustained high packet-per-second
+traffic can cause exhaustion of kernel memory if the processing thread cannot
+keep up with the packet rate.
+.It
+If using
+.Nm
+on a machine that is also running other modules utilising the
+.Xr pfil 9
+framework e.g.
+.Xr dummynet 4 ,
+.Xr ipfw 8 ,
+.Xr pf 4 Ns ,
+the order in which you load the modules is important.
+You should kldload the other modules first, as this will ensure TCP packets
+undergo any necessary manipulations before
+.Nm
+.Qq sees
+and processes them.
+.It
+There is a known, harmless lock order reversal warning between the
+.Xr pfil 9
+mutex and tcbinfo TCP lock reported by
+.Xr witness 4
+when
+.Nm
+is enabled in a kernel compiled with
+.Xr witness 4
+support.
+.It
+There is no way to filter which TCP flows you wish to capture data for.
+Post processing is required to separate out data belonging to particular flows
+of interest.
+.It
+The module does not detect deletion of the log file path.
+New log messages will simply be lost if the log file being used by
+.Nm
+is deleted whilst the module is set to use the file.
+Switching to a new log file using the
+.Em net.inet.siftr.logfile
+variable will create the new file and allow log messages to begin being written
+to disk again.
+The new log file path must differ from the path to the deleted file.
+.It
+The hash table used within the code is sized to hold 65536 flows.  This is not a
+hard limit, because chaining is used to handle collisions within the hash table
+structure.
+However, we suspect (based on analogies with other hash table performance data)
+that the hash table look up performance (and therefore the module's packet
+processing performance) will degrade in an exponential manner as the number of
+unique flows handled in a module enable/disable cycle approaches and surpasses
+65536.
+.It
+There is no garbage collection performed on the flow hash table.
+The only way currently to flush it is to disable
+.Nm .
+.It
+The PPL variable applies to packets that make it into the processing thread,
+not total packets received in the hook function.
+Packets are skipped before the PPL variable is applied, which means there may be
+a slight discrepancy in the triggering of log messages.
+For example, if PPL was set to 10, and the 8th packet since the last log message
+is skipped, the 11th packet will actually trigger the log message to be
+generated.
+This is discussed in greater depth in CAIA technical report 070824A.
+.It
+At the time of writing, there was no simple way to hook into the TCP layer
+to intercept packets.
+.Nm Ap s
+use of IP layer hook points means all IP
+traffic will be processed by the
+.Nm
+.Xr pfil 9
+hook function, which introduces minor, but nonetheless unnecessary packet delay
+and processing overhead on the system for non-TCP packets as well.
+Hooking in at the IP layer is also not ideal from the data gathering point of
+view.
+Packets traversing up the stack will be intercepted and cause a log message
+generation BEFORE they have been processed by the TCP layer, which means we
+cannot observe the cause-and-effect relationship between inbound events and the
+corresponding TCP control block as precisely as could be.
+Ideally,
+.Nm
+should intercept packets after they have been processed by the TCP layer i.e.
+intercept packets coming up the stack after they have been processed by
+tcp_input(), and intercept packets coming down the stack after they have been
+processed by tcp_output().
+The current code still gives satisfactory granularity though, as inbound events
+tend to trigger outbound events, allowing the cause-and-effect to be observed
+indirectly by capturing the state on outbound events as well.
+.It
+The
+.Qq inflight bytes
+value logged by
+.Nm
+does not take into account bytes that have been
+.No SACK Ap ed
+by the receiving host.
+.It
+Packet hash generation does not currently work for IPv6 based TCP packets.
+.It
+Compressed notation is not used for IPv6 address representation.
+This consumes more bytes than is necessary in log output.
+.El

Modified: stable/7/sys/modules/Makefile
==============================================================================
--- stable/7/sys/modules/Makefile	Sat Nov  6 12:41:47 2010	(r214874)
+++ stable/7/sys/modules/Makefile	Sat Nov  6 13:03:33 2010	(r214875)
@@ -255,6 +255,7 @@ SUBDIR=	${_3dfx} \
 	sem \
 	sf \
 	sge \
+	siftr \
 	${_sio} \
 	sis \
 	sk \

Copied and modified: stable/7/sys/netinet/siftr.c (from r209662, head/sys/netinet/siftr.c)
==============================================================================
--- head/sys/netinet/siftr.c	Sat Jul  3 13:32:39 2010	(r209662, copy source)
+++ stable/7/sys/netinet/siftr.c	Sat Nov  6 13:03:33 2010	(r214875)
@@ -259,7 +259,13 @@ struct siftr_stats
 	uint32_t nskip_out_dejavu;
 };
 
-static DPCPU_DEFINE(struct siftr_stats, ss);
+/* Pre 8 and pre DPCPU. */
+static struct proc *siftr_pkt_manager_proc = NULL;
+#define	V_tcbinfo	tcbinfo
+static struct siftr_stats	nondpcpu_ss;
+#define	DPCPU_PTR(n)		&nondpcpu_##n
+#define	DPCPU_VARSUM(n, var)	nondpcpu_##n.var
+#define	DPCPU_ZERO(n)		bzero(&nondpcpu_##n, sizeof(nondpcpu_##n))
 
 static volatile unsigned int siftr_exit_pkt_manager_thread = 0;
 static unsigned int siftr_enabled = 0;
@@ -612,8 +618,8 @@ siftr_pkt_manager_thread(void *arg)
 
 	mtx_unlock(&siftr_pkt_mgr_mtx);
 
-	/* Calls wakeup on this thread's struct thread ptr. */
-	kthread_exit();
+	/* Calls wakeup on this thread's struct proc ptr on 7.x. */
+	kthread_exit(0);
 }
 
 
@@ -1233,11 +1239,14 @@ siftr_manage_ops(uint8_t action)
 
 		STAILQ_INIT(&pkt_queue);
 
+		DPCPU_ZERO(ss);
+
 		siftr_exit_pkt_manager_thread = 0;
 
-		ret = kthread_add(&siftr_pkt_manager_thread, NULL, NULL,
-		    &siftr_pkt_manager_thr, RFNOWAIT, 0,
+		ret = kthread_create(&siftr_pkt_manager_thread, NULL,
+		    &siftr_pkt_manager_proc, RFNOWAIT, 0,
 		    "siftr_pkt_manager_thr");
+		siftr_pkt_manager_thr = FIRST_THREAD_IN_PROC(siftr_pkt_manager_proc);
 
 		siftr_pfil(HOOK);
 
@@ -1276,22 +1285,22 @@ siftr_manage_ops(uint8_t action)
 		wakeup(&wait_for_pkt);
 
 		/* Wait for the pkt_manager thread to exit. */
-		mtx_sleep(siftr_pkt_manager_thr, &siftr_pkt_mgr_mtx, PWAIT,
+		mtx_sleep(siftr_pkt_manager_proc, &siftr_pkt_mgr_mtx, PWAIT,
 		    "thrwait", 0);
-
+		siftr_pkt_manager_proc = NULL;
 		siftr_pkt_manager_thr = NULL;
 		mtx_unlock(&siftr_pkt_mgr_mtx);
 
-		totalss.n_in = DPCPU_SUM(ss, n_in);
-		totalss.n_out = DPCPU_SUM(ss, n_out);
-		totalss.nskip_in_malloc = DPCPU_SUM(ss, nskip_in_malloc);
-		totalss.nskip_out_malloc = DPCPU_SUM(ss, nskip_out_malloc);
-		totalss.nskip_in_mtx = DPCPU_SUM(ss, nskip_in_mtx);
-		totalss.nskip_out_mtx = DPCPU_SUM(ss, nskip_out_mtx);
-		totalss.nskip_in_tcpcb = DPCPU_SUM(ss, nskip_in_tcpcb);
-		totalss.nskip_out_tcpcb = DPCPU_SUM(ss, nskip_out_tcpcb);
-		totalss.nskip_in_inpcb = DPCPU_SUM(ss, nskip_in_inpcb);
-		totalss.nskip_out_inpcb = DPCPU_SUM(ss, nskip_out_inpcb);
+		totalss.n_in = DPCPU_VARSUM(ss, n_in);
+		totalss.n_out = DPCPU_VARSUM(ss, n_out);
+		totalss.nskip_in_malloc = DPCPU_VARSUM(ss, nskip_in_malloc);
+		totalss.nskip_out_malloc = DPCPU_VARSUM(ss, nskip_out_malloc);
+		totalss.nskip_in_mtx = DPCPU_VARSUM(ss, nskip_in_mtx);
+		totalss.nskip_out_mtx = DPCPU_VARSUM(ss, nskip_out_mtx);
+		totalss.nskip_in_tcpcb = DPCPU_VARSUM(ss, nskip_in_tcpcb);
+		totalss.nskip_out_tcpcb = DPCPU_VARSUM(ss, nskip_out_tcpcb);
+		totalss.nskip_in_inpcb = DPCPU_VARSUM(ss, nskip_in_inpcb);
+		totalss.nskip_out_inpcb = DPCPU_VARSUM(ss, nskip_out_inpcb);
 
 		total_skipped_pkts = totalss.nskip_in_malloc +
 		    totalss.nskip_out_malloc + totalss.nskip_in_mtx +

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 13:46:59 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 4A126106564A;
	Sat,  6 Nov 2010 13:46:59 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 378318FC19;
	Sat,  6 Nov 2010 13:46:59 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6Dkx2i032875;
	Sat, 6 Nov 2010 13:46:59 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6DkxWv032871;
	Sat, 6 Nov 2010 13:46:59 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061346.oA6DkxWv032871@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 13:46:59 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214878 - stable/7/sys/netinet
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 13:46:59 -0000

Author: lstewart
Date: Sat Nov  6 13:46:58 2010
New Revision: 214878
URL: http://svn.freebsd.org/changeset/base/214878

Log:
  MFC r213158:
  
  Internalise reassembly queue related functionality and variables which should
  not be used outside of the reassembly queue implementation. Provide a new
  function to flush all segments from a reassembly queue and call it from the
  appropriate places instead of manipulating the queue directly.
  
  The base code from r213158 was modified as part of this MFC in order to work
  correctly on FreeBSD 7.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	andre, gnn, rpaulo

Modified:
  stable/7/sys/netinet/tcp_reass.c
  stable/7/sys/netinet/tcp_subr.c
  stable/7/sys/netinet/tcp_var.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/netinet/tcp_reass.c
==============================================================================
--- stable/7/sys/netinet/tcp_reass.c	Sat Nov  6 13:43:18 2010	(r214877)
+++ stable/7/sys/netinet/tcp_reass.c	Sat Nov  6 13:46:58 2010	(r214878)
@@ -81,7 +81,7 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO
     &tcp_reass_maxseg, 0,
     "Global maximum number of TCP Segments in Reassembly Queue");
 
-int tcp_reass_qsize = 0;
+static int tcp_reass_qsize = 0;
 SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
     &tcp_reass_qsize, 0,
     "Global number of TCP Segments currently in Reassembly Queue");
@@ -96,6 +96,8 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO
     &tcp_reass_overflows, 0,
     "Global number of TCP Segment Reassembly Queue Overflows");
 
+static uma_zone_t	tcp_reass_zone;
+
 /* Initialize TCP reassembly queue */
 static void
 tcp_reass_zone_change(void *tag)
@@ -105,8 +107,6 @@ tcp_reass_zone_change(void *tag)
 	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
 }
 
-uma_zone_t	tcp_reass_zone;
-
 void
 tcp_reass_init(void)
 {
@@ -121,6 +121,26 @@ tcp_reass_init(void)
 	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
 }
 
+void
+tcp_reass_flush(struct tcpcb *tp)
+{
+	struct tseg_qent *qe;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
+		LIST_REMOVE(qe, tqe_q);
+		m_freem(qe->tqe_m);
+		uma_zfree(tcp_reass_zone, qe);
+		tp->t_segqlen--;
+		tcp_reass_qsize--;
+	}
+
+	KASSERT((tp->t_segqlen == 0),
+	    ("TCP reass queue %p segment count is %d instead of 0 after flush.",
+	    tp, tp->t_segqlen));
+}
+
 int
 tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 {

Modified: stable/7/sys/netinet/tcp_subr.c
==============================================================================
--- stable/7/sys/netinet/tcp_subr.c	Sat Nov  6 13:43:18 2010	(r214877)
+++ stable/7/sys/netinet/tcp_subr.c	Sat Nov  6 13:46:58 2010	(r214878)
@@ -704,7 +704,6 @@ tcp_drop(struct tcpcb *tp, int errno)
 void
 tcp_discardcb(struct tcpcb *tp)
 {
-	struct tseg_qent *q;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
@@ -782,13 +781,7 @@ tcp_discardcb(struct tcpcb *tp)
 	}
 
 	/* free the reassembly queue, if any */
-	while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
-		LIST_REMOVE(q, tqe_q);
-		m_freem(q->tqe_m);
-		uma_zfree(tcp_reass_zone, q);
-		tp->t_segqlen--;
-		tcp_reass_qsize--;
-	}
+	tcp_reass_flush(tp);
 	/* Disconnect offload device, if any. */
 	tcp_offload_detach(tp);
 		
@@ -840,7 +833,6 @@ tcp_drain(void)
 	if (do_tcpdrain) {
 		struct inpcb *inpb;
 		struct tcpcb *tcpb;
-		struct tseg_qent *te;
 
 	/*
 	 * Walk the tcpbs, if existing, and flush the reassembly queue,
@@ -856,14 +848,7 @@ tcp_drain(void)
 				continue;
 			INP_WLOCK(inpb);
 			if ((tcpb = intotcpcb(inpb)) != NULL) {
-				while ((te = LIST_FIRST(&tcpb->t_segq))
-			            != NULL) {
-					LIST_REMOVE(te, tqe_q);
-					m_freem(te->tqe_m);
-					uma_zfree(tcp_reass_zone, te);
-					tcpb->t_segqlen--;
-					tcp_reass_qsize--;
-				}
+				tcp_reass_flush(tcpb);
 				tcp_clean_sackreport(tcpb);
 			}
 			INP_WUNLOCK(inpb);

Modified: stable/7/sys/netinet/tcp_var.h
==============================================================================
--- stable/7/sys/netinet/tcp_var.h	Sat Nov  6 13:43:18 2010	(r214877)
+++ stable/7/sys/netinet/tcp_var.h	Sat Nov  6 13:46:58 2010	(r214878)
@@ -48,8 +48,6 @@ struct tseg_qent {
 	struct	mbuf	*tqe_m;		/* mbuf contains packet */
 };
 LIST_HEAD(tsegqe_head, tseg_qent);
-extern int	tcp_reass_qsize;
-extern struct uma_zone *tcp_reass_zone;
 
 struct sackblk {
 	tcp_seq start;		/* start seq no. of sack block */
@@ -543,6 +541,7 @@ char	*tcp_log_vain(struct in_conninfo *,
 	    const void *);
 int	 tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
 void	 tcp_reass_init(void);
+void	 tcp_reass_flush(struct tcpcb *);
 void	 tcp_input(struct mbuf *, int);
 u_long	 tcp_maxmtu(struct in_conninfo *, int *);
 u_long	 tcp_maxmtu6(struct in_conninfo *, int *);

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 14:38:58 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id F3BB5106566B;
	Sat,  6 Nov 2010 14:38:57 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id C72678FC0C;
	Sat,  6 Nov 2010 14:38:57 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6Ecvk9034139;
	Sat, 6 Nov 2010 14:38:57 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6EcvW5034136;
	Sat, 6 Nov 2010 14:38:57 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061438.oA6EcvW5034136@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 14:38:57 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214881 - stable/7/sys/vm
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 14:38:58 -0000

Author: lstewart
Date: Sat Nov  6 14:38:57 2010
New Revision: 214881
URL: http://svn.freebsd.org/changeset/base/214881

Log:
  MFC r211396 (originally committed by andre):
  
  Add uma_zone_get_max() to obtain the effective limit after a call to
  uma_zone_set_max().
  
  The UMA zone limit is not exactly set to the value supplied but rounded up to
  completely fill the backing store increment (a page normally).  This can lead to
  surprising situations where the number of elements allocated from UMA is higher
  than the supplied limit value.  The new get function reads back the effective
  value so that the supplied limit value can be adjusted to the real limit.
  
  The base code from r211396 was modified as part of this MFC in order to work
  correctly on FreeBSD 7.
  
  Reviewed by:	jeffr

Modified:
  stable/7/sys/vm/uma.h
  stable/7/sys/vm/uma_core.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/vm/uma.h
==============================================================================
--- stable/7/sys/vm/uma.h	Sat Nov  6 14:22:50 2010	(r214880)
+++ stable/7/sys/vm/uma.h	Sat Nov  6 14:38:57 2010	(r214881)
@@ -431,6 +431,18 @@ int uma_zone_set_obj(uma_zone_t zone, st
 void uma_zone_set_max(uma_zone_t zone, int nitems);
 
 /*
+ * Obtains the effective limit on the number of items in a zone
+ *
+ * Arguments:
+ *	zone  The zone to obtain the effective limit from
+ *
+ * Return:
+ *	0  No limit
+ *	int  The effective limit of the zone
+ */
+int uma_zone_get_max(uma_zone_t zone);
+
+/*
  * The following two routines (uma_zone_set_init/fini)
  * are used to set the backend init/fini pair which acts on an
  * object as it becomes allocated and is placed in a slab within

Modified: stable/7/sys/vm/uma_core.c
==============================================================================
--- stable/7/sys/vm/uma_core.c	Sat Nov  6 14:22:50 2010	(r214880)
+++ stable/7/sys/vm/uma_core.c	Sat Nov  6 14:38:57 2010	(r214881)
@@ -2521,6 +2521,24 @@ uma_zone_set_max(uma_zone_t zone, int ni
 }
 
 /* See uma.h */
+int
+uma_zone_get_max(uma_zone_t zone)
+{
+	int nitems;
+	uma_keg_t keg;
+
+	ZONE_LOCK(zone);
+	keg = zone->uz_keg;
+	if (keg->uk_maxpages)
+		nitems = keg->uk_maxpages * keg->uk_ipers;
+	else
+		nitems = 0;
+	ZONE_UNLOCK(zone);
+
+	return (nitems);
+}
+
+/* See uma.h */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 14:49:10 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 6EAB1106566C;
	Sat,  6 Nov 2010 14:49:10 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 5B7778FC19;
	Sat,  6 Nov 2010 14:49:10 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6EnAjT034458;
	Sat, 6 Nov 2010 14:49:10 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6EnAPq034456;
	Sat, 6 Nov 2010 14:49:10 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061449.oA6EnAPq034456@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 14:49:10 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214883 - stable/7/sys/netinet
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 14:49:10 -0000

Author: lstewart
Date: Sat Nov  6 14:49:10 2010
New Revision: 214883
URL: http://svn.freebsd.org/changeset/base/214883

Log:
  MFC r210203:
  
  - Move common code from the hook functions that fills in a packet node struct to
    a separate inline function. This further reduces duplicate code that didn't
    have a good reason to stay as it was.
  
  - Reorder the malloc of a pkt_node struct in the hook functions such that it
    only occurs if we managed to find a usable tcpcb associated with the packet.
  
  - Make the inp_locally_locked variable's type consistent with the prototype of
    siftr_siftdata().
  
  Sponsored by:	FreeBSD Foundation

Modified:
  stable/7/sys/netinet/siftr.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/netinet/siftr.c
==============================================================================
--- stable/7/sys/netinet/siftr.c	Sat Nov  6 14:46:24 2010	(r214882)
+++ stable/7/sys/netinet/siftr.c	Sat Nov  6 14:49:10 2010	(r214883)
@@ -752,6 +752,67 @@ siftr_findinpcb(int ipver, struct ip *ip
 }
 
 
+static inline void
+siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp,
+    int ipver, int dir, int inp_locally_locked)
+{
+#ifdef SIFTR_IPV6
+	if (ipver == INP_IPV4) {
+		pn->ip_laddr[3] = inp->inp_laddr.s_addr;
+		pn->ip_faddr[3] = inp->inp_faddr.s_addr;
+#else
+		*((uint32_t *)pn->ip_laddr) = inp->inp_laddr.s_addr;
+		*((uint32_t *)pn->ip_faddr) = inp->inp_faddr.s_addr;
+#endif
+#ifdef SIFTR_IPV6
+	} else {
+		pn->ip_laddr[0] = inp->in6p_laddr.s6_addr32[0];
+		pn->ip_laddr[1] = inp->in6p_laddr.s6_addr32[1];
+		pn->ip_laddr[2] = inp->in6p_laddr.s6_addr32[2];
+		pn->ip_laddr[3] = inp->in6p_laddr.s6_addr32[3];
+		pn->ip_faddr[0] = inp->in6p_faddr.s6_addr32[0];
+		pn->ip_faddr[1] = inp->in6p_faddr.s6_addr32[1];
+		pn->ip_faddr[2] = inp->in6p_faddr.s6_addr32[2];
+		pn->ip_faddr[3] = inp->in6p_faddr.s6_addr32[3];
+	}
+#endif
+	pn->tcp_localport = inp->inp_lport;
+	pn->tcp_foreignport = inp->inp_fport;
+	pn->snd_cwnd = tp->snd_cwnd;
+	pn->snd_wnd = tp->snd_wnd;
+	pn->rcv_wnd = tp->rcv_wnd;
+	pn->snd_bwnd = tp->snd_bwnd;
+	pn->snd_ssthresh = tp->snd_ssthresh;
+	pn->snd_scale = tp->snd_scale;
+	pn->rcv_scale = tp->rcv_scale;
+	pn->conn_state = tp->t_state;
+	pn->max_seg_size = tp->t_maxseg;
+	pn->smoothed_rtt = tp->t_srtt;
+	pn->sack_enabled = (tp->t_flags & TF_SACK_PERMIT) != 0;
+	pn->flags = tp->t_flags;
+	pn->rxt_length = tp->t_rxtcur;
+	pn->snd_buf_hiwater = inp->inp_socket->so_snd.sb_hiwat;
+	pn->snd_buf_cc = inp->inp_socket->so_snd.sb_cc;
+	pn->rcv_buf_hiwater = inp->inp_socket->so_rcv.sb_hiwat;
+	pn->rcv_buf_cc = inp->inp_socket->so_rcv.sb_cc;
+	pn->sent_inflight_bytes = tp->snd_max - tp->snd_una;
+
+	/* We've finished accessing the tcb so release the lock. */
+	if (inp_locally_locked)
+		INP_RUNLOCK(inp);
+
+	pn->ipver = ipver;
+	pn->direction = dir;
+
+	/*
+	 * Significantly more accurate than using getmicrotime(), but slower!
+	 * Gives true microsecond resolution at the expense of a hit to
+	 * maximum pps throughput processing when SIFTR is loaded and enabled.
+	 */
+	microtime(&pn->tval);
+}
+
+
 /*
  * pfil hook that is called for each IPv4 packet making its way through the
  * stack in either direction.
@@ -764,13 +825,13 @@ static int
 siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
     struct inpcb *inp)
 {
-	struct pkt_node *pkt_node;
+	struct pkt_node *pn;
 	struct ip *ip;
 	struct tcphdr *th;
 	struct tcpcb *tp;
 	struct siftr_stats *ss;
 	unsigned int ip_hl;
-	uint8_t inp_locally_locked;
+	int inp_locally_locked;
 
 	inp_locally_locked = 0;
 	ss = DPCPU_PTR(ss);
@@ -824,18 +885,6 @@ siftr_chkpkt(void *arg, struct mbuf **m,
 
 	INP_LOCK_ASSERT(inp);
 
-	pkt_node = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE,
-	    M_NOWAIT | M_ZERO);
-
-	if (pkt_node == NULL) {
-		if (dir == PFIL_IN)
-			ss->nskip_in_malloc++;
-		else
-			ss->nskip_out_malloc++;
-
-		goto inp_unlock;
-	}
-
 	/* Find the TCP control block that corresponds with this packet */
 	tp = intotcpcb(inp);
 
@@ -850,53 +899,21 @@ siftr_chkpkt(void *arg, struct mbuf **m,
 		else
 			ss->nskip_out_tcpcb++;
 
-		free(pkt_node, M_SIFTR_PKTNODE);
 		goto inp_unlock;
 	}
 
-	/* Fill in pkt_node data */
-#ifdef SIFTR_IPV6
-	pkt_node->ip_laddr[3] = inp->inp_laddr.s_addr;
-	pkt_node->ip_faddr[3] = inp->inp_faddr.s_addr;
-#else
-	*((uint32_t *)pkt_node->ip_laddr) = inp->inp_laddr.s_addr;
-	*((uint32_t *)pkt_node->ip_faddr) = inp->inp_faddr.s_addr;
-#endif
-	pkt_node->tcp_localport = inp->inp_lport;
-	pkt_node->tcp_foreignport = inp->inp_fport;
-	pkt_node->snd_cwnd = tp->snd_cwnd;
-	pkt_node->snd_wnd = tp->snd_wnd;
-	pkt_node->rcv_wnd = tp->rcv_wnd;
-	pkt_node->snd_bwnd = tp->snd_bwnd;
-	pkt_node->snd_ssthresh = tp->snd_ssthresh;
-	pkt_node->snd_scale = tp->snd_scale;
-	pkt_node->rcv_scale = tp->rcv_scale;
-	pkt_node->conn_state = tp->t_state;
-	pkt_node->max_seg_size = tp->t_maxseg;
-	pkt_node->smoothed_rtt = tp->t_srtt;
-	pkt_node->sack_enabled = (tp->t_flags & TF_SACK_PERMIT) != 0;
-	pkt_node->flags = tp->t_flags;
-	pkt_node->rxt_length = tp->t_rxtcur;
-	pkt_node->snd_buf_hiwater = inp->inp_socket->so_snd.sb_hiwat;
-	pkt_node->snd_buf_cc = inp->inp_socket->so_snd.sb_cc;
-	pkt_node->rcv_buf_hiwater = inp->inp_socket->so_rcv.sb_hiwat;
-	pkt_node->rcv_buf_cc = inp->inp_socket->so_rcv.sb_cc;
-	pkt_node->sent_inflight_bytes = tp->snd_max - tp->snd_una;
+	pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO);
 
-	/* We've finished accessing the tcb so release the lock. */
-	if (inp_locally_locked)
-		INP_RUNLOCK(inp);
+	if (pn == NULL) {
+		if (dir == PFIL_IN)
+			ss->nskip_in_malloc++;
+		else
+			ss->nskip_out_malloc++;
 
-	/* These are safe to access without the inp lock. */
-	pkt_node->ipver = INP_IPV4;
-	pkt_node->direction = dir;
+		goto inp_unlock;
+	}
 
-	/*
-	 * Significantly more accurate than using getmicrotime(), but slower!
-	 * Gives true microsecond resolution at the expense of a hit to
-	 * maximum pps throughput processing when SIFTR is loaded and enabled.
-	 */
-	microtime(&(pkt_node->tval));
+	siftr_siftdata(pn, inp, tp, INP_IPV4, dir, inp_locally_locked);
 
 	if (siftr_generate_hashes) {
 		if ((*m)->m_pkthdr.csum_flags & CSUM_TCP) {
@@ -956,11 +973,11 @@ siftr_chkpkt(void *arg, struct mbuf **m,
 		 * find a way to create the hash and checksum in the same pass
 		 * over the bytes.
 		 */
-		pkt_node->hash = hash_pkt(*m, ip_hl);
+		pn->hash = hash_pkt(*m, ip_hl);
 	}
 
 	mtx_lock(&siftr_pkt_queue_mtx);
-	STAILQ_INSERT_TAIL(&pkt_queue, pkt_node, nodes);
+	STAILQ_INSERT_TAIL(&pkt_queue, pn, nodes);
 	mtx_unlock(&siftr_pkt_queue_mtx);
 	goto ret;
 
@@ -979,13 +996,13 @@ static int
 siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
     struct inpcb *inp)
 {
-	struct pkt_node *pkt_node;
+	struct pkt_node *pn;
 	struct ip6_hdr *ip6;
 	struct tcphdr *th;
 	struct tcpcb *tp;
 	struct siftr_stats *ss;
 	unsigned int ip6_hl;
-	uint8_t inp_locally_locked;
+	int inp_locally_locked;
 
 	inp_locally_locked = 0;
 	ss = DPCPU_PTR(ss);
@@ -1043,18 +1060,6 @@ siftr_chkpkt6(void *arg, struct mbuf **m
 			inp_locally_locked = 1;
 	}
 
-	pkt_node = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE,
-	    M_NOWAIT | M_ZERO);
-
-	if (pkt_node == NULL) {
-		if (dir == PFIL_IN)
-			ss->nskip_in_malloc++;
-		else
-			ss->nskip_out_malloc++;
-
-		goto inp_unlock6;
-	}
-
 	/* Find the TCP control block that corresponds with this packet. */
 	tp = intotcpcb(inp);
 
@@ -1069,59 +1074,26 @@ siftr_chkpkt6(void *arg, struct mbuf **m
 		else
 			ss->nskip_out_tcpcb++;
 
-		free(pkt_node, M_SIFTR_PKTNODE);
 		goto inp_unlock6;
 	}
 
-	/* Fill in pkt_node data. */
-	pkt_node->ip_laddr[0] = inp->in6p_laddr.s6_addr32[0];
-	pkt_node->ip_laddr[1] = inp->in6p_laddr.s6_addr32[1];
-	pkt_node->ip_laddr[2] = inp->in6p_laddr.s6_addr32[2];
-	pkt_node->ip_laddr[3] = inp->in6p_laddr.s6_addr32[3];
-	pkt_node->ip_faddr[0] = inp->in6p_faddr.s6_addr32[0];
-	pkt_node->ip_faddr[1] = inp->in6p_faddr.s6_addr32[1];
-	pkt_node->ip_faddr[2] = inp->in6p_faddr.s6_addr32[2];
-	pkt_node->ip_faddr[3] = inp->in6p_faddr.s6_addr32[3];
-	pkt_node->tcp_localport = inp->inp_lport;
-	pkt_node->tcp_foreignport = inp->inp_fport;
-	pkt_node->snd_cwnd = tp->snd_cwnd;
-	pkt_node->snd_wnd = tp->snd_wnd;
-	pkt_node->rcv_wnd = tp->rcv_wnd;
-	pkt_node->snd_bwnd = tp->snd_bwnd;
-	pkt_node->snd_ssthresh = tp->snd_ssthresh;
-	pkt_node->snd_scale = tp->snd_scale;
-	pkt_node->rcv_scale = tp->rcv_scale;
-	pkt_node->conn_state = tp->t_state;
-	pkt_node->max_seg_size = tp->t_maxseg;
-	pkt_node->smoothed_rtt = tp->t_srtt;
-	pkt_node->sack_enabled = (tp->t_flags & TF_SACK_PERMIT) != 0;
-	pkt_node->flags = tp->t_flags;
-	pkt_node->rxt_length = tp->t_rxtcur;
-	pkt_node->snd_buf_hiwater = inp->inp_socket->so_snd.sb_hiwat;
-	pkt_node->snd_buf_cc = inp->inp_socket->so_snd.sb_cc;
-	pkt_node->rcv_buf_hiwater = inp->inp_socket->so_rcv.sb_hiwat;
-	pkt_node->rcv_buf_cc = inp->inp_socket->so_rcv.sb_cc;
-	pkt_node->sent_inflight_bytes = tp->snd_max - tp->snd_una;
+	pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO);
 
-	/* We've finished accessing the tcb so release the lock. */
-	if (inp_locally_locked)
-		INP_RUNLOCK(inp);
+	if (pn == NULL) {
+		if (dir == PFIL_IN)
+			ss->nskip_in_malloc++;
+		else
+			ss->nskip_out_malloc++;
 
-	/* These are safe to access without the inp lock. */
-	pkt_node->ipver = INP_IPV6;
-	pkt_node->direction = dir;
+		goto inp_unlock6;
+	}
 
-	/*
-	 * Significantly more accurate than using getmicrotime(), but slower!
-	 * Gives true microsecond resolution at the expense of a hit to
-	 * maximum pps throughput processing when SIFTR is loaded and enabled.
-	 */
-	microtime(&(pkt_node->tval));
+	siftr_siftdata(pn, inp, tp, INP_IPV6, dir, inp_locally_locked);
 
-	/* XXX: Figure out how to do hash calcs for IPv6 */
+	/* XXX: Figure out how to generate hashes for IPv6 packets. */
 
 	mtx_lock(&siftr_pkt_queue_mtx);
-	STAILQ_INSERT_TAIL(&pkt_queue, pkt_node, nodes);
+	STAILQ_INSERT_TAIL(&pkt_queue, pn, nodes);
 	mtx_unlock(&siftr_pkt_queue_mtx);
 	goto ret6;
 

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 14:53:22 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 62C291065674;
	Sat,  6 Nov 2010 14:53:22 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 4FDC68FC16;
	Sat,  6 Nov 2010 14:53:22 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6ErMkn034600;
	Sat, 6 Nov 2010 14:53:22 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6ErMt5034597;
	Sat, 6 Nov 2010 14:53:22 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061453.oA6ErMt5034597@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 14:53:22 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214884 - in stable/7: share/man/man4 sys/netinet
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 14:53:22 -0000

Author: lstewart
Date: Sat Nov  6 14:53:22 2010
New Revision: 214884
URL: http://svn.freebsd.org/changeset/base/214884

Log:
  MFC r213162:
  
  Log the number of segments currently in the reassembly queue.
  
  Sponsored by:	FreeBSD Foundation

Modified:
  stable/7/share/man/man4/siftr.4
  stable/7/sys/netinet/siftr.c
Directory Properties:
  stable/7/share/man/   (props changed)
  stable/7/share/man/man1/   (props changed)
  stable/7/share/man/man3/   (props changed)
  stable/7/share/man/man4/   (props changed)
  stable/7/share/man/man5/   (props changed)
  stable/7/share/man/man7/   (props changed)
  stable/7/share/man/man8/   (props changed)
  stable/7/share/man/man9/   (props changed)
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/share/man/man4/siftr.4
==============================================================================
--- stable/7/share/man/man4/siftr.4	Sat Nov  6 14:49:10 2010	(r214883)
+++ stable/7/share/man/man4/siftr.4	Sat Nov  6 14:53:22 2010	(r214884)
@@ -30,7 +30,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 23, 2010
+.Dd September 25, 2010
 .Dt SIFTR 4
 .Os
 .Sh NAME
@@ -198,7 +198,7 @@ The data is CSV formatted.
 .Bd -literal -offset indent
 o,0xbec491a5,1238556193.463551,172.16.7.28,22,172.16.2.5,55931, \\
 1073725440,172312,6144,66560,66608,8,1,4,1448,936,1,996,255, \\
-33304,208,66608,0,208
+33304,208,66608,0,208,0
 .Ed
 .Pp
 Field descriptions are as follows:
@@ -328,6 +328,10 @@ The current number of bytes in the socke
 The current number of unacknowledged bytes in-flight.
 Bytes acknowledged via SACK are not excluded from this count.
 .El
+.Bl -tag -offset indent
+.It Va 26
+The current number of segments in the reassembly queue.
+.El
 .Pp
 The third type of log message is written to the file when the module is disabled
 and ceases collecting data from the running kernel.

Modified: stable/7/sys/netinet/siftr.c
==============================================================================
--- stable/7/sys/netinet/siftr.c	Sat Nov  6 14:49:10 2010	(r214883)
+++ stable/7/sys/netinet/siftr.c	Sat Nov  6 14:53:22 2010	(r214884)
@@ -55,7 +55,7 @@
  * SIFTR should be directed to him via email: lastewart@swin.edu.au
  *
  * Initial release date: June 2007
- * Most recent update: June 2010
+ * Most recent update: September 2010
  ******************************************************/
 
 #include 
@@ -105,7 +105,7 @@ __FBSDID("$FreeBSD$");
  */
 #define V_MAJOR		1
 #define V_BACKBREAK	2
-#define V_BACKCOMPAT	3
+#define V_BACKCOMPAT	4
 #define MODVERSION	__CONCAT(V_MAJOR, __CONCAT(V_BACKBREAK, V_BACKCOMPAT))
 #define MODVERSION_STR	__XSTRING(V_MAJOR) "." __XSTRING(V_BACKBREAK) "." \
     __XSTRING(V_BACKCOMPAT)
@@ -226,6 +226,8 @@ struct pkt_node {
 	u_int			rcv_buf_cc;
 	/* Number of bytes inflight that we are waiting on ACKs for. */
 	u_int			sent_inflight_bytes;
+	/* Number of segments currently in the reassembly queue. */
+	int			t_segqlen;
 	/* Link to next pkt_node in the list. */
 	STAILQ_ENTRY(pkt_node)	nodes;
 };
@@ -448,7 +450,7 @@ siftr_process_pkt(struct pkt_node * pkt_
 		    MAX_LOG_MSG_LEN,
 		    "%c,0x%08x,%zd.%06ld,%x:%x:%x:%x:%x:%x:%x:%x,%u,%x:%x:%x:"
 		    "%x:%x:%x:%x:%x,%u,%ld,%ld,%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,"
-		    "%u,%d,%u,%u,%u,%u,%u\n",
+		    "%u,%d,%u,%u,%u,%u,%u,%u\n",
 		    direction[pkt_node->direction],
 		    pkt_node->hash,
 		    pkt_node->tval.tv_sec,
@@ -488,7 +490,8 @@ siftr_process_pkt(struct pkt_node * pkt_
 		    pkt_node->snd_buf_cc,
 		    pkt_node->rcv_buf_hiwater,
 		    pkt_node->rcv_buf_cc,
-		    pkt_node->sent_inflight_bytes);
+		    pkt_node->sent_inflight_bytes,
+		    pkt_node->t_segqlen);
 	} else { /* IPv4 packet */
 		pkt_node->ip_laddr[0] = FIRST_OCTET(pkt_node->ip_laddr[3]);
 		pkt_node->ip_laddr[1] = SECOND_OCTET(pkt_node->ip_laddr[3]);
@@ -504,7 +507,7 @@ siftr_process_pkt(struct pkt_node * pkt_
 		log_buf->ae_bytesused = snprintf(log_buf->ae_data,
 		    MAX_LOG_MSG_LEN,
 		    "%c,0x%08x,%jd.%06ld,%u.%u.%u.%u,%u,%u.%u.%u.%u,%u,%ld,%ld,"
-		    "%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,%u,%d,%u,%u,%u,%u,%u\n",
+		    "%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,%u,%d,%u,%u,%u,%u,%u,%u\n",
 		    direction[pkt_node->direction],
 		    pkt_node->hash,
 		    (intmax_t)pkt_node->tval.tv_sec,
@@ -536,7 +539,8 @@ siftr_process_pkt(struct pkt_node * pkt_
 		    pkt_node->snd_buf_cc,
 		    pkt_node->rcv_buf_hiwater,
 		    pkt_node->rcv_buf_cc,
-		    pkt_node->sent_inflight_bytes);
+		    pkt_node->sent_inflight_bytes,
+		    pkt_node->t_segqlen);
 #ifdef SIFTR_IPV6
 	}
 #endif
@@ -796,6 +800,7 @@ siftr_siftdata(struct pkt_node *pn, stru
 	pn->rcv_buf_hiwater = inp->inp_socket->so_rcv.sb_hiwat;
 	pn->rcv_buf_cc = inp->inp_socket->so_rcv.sb_cc;
 	pn->sent_inflight_bytes = tp->snd_max - tp->snd_una;
+	pn->t_segqlen = tp->t_segqlen;
 
 	/* We've finished accessing the tcb so release the lock. */
 	if (inp_locally_locked)

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 15:10:31 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 9F86F1065672;
	Sat,  6 Nov 2010 15:10:31 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 731B38FC15;
	Sat,  6 Nov 2010 15:10:31 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6FAVnY035112;
	Sat, 6 Nov 2010 15:10:31 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6FAV1I035110;
	Sat, 6 Nov 2010 15:10:31 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061510.oA6FAV1I035110@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 15:10:31 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214887 - stable/7/sys/sys
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 15:10:31 -0000

Author: lstewart
Date: Sat Nov  6 15:10:31 2010
New Revision: 214887
URL: http://svn.freebsd.org/changeset/base/214887

Log:
  MFC r209050 (originally committed by jhb):
  
  Add helper macros to iterate over available CPUs in the system.
  CPU_FOREACH(i) iterates over the CPU IDs of all available CPUs.  The
  CPU_FIRST() and CPU_NEXT(i) macros can also be used to iterate over
  available CPU IDs.  CPU_NEXT(i) wraps around to CPU_FIRST() rather than
  returning some sort of terminator.
  
  Requested by:	rwatson
  Reviewed by:	attilio

Modified:
  stable/7/sys/sys/smp.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/sys/smp.h
==============================================================================
--- stable/7/sys/sys/smp.h	Sat Nov  6 15:04:56 2010	(r214886)
+++ stable/7/sys/sys/smp.h	Sat Nov  6 15:10:31 2010	(r214887)
@@ -68,6 +68,44 @@ extern cpumask_t all_cpus;
  */
 #define	CPU_ABSENT(x_cpu)	((all_cpus & (1 << (x_cpu))) == 0)
 
+/*
+ * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
+ * integer iterator and iterates over the available set of CPUs.
+ * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
+ * returns the id of the next non-absent CPU.  It will wrap back to
+ * CPU_FIRST() once the end of the list is reached.  The iterators are
+ * currently implemented via inline functions.
+ */
+#define	CPU_FOREACH(i)							\
+	for ((i) = 0; (i) <= mp_maxid; (i)++)				\
+		if (!CPU_ABSENT((i)))
+
+static __inline int
+cpu_first(void)
+{
+	int i;
+
+	for (i = 0;; i++)
+		if (!CPU_ABSENT(i))
+			return (i);
+}
+
+static __inline int
+cpu_next(int i)
+{
+
+	for (;;) {
+		i++;
+		if (i > mp_maxid)
+			i = 0;
+		if (!CPU_ABSENT(i))
+			return (i);
+	}
+}
+
+#define	CPU_FIRST()	cpu_first()
+#define	CPU_NEXT(i)	cpu_next((i))
+
 #ifdef SMP
 /*
  * Machine dependent functions used to initialize MP support.

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 15:21:46 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id AC6361065672;
	Sat,  6 Nov 2010 15:21:46 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 998F18FC0C;
	Sat,  6 Nov 2010 15:21:46 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6FLkM9035413;
	Sat, 6 Nov 2010 15:21:46 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6FLkH9035409;
	Sat, 6 Nov 2010 15:21:46 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061521.oA6FLkH9035409@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 15:21:46 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214888 - in stable/7: share/man/man9 sys/vm
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 15:21:46 -0000

Author: lstewart
Date: Sat Nov  6 15:21:46 2010
New Revision: 214888
URL: http://svn.freebsd.org/changeset/base/214888

Log:
  MFC r213910:
  
  - Simplify implementation of uma_zone_get_max.
  - Add uma_zone_get_cur which returns the current approximate occupancy of a
    zone. This is useful for providing stats via sysctl amongst other things.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	gnn, jhb

Modified:
  stable/7/share/man/man9/zone.9
  stable/7/sys/vm/uma.h
  stable/7/sys/vm/uma_core.c
Directory Properties:
  stable/7/share/man/   (props changed)
  stable/7/share/man/man1/   (props changed)
  stable/7/share/man/man3/   (props changed)
  stable/7/share/man/man4/   (props changed)
  stable/7/share/man/man5/   (props changed)
  stable/7/share/man/man7/   (props changed)
  stable/7/share/man/man8/   (props changed)
  stable/7/share/man/man9/   (props changed)
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/share/man/man9/zone.9
==============================================================================
--- stable/7/share/man/man9/zone.9	Sat Nov  6 15:10:31 2010	(r214887)
+++ stable/7/share/man/man9/zone.9	Sat Nov  6 15:21:46 2010	(r214888)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 19, 2008
+.Dd October 9, 2010
 .Dt ZONE 9
 .Os
 .Sh NAME
@@ -35,7 +35,9 @@
 .Nm uma_zfree ,
 .Nm uma_zfree_arg ,
 .Nm uma_zdestroy ,
-.Nm uma_zone_set_max
+.Nm uma_zone_set_max ,
+.Nm uma_zone_get_max ,
+.Nm uma_zone_get_cur
 .Nd zone allocator
 .Sh SYNOPSIS
 .In sys/param.h
@@ -59,6 +61,10 @@
 .Fn uma_zdestroy "uma_zone_t zone"
 .Ft void
 .Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
+.Ft int
+.Fn uma_zone_get_max "uma_zone_t zone"
+.Ft int
+.Fn uma_zone_get_cur "uma_zone_t zone"
 .Sh DESCRIPTION
 The zone allocator provides an efficient interface for managing
 dynamically-sized collections of items of similar size.
@@ -177,21 +183,36 @@ must have been freed with
 .Fn uma_zfree
 before.
 .Pp
-The purpose of
+The
 .Fn uma_zone_set_max
-is to limit the maximum amount of memory that the system can dedicated
-toward the zone specified by the
-.Fa zone
-argument.
+function limits the number of items
+.Pq and therefore memory
+that can be allocated to
+.Fa zone .
 The
 .Fa nitems
-argument gives the upper limit of items in the zone.
-This limits the total number of items in the zone which includes:
+argument specifies the requested upper limit number of items.
+The effective limit may end up being higher than requested, as the
+implementation will round up to ensure all memory pages allocated to the zone
+are utilised to capacity.
+The limit applies to the total number of items in the zone, which includes
 allocated items, free items and free items in the per-cpu caches.
 On systems with more than one CPU it may not be possible to allocate
 the specified number of items even when there is no shortage of memory,
 because all of the remaining free items may be in the caches of the
 other CPUs when the limit is hit.
+.Pp
+The
+.Fn uma_zone_get_max
+function returns the effective upper limit number of items for a zone.
+.Pp
+The
+.Fn uma_zone_get_cur
+function returns the approximate current occupancy of the zone.
+The returned value is approximate because appropriate synchronisation to
+determine an exact value is not performed by the implementation.
+This ensures low overhead at the expense of potentially stale data being used
+in the calculation.
 .Sh RETURN VALUES
 The
 .Fn uma_zalloc

Modified: stable/7/sys/vm/uma.h
==============================================================================
--- stable/7/sys/vm/uma.h	Sat Nov  6 15:10:31 2010	(r214887)
+++ stable/7/sys/vm/uma.h	Sat Nov  6 15:21:46 2010	(r214888)
@@ -443,6 +443,17 @@ void uma_zone_set_max(uma_zone_t zone, i
 int uma_zone_get_max(uma_zone_t zone);
 
 /*
+ * Obtains the approximate current number of items allocated from a zone
+ *
+ * Arguments:
+ *	zone  The zone to obtain the current allocation count from
+ *
+ * Return:
+ *	int  The approximate current number of items allocated from the zone
+ */
+int uma_zone_get_cur(uma_zone_t zone);
+
+/*
  * The following two routines (uma_zone_set_init/fini)
  * are used to set the backend init/fini pair which acts on an
  * object as it becomes allocated and is placed in a slab within

Modified: stable/7/sys/vm/uma_core.c
==============================================================================
--- stable/7/sys/vm/uma_core.c	Sat Nov  6 15:10:31 2010	(r214887)
+++ stable/7/sys/vm/uma_core.c	Sat Nov  6 15:21:46 2010	(r214888)
@@ -2529,16 +2529,36 @@ uma_zone_get_max(uma_zone_t zone)
 
 	ZONE_LOCK(zone);
 	keg = zone->uz_keg;
-	if (keg->uk_maxpages)
-		nitems = keg->uk_maxpages * keg->uk_ipers;
-	else
-		nitems = 0;
+	nitems = keg->uk_maxpages * keg->uk_ipers;
 	ZONE_UNLOCK(zone);
 
 	return (nitems);
 }
 
 /* See uma.h */
+int
+uma_zone_get_cur(uma_zone_t zone)
+{
+	int64_t nitems;
+	u_int i;
+
+	ZONE_LOCK(zone);
+	nitems = zone->uz_allocs - zone->uz_frees;
+	CPU_FOREACH(i) {
+		/*
+		 * See the comment in sysctl_vm_zone_stats() regarding the
+		 * safety of accessing the per-cpu caches. With the zone lock
+		 * held, it is safe, but can potentially result in stale data.
+		 */
+		nitems += zone->uz_cpu[i].uc_allocs -
+		    zone->uz_cpu[i].uc_frees;
+	}
+	ZONE_UNLOCK(zone);
+
+	return (nitems < 0 ? 0 : nitems);
+}
+
+/* See uma.h */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 15:40:34 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id E29071065670;
	Sat,  6 Nov 2010 15:40:34 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 96DE58FC08;
	Sat,  6 Nov 2010 15:40:34 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6FeYoc035840;
	Sat, 6 Nov 2010 15:40:34 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6FeYSL035838;
	Sat, 6 Nov 2010 15:40:34 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061540.oA6FeYSL035838@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 15:40:34 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214889 - stable/7/sys/netinet
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 15:40:35 -0000

Author: lstewart
Date: Sat Nov  6 15:40:34 2010
New Revision: 214889
URL: http://svn.freebsd.org/changeset/base/214889

Log:
  MFC r213912:
  
  - Switch the "net.inet.tcp.reass.cursegments" and
    "net.inet.tcp.reass.maxsegments" sysctl variables to be based on UMA zone
    stats. The value returned by the cursegments sysctl is approximate owing to
    the way in which uma_zone_get_cur is implemented.
  
  - Discontinue use of V_tcp_reass_qsize as a global reassembly segment count
    variable in the reassembly implementation. The variable was used without
    proper synchronisation and was duplicating accounting done by UMA already. The
    lack of synchronisation was particularly problematic on SMP systems
    terminating many TCP sessions, resulting in poor TCP performance for
    connections with non-zero packet loss.
  
  The base code from r213912 was modified as part of this MFC in order to work
  correctly on FreeBSD 7.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	andre, gnn, rpaulo (as part of a larger patch)

Modified:
  stable/7/sys/netinet/tcp_reass.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/netinet/tcp_reass.c
==============================================================================
--- stable/7/sys/netinet/tcp_reass.c	Sat Nov  6 15:21:46 2010	(r214888)
+++ stable/7/sys/netinet/tcp_reass.c	Sat Nov  6 15:40:34 2010	(r214889)
@@ -73,17 +73,20 @@ __FBSDID("$FreeBSD$");
 #include 
 #endif /* TCPDEBUG */
 
+static int tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS);
+static int tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS);
+
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
     "TCP Segment Reassembly Queue");
 
 static int tcp_reass_maxseg = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
-    &tcp_reass_maxseg, 0,
+SYSCTL_PROC(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+    &tcp_reass_maxseg, 0, &tcp_reass_sysctl_maxseg, "I",
     "Global maximum number of TCP Segments in Reassembly Queue");
 
 static int tcp_reass_qsize = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
-    &tcp_reass_qsize, 0,
+SYSCTL_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+    &tcp_reass_qsize, 0, &tcp_reass_sysctl_qsize, "I",
     "Global number of TCP Segments currently in Reassembly Queue");
 
 static int tcp_reass_maxqlen = 48;
@@ -133,7 +136,6 @@ tcp_reass_flush(struct tcpcb *tp)
 		m_freem(qe->tqe_m);
 		uma_zfree(tcp_reass_zone, qe);
 		tp->t_segqlen--;
-		tcp_reass_qsize--;
 	}
 
 	KASSERT((tp->t_segqlen == 0),
@@ -141,6 +143,20 @@ tcp_reass_flush(struct tcpcb *tp)
 	    tp, tp->t_segqlen));
 }
 
+static int
+tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS)
+{
+	tcp_reass_maxseg = uma_zone_get_max(tcp_reass_zone);
+	return (sysctl_handle_int(oidp, arg1, arg2, req));
+}
+
+static int
+tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
+{
+	tcp_reass_qsize = uma_zone_get_cur(tcp_reass_zone);
+	return (sysctl_handle_int(oidp, arg1, arg2, req));
+}
+
 int
 tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 {
@@ -170,12 +186,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 	 * Limit the number of segments in the reassembly queue to prevent
 	 * holding on to too many segments (and thus running out of mbufs).
 	 * Make sure to let the missing segment through which caused this
-	 * queue.  Always keep one global queue entry spare to be able to
-	 * process the missing segment.
+	 * queue.
 	 */
 	if (th->th_seq != tp->rcv_nxt &&
-	    (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
-	     tp->t_segqlen >= tcp_reass_maxqlen)) {
+	     tp->t_segqlen >= tcp_reass_maxqlen) {
 		tcp_reass_overflows++;
 		tcpstat.tcps_rcvmemdrop++;
 		m_freem(m);
@@ -195,7 +209,6 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 		return (0);
 	}
 	tp->t_segqlen++;
-	tcp_reass_qsize++;
 
 	/*
 	 * Find a segment which begins after this one does.
@@ -222,7 +235,6 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 				m_freem(m);
 				uma_zfree(tcp_reass_zone, te);
 				tp->t_segqlen--;
-				tcp_reass_qsize--;
 				/*
 				 * Try to present any queued data
 				 * at the left window edge to the user.
@@ -259,7 +271,6 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 		m_freem(q->tqe_m);
 		uma_zfree(tcp_reass_zone, q);
 		tp->t_segqlen--;
-		tcp_reass_qsize--;
 		q = nq;
 	}
 
@@ -296,7 +307,6 @@ present:
 			sbappendstream_locked(&so->so_rcv, q->tqe_m);
 		uma_zfree(tcp_reass_zone, q);
 		tp->t_segqlen--;
-		tcp_reass_qsize--;
 		q = nq;
 	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
 	ND6_HINT(tp);

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 15:49:59 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id AA510106566B;
	Sat,  6 Nov 2010 15:49:59 +0000 (UTC)
	(envelope-from lstewart@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 97BED8FC13;
	Sat,  6 Nov 2010 15:49:59 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6Fnxaf036075;
	Sat, 6 Nov 2010 15:49:59 GMT (envelope-from lstewart@svn.freebsd.org)
Received: (from lstewart@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6FnxCs036073;
	Sat, 6 Nov 2010 15:49:59 GMT (envelope-from lstewart@svn.freebsd.org)
Message-Id: <201011061549.oA6FnxCs036073@svn.freebsd.org>
From: Lawrence Stewart 
Date: Sat, 6 Nov 2010 15:49:59 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214890 - stable/7/sys/netinet
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 15:49:59 -0000

Author: lstewart
Date: Sat Nov  6 15:49:59 2010
New Revision: 214890
URL: http://svn.freebsd.org/changeset/base/214890

Log:
  MFC r213913:
  
  Retire the system-wide, per-reassembly queue segment limit. The mechanism is far
  too coarse-grained to be useful and the default value significantly degrades TCP
  performance on moderate to high bandwidth-delay product paths with non-zero loss
  (e.g. 5+Mbps connections across the public Internet often suffer).
  
  Replace the outgoing mechanism with an individual per-queue limit based on the
  number of MSS segments that fit into the socket's receive buffer.  This should
  strike a good balance between performance and the potential for resource
  exhaustion when FreeBSD is acting as a TCP receiver. With socket buffer
  autotuning (which is enabled by default), the reassembly queue tracks the socket
  buffer and benefits too.
  
  As the XXX comment suggests, my testing uncovered some unexpected behaviour
  which requires further investigation. By using so->so_rcv.sb_hiwat instead of
  sbspace(&so->so_rcv), we allow more segments to be held across both the socket
  receive buffer and reassembly queue than we probably should. The tradeoff is
  better performance in at least one common scenario, versus a devious sender's
  ability to consume more resources on a FreeBSD receiver.
  
  The base code from r213913 was modified as part of this MFC in order to work
  correctly on FreeBSD 7.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	andre, gnn, rpaulo

Modified:
  stable/7/sys/netinet/tcp_reass.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/netinet/tcp_reass.c
==============================================================================
--- stable/7/sys/netinet/tcp_reass.c	Sat Nov  6 15:40:34 2010	(r214889)
+++ stable/7/sys/netinet/tcp_reass.c	Sat Nov  6 15:49:59 2010	(r214890)
@@ -89,11 +89,6 @@ SYSCTL_PROC(_net_inet_tcp_reass, OID_AUT
     &tcp_reass_qsize, 0, &tcp_reass_sysctl_qsize, "I",
     "Global number of TCP Segments currently in Reassembly Queue");
 
-static int tcp_reass_maxqlen = 48;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
-    &tcp_reass_maxqlen, 0,
-    "Maximum number of TCP Segments per individual Reassembly Queue");
-
 static int tcp_reass_overflows = 0;
 SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
     &tcp_reass_overflows, 0,
@@ -183,13 +178,23 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 		goto present;
 
 	/*
-	 * Limit the number of segments in the reassembly queue to prevent
-	 * holding on to too many segments (and thus running out of mbufs).
-	 * Make sure to let the missing segment through which caused this
-	 * queue.
+	 * Limit the number of segments that can be queued to reduce the
+	 * potential for mbuf exhaustion. For best performance, we want to be
+	 * able to queue a full window's worth of segments. The size of the
+	 * socket receive buffer determines our advertised window and grows
+	 * automatically when socket buffer autotuning is enabled. Use it as the
+	 * basis for our queue limit.
+	 * Always let the missing segment through which caused this queue.
+	 * NB: Access to the socket buffer is left intentionally unlocked as we
+	 * can tolerate stale information here.
+	 *
+	 * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
+	 * should work but causes packets to be dropped when they shouldn't.
+	 * Investigate why and re-evaluate the below limit after the behaviour
+	 * is understood.
 	 */
 	if (th->th_seq != tp->rcv_nxt &&
-	     tp->t_segqlen >= tcp_reass_maxqlen) {
+	     tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
 		tcp_reass_overflows++;
 		tcpstat.tcps_rcvmemdrop++;
 		m_freem(m);

From owner-svn-src-stable-7@FreeBSD.ORG  Sat Nov  6 15:56:44 2010
Return-Path: 
Delivered-To: svn-src-stable-7@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id C7316106564A;
	Sat,  6 Nov 2010 15:56:44 +0000 (UTC) (envelope-from bz@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id B47918FC16;
	Sat,  6 Nov 2010 15:56:44 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA6FuiSp036251;
	Sat, 6 Nov 2010 15:56:44 GMT (envelope-from bz@svn.freebsd.org)
Received: (from bz@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA6FuiHb036245;
	Sat, 6 Nov 2010 15:56:44 GMT (envelope-from bz@svn.freebsd.org)
Message-Id: <201011061556.oA6FuiHb036245@svn.freebsd.org>
From: "Bjoern A. Zeeb" 
Date: Sat, 6 Nov 2010 15:56:44 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
X-SVN-Group: stable-7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r214891 - in stable/7/sys: netinet netinet6 netipsec
X-BeenThere: svn-src-stable-7@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 7-stable src tree
	
List-Unsubscribe: , 
	
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: , 
	
X-List-Received-Date: Sat, 06 Nov 2010 15:56:45 -0000

Author: bz
Date: Sat Nov  6 15:56:44 2010
New Revision: 214891
URL: http://svn.freebsd.org/changeset/base/214891

Log:
  MFC r214250:
  
    Make the IPsec SADB embedded route cache a union to be able to hold both the
    legacy and IPv6 route destination address.
    Previously, in the IPv6 case, memory was overwritten because there was not
    enough space for the IPv6 address.
  
  PR:		kern/122565

Modified:
  stable/7/sys/netinet/ip_ipsec.c
  stable/7/sys/netinet6/ip6_ipsec.c
  stable/7/sys/netipsec/ipsec_output.c
  stable/7/sys/netipsec/key.c
  stable/7/sys/netipsec/keydb.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/netinet/ip_ipsec.c
==============================================================================
--- stable/7/sys/netinet/ip_ipsec.c	Sat Nov  6 15:49:59 2010	(r214890)
+++ stable/7/sys/netinet/ip_ipsec.c	Sat Nov  6 15:56:44 2010	(r214891)
@@ -220,7 +220,7 @@ ip_ipsec_mtu(struct mbuf *m, int mtu)
 		if (sp->req != NULL &&
 		    sp->req->sav != NULL &&
 		    sp->req->sav->sah != NULL) {
-			ro = &sp->req->sav->sah->sa_route;
+			ro = &sp->req->sav->sah->route_cache.sa_route;
 			if (ro->ro_rt && ro->ro_rt->rt_ifp) {
 				mtu =
 				    ro->ro_rt->rt_rmx.rmx_mtu ?

Modified: stable/7/sys/netinet6/ip6_ipsec.c
==============================================================================
--- stable/7/sys/netinet6/ip6_ipsec.c	Sat Nov  6 15:49:59 2010	(r214890)
+++ stable/7/sys/netinet6/ip6_ipsec.c	Sat Nov  6 15:56:44 2010	(r214891)
@@ -346,7 +346,7 @@ ip6_ipsec_mtu(struct mbuf *m)
 		if (sp->req != NULL &&
 		    sp->req->sav != NULL &&
 		    sp->req->sav->sah != NULL) {
-			ro = &sp->req->sav->sah->sa_route;
+			ro = &sp->req->sav->sah->route_cache.sa_route;
 			if (ro->ro_rt && ro->ro_rt->rt_ifp) {
 				mtu =
 				    ro->ro_rt->rt_rmx.rmx_mtu ?

Modified: stable/7/sys/netipsec/ipsec_output.c
==============================================================================
--- stable/7/sys/netipsec/ipsec_output.c	Sat Nov  6 15:49:59 2010	(r214890)
+++ stable/7/sys/netipsec/ipsec_output.c	Sat Nov  6 15:56:44 2010	(r214891)
@@ -773,7 +773,8 @@ ipsec6_output_tunnel(struct ipsec_output
 		}
 		ip6 = mtod(m, struct ip6_hdr *);
 
-		state->ro = &isr->sav->sah->sa_route;
+		state->ro =
+		    (struct route *)&isr->sav->sah->route_cache.sin6_route;
 		state->dst = (struct sockaddr *)&state->ro->ro_dst;
 		dst6 = (struct sockaddr_in6 *)state->dst;
 		if (state->ro->ro_rt

Modified: stable/7/sys/netipsec/key.c
==============================================================================
--- stable/7/sys/netipsec/key.c	Sat Nov  6 15:49:59 2010	(r214890)
+++ stable/7/sys/netipsec/key.c	Sat Nov  6 15:56:44 2010	(r214891)
@@ -2674,9 +2674,9 @@ key_delsah(sah)
 		/* remove from tree of SA index */
 		if (__LIST_CHAINED(sah))
 			LIST_REMOVE(sah, chain);
-		if (sah->sa_route.ro_rt) {
-			RTFREE(sah->sa_route.ro_rt);
-			sah->sa_route.ro_rt = (struct rtentry *)NULL;
+		if (sah->route_cache.sa_route.ro_rt) {
+			RTFREE(sah->route_cache.sa_route.ro_rt);
+			sah->route_cache.sa_route.ro_rt = (struct rtentry *)NULL;
 		}
 		free(sah, M_IPSEC_SAH);
 	}
@@ -7196,7 +7196,7 @@ key_sa_routechange(dst)
 
 	SAHTREE_LOCK();
 	LIST_FOREACH(sah, &sahtree, chain) {
-		ro = &sah->sa_route;
+		ro = &sah->route_cache.sa_route;
 		if (ro->ro_rt && dst->sa_len == ro->ro_dst.sa_len
 		 && bcmp(dst, &ro->ro_dst, dst->sa_len) == 0) {
 			RTFREE(ro->ro_rt);

Modified: stable/7/sys/netipsec/keydb.h
==============================================================================
--- stable/7/sys/netipsec/keydb.h	Sat Nov  6 15:49:59 2010	(r214890)
+++ stable/7/sys/netipsec/keydb.h	Sat Nov  6 15:56:44 2010	(r214891)
@@ -85,6 +85,12 @@ struct seclifetime {
 	u_int64_t usetime;
 };
 
+union sa_route_union {
+	struct route		sa_route;
+	struct route		sin_route;	/* Duplicate for consistency. */
+	struct route_in6	sin6_route;
+};
+
 /* Security Association Data Base */
 struct secashead {
 	LIST_ENTRY(secashead) chain;
@@ -100,7 +106,7 @@ struct secashead {
 					/* SA chain */
 					/* The first of this list is newer SA */
 
-	struct route sa_route;		/* route cache */
+	union sa_route_union route_cache;
 };
 
 struct xformsw;