From owner-svn-src-projects@FreeBSD.ORG Fri Nov 23 14:00:27 2012 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 6EAA757A; Fri, 23 Nov 2012 14:00:27 +0000 (UTC) (envelope-from glebius@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 48A4D8FC08; Fri, 23 Nov 2012 14:00:27 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qANE0RnU026905; Fri, 23 Nov 2012 14:00:27 GMT (envelope-from glebius@svn.freebsd.org) Received: (from glebius@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qANE0RpS026902; Fri, 23 Nov 2012 14:00:27 GMT (envelope-from glebius@svn.freebsd.org) Message-Id: <201211231400.qANE0RpS026902@svn.freebsd.org> From: Gleb Smirnoff Date: Fri, 23 Nov 2012 14:00:27 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r243453 - projects/counters/sys/netinet X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 23 Nov 2012 14:00:27 -0000 Author: glebius Date: Fri Nov 23 14:00:26 2012 New Revision: 243453 URL: http://svnweb.freebsd.org/changeset/base/243453 Log: Collect IP statistics in per-cpu 64-bit counters. This way we shoot two hares with one shot: - Parallel threads no longer invalidate the cache lines where old struct ipstat resided. - Parallel non-atomic writes no longer lose statistics. Old 'struct ipstat' left only as interface to userland, however all fields converted to uint64_t. 
Yes, this breaks ABI on 32-bit arches, but now statistics will not overflow. Old 'struct ipstat' was imported as SYSCTL_STRUCT(... CTLFLAG_RW ...), thus could be not only zeroed, but also filled in with fake values. This is no longer possible - any attempt to write to statistics zeroes them, without accepting userland supplied info. Modified: projects/counters/sys/netinet/ip_input.c projects/counters/sys/netinet/ip_var.h Modified: projects/counters/sys/netinet/ip_input.c ============================================================================== --- projects/counters/sys/netinet/ip_input.c Fri Nov 23 13:55:38 2012 (r243452) +++ projects/counters/sys/netinet/ip_input.c Fri Nov 23 14:00:26 2012 (r243453) @@ -153,11 +153,6 @@ VNET_DEFINE(struct in_ifaddrhead, in_ifa VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ -VNET_DEFINE(struct ipstat, ipstat); -SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, - &VNET_NAME(ipstat), ipstat, - "IP statistics (struct ipstat, netinet/ip_var.h)"); - static VNET_DEFINE(uma_zone_t, ipq_zone); static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]); static struct mtx ipqlock; @@ -213,6 +208,175 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, static void ip_freef(struct ipqhead *, struct ipq *); /* + * ipstat + * XXXGL: more words here. 
+ */ +VNET_DEFINE(struct ipstat_p, ipstatp); + +static void +ipstat_zero() +{ + counter_u64_zero(V_ipstatp.ips_total); + counter_u64_zero(V_ipstatp.ips_badsum); + counter_u64_zero(V_ipstatp.ips_tooshort); + counter_u64_zero(V_ipstatp.ips_toosmall); + counter_u64_zero(V_ipstatp.ips_badhlen); + counter_u64_zero(V_ipstatp.ips_badlen); + counter_u64_zero(V_ipstatp.ips_fragments); + counter_u64_zero(V_ipstatp.ips_fragdropped); + counter_u64_zero(V_ipstatp.ips_fragtimeout); + counter_u64_zero(V_ipstatp.ips_forward); + counter_u64_zero(V_ipstatp.ips_fastforward); + counter_u64_zero(V_ipstatp.ips_cantforward); + counter_u64_zero(V_ipstatp.ips_redirectsent); + counter_u64_zero(V_ipstatp.ips_noproto); + counter_u64_zero(V_ipstatp.ips_delivered); + counter_u64_zero(V_ipstatp.ips_localout); + counter_u64_zero(V_ipstatp.ips_odropped); + counter_u64_zero(V_ipstatp.ips_reassembled); + counter_u64_zero(V_ipstatp.ips_fragmented); + counter_u64_zero(V_ipstatp.ips_ofragments); + counter_u64_zero(V_ipstatp.ips_cantfrag); + counter_u64_zero(V_ipstatp.ips_badoptions); + counter_u64_zero(V_ipstatp.ips_noroute); + counter_u64_zero(V_ipstatp.ips_badvers); + counter_u64_zero(V_ipstatp.ips_rawout); + counter_u64_zero(V_ipstatp.ips_toolong); + counter_u64_zero(V_ipstatp.ips_notmember); + counter_u64_zero(V_ipstatp.ips_nogif); + counter_u64_zero(V_ipstatp.ips_badaddr); +} + +static void +vnet_ipstatp_init(const void *unused) +{ + + V_ipstatp.ips_total = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_badsum = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_tooshort = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_toosmall = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_badhlen = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_badlen = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_fragments = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_fragdropped = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_fragtimeout = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_forward = counter_u64_alloc(M_WAITOK); + 
V_ipstatp.ips_fastforward = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_cantforward = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_redirectsent = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_noproto = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_delivered = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_localout = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_odropped = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_reassembled = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_fragmented = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_ofragments = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_cantfrag = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_badoptions = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_noroute = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_badvers = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_rawout = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_toolong = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_notmember = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_nogif = counter_u64_alloc(M_WAITOK); + V_ipstatp.ips_badaddr = counter_u64_alloc(M_WAITOK); + + ipstat_zero(); +} +VNET_SYSINIT(vnet_ipstatp_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_ipstatp_init, NULL); + +#ifdef VIMAGE +static void +vnet_ipstatp_uninit(const void *unused) +{ + + counter_u64_free(V_ipstatp.ips_total); + counter_u64_free(V_ipstatp.ips_badsum); + counter_u64_free(V_ipstatp.ips_tooshort); + counter_u64_free(V_ipstatp.ips_toosmall); + counter_u64_free(V_ipstatp.ips_badhlen); + counter_u64_free(V_ipstatp.ips_badlen); + counter_u64_free(V_ipstatp.ips_fragments); + counter_u64_free(V_ipstatp.ips_fragdropped); + counter_u64_free(V_ipstatp.ips_fragtimeout); + counter_u64_free(V_ipstatp.ips_forward); + counter_u64_free(V_ipstatp.ips_fastforward); + counter_u64_free(V_ipstatp.ips_cantforward); + counter_u64_free(V_ipstatp.ips_redirectsent); + counter_u64_free(V_ipstatp.ips_noproto); + counter_u64_free(V_ipstatp.ips_delivered); + counter_u64_free(V_ipstatp.ips_localout); + 
counter_u64_free(V_ipstatp.ips_odropped); + counter_u64_free(V_ipstatp.ips_reassembled); + counter_u64_free(V_ipstatp.ips_fragmented); + counter_u64_free(V_ipstatp.ips_ofragments); + counter_u64_free(V_ipstatp.ips_cantfrag); + counter_u64_free(V_ipstatp.ips_badoptions); + counter_u64_free(V_ipstatp.ips_noroute); + counter_u64_free(V_ipstatp.ips_badvers); + counter_u64_free(V_ipstatp.ips_rawout); + counter_u64_free(V_ipstatp.ips_toolong); + counter_u64_free(V_ipstatp.ips_notmember); + counter_u64_free(V_ipstatp.ips_nogif); + counter_u64_free(V_ipstatp.ips_badaddr); +} +VNET_SYSUNINIT(vnet_ipstatp_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_ipstatp_uninit, NULL); +#endif /* VIMAGE */ + +static int +ipstat_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct ipstat ipstat; + + ipstat.ips_total = counter_u64_fetch(V_ipstatp.ips_total); + ipstat.ips_badsum = counter_u64_fetch(V_ipstatp.ips_badsum); + ipstat.ips_tooshort = counter_u64_fetch(V_ipstatp.ips_tooshort); + ipstat.ips_toosmall = counter_u64_fetch(V_ipstatp.ips_toosmall); + ipstat.ips_badhlen = counter_u64_fetch(V_ipstatp.ips_badhlen); + ipstat.ips_badlen = counter_u64_fetch(V_ipstatp.ips_badlen); + ipstat.ips_fragments = counter_u64_fetch(V_ipstatp.ips_fragments); + ipstat.ips_fragdropped = counter_u64_fetch(V_ipstatp.ips_fragdropped); + ipstat.ips_fragtimeout = counter_u64_fetch(V_ipstatp.ips_fragtimeout); + ipstat.ips_forward = counter_u64_fetch(V_ipstatp.ips_forward); + ipstat.ips_fastforward = counter_u64_fetch(V_ipstatp.ips_fastforward); + ipstat.ips_cantforward = counter_u64_fetch(V_ipstatp.ips_cantforward); + ipstat.ips_redirectsent = counter_u64_fetch(V_ipstatp.ips_redirectsent); + ipstat.ips_noproto = counter_u64_fetch(V_ipstatp.ips_noproto); + ipstat.ips_delivered = counter_u64_fetch(V_ipstatp.ips_delivered); + ipstat.ips_localout = counter_u64_fetch(V_ipstatp.ips_localout); + ipstat.ips_odropped = counter_u64_fetch(V_ipstatp.ips_odropped); + ipstat.ips_reassembled = 
counter_u64_fetch(V_ipstatp.ips_reassembled); + ipstat.ips_fragmented = counter_u64_fetch(V_ipstatp.ips_fragmented); + ipstat.ips_ofragments = counter_u64_fetch(V_ipstatp.ips_ofragments); + ipstat.ips_cantfrag = counter_u64_fetch(V_ipstatp.ips_cantfrag); + ipstat.ips_badoptions = counter_u64_fetch(V_ipstatp.ips_badoptions); + ipstat.ips_noroute = counter_u64_fetch(V_ipstatp.ips_noroute); + ipstat.ips_badvers = counter_u64_fetch(V_ipstatp.ips_badvers); + ipstat.ips_rawout = counter_u64_fetch(V_ipstatp.ips_rawout); + ipstat.ips_toolong = counter_u64_fetch(V_ipstatp.ips_toolong); + ipstat.ips_notmember = counter_u64_fetch(V_ipstatp.ips_notmember); + ipstat.ips_nogif = counter_u64_fetch(V_ipstatp.ips_nogif); + ipstat.ips_badaddr = counter_u64_fetch(V_ipstatp.ips_badaddr); + + /* + * Old interface allowed to rewrite 'struct ipstat', and netstat(1) + * used it to zero the structure. To keep compatibility with old + * netstat(1) we will zero out statistics on every write attempt, + * however we no longer support writing arbitrary fake values to + * the statistics. + */ + if (req->newptr) + ipstat_zero(); + + return (SYSCTL_OUT(req, &ipstat, sizeof(ipstat))); +} + +SYSCTL_VNET_PROC(_net_inet_ip, IPCTL_STATS, stats, CTLTYPE_OPAQUE | CTLFLAG_RW, + NULL, 0, ipstat_sysctl, "I", + "IP statistics (struct ipstat, netinet/ip_var.h)"); +/* * Kernel module interface for updating ipstat. The argument is an index * into ipstat treated as an array of u_long. 
While this encodes the general * layout of ipstat into the caller, it doesn't encode its location, so that @@ -223,14 +387,14 @@ void kmod_ipstat_inc(int statnum) { - (*((u_long *)&V_ipstat + statnum))++; + counter_u64_inc((counter_u64_t )&V_ipstatp + statnum, 1); } void kmod_ipstat_dec(int statnum) { - (*((u_long *)&V_ipstat + statnum))--; + counter_u64_dec((counter_u64_t )&V_ipstatp + statnum, 1); } static int Modified: projects/counters/sys/netinet/ip_var.h ============================================================================== --- projects/counters/sys/netinet/ip_var.h Fri Nov 23 13:55:38 2012 (r243452) +++ projects/counters/sys/netinet/ip_var.h Fri Nov 23 14:00:26 2012 (r243453) @@ -97,47 +97,83 @@ struct ip_moptions { }; struct ipstat { - u_long ips_total; /* total packets received */ - u_long ips_badsum; /* checksum bad */ - u_long ips_tooshort; /* packet too short */ - u_long ips_toosmall; /* not enough data */ - u_long ips_badhlen; /* ip header length < data size */ - u_long ips_badlen; /* ip length < ip header length */ - u_long ips_fragments; /* fragments received */ - u_long ips_fragdropped; /* frags dropped (dups, out of space) */ - u_long ips_fragtimeout; /* fragments timed out */ - u_long ips_forward; /* packets forwarded */ - u_long ips_fastforward; /* packets fast forwarded */ - u_long ips_cantforward; /* packets rcvd for unreachable dest */ - u_long ips_redirectsent; /* packets forwarded on same net */ - u_long ips_noproto; /* unknown or unsupported protocol */ - u_long ips_delivered; /* datagrams delivered to upper level*/ - u_long ips_localout; /* total ip packets generated here */ - u_long ips_odropped; /* lost packets due to nobufs, etc. */ - u_long ips_reassembled; /* total packets reassembled ok */ - u_long ips_fragmented; /* datagrams successfully fragmented */ - u_long ips_ofragments; /* output fragments created */ - u_long ips_cantfrag; /* don't fragment flag was set, etc. 
*/ - u_long ips_badoptions; /* error in option processing */ - u_long ips_noroute; /* packets discarded due to no route */ - u_long ips_badvers; /* ip version != 4 */ - u_long ips_rawout; /* total raw ip packets generated */ - u_long ips_toolong; /* ip length > max ip packet size */ - u_long ips_notmember; /* multicasts for unregistered grps */ - u_long ips_nogif; /* no match gif found */ - u_long ips_badaddr; /* invalid address on header */ + uint64_t ips_total; /* total packets received */ + uint64_t ips_badsum; /* checksum bad */ + uint64_t ips_tooshort; /* packet too short */ + uint64_t ips_toosmall; /* not enough data */ + uint64_t ips_badhlen; /* ip header length < data size */ + uint64_t ips_badlen; /* ip length < ip header length */ + uint64_t ips_fragments; /* fragments received */ + uint64_t ips_fragdropped; /* frags dropped (dups, out of space) */ + uint64_t ips_fragtimeout; /* fragments timed out */ + uint64_t ips_forward; /* packets forwarded */ + uint64_t ips_fastforward; /* packets fast forwarded */ + uint64_t ips_cantforward; /* packets rcvd for unreachable dest */ + uint64_t ips_redirectsent; /* packets forwarded on same net */ + uint64_t ips_noproto; /* unknown or unsupported protocol */ + uint64_t ips_delivered; /* datagrams delivered to upper level*/ + uint64_t ips_localout; /* total ip packets generated here */ + uint64_t ips_odropped; /* lost packets due to nobufs, etc. */ + uint64_t ips_reassembled; /* total packets reassembled ok */ + uint64_t ips_fragmented; /* datagrams successfully fragmented */ + uint64_t ips_ofragments; /* output fragments created */ + uint64_t ips_cantfrag; /* don't fragment flag was set, etc. 
*/ + uint64_t ips_badoptions; /* error in option processing */ + uint64_t ips_noroute; /* packets discarded due to no route */ + uint64_t ips_badvers; /* ip version != 4 */ + uint64_t ips_rawout; /* total raw ip packets generated */ + uint64_t ips_toolong; /* ip length > max ip packet size */ + uint64_t ips_notmember; /* multicasts for unregistered grps */ + uint64_t ips_nogif; /* no match gif found */ + uint64_t ips_badaddr; /* invalid address on header */ }; #ifdef _KERNEL +#include #include +/* Should match 'struct ipstat' above. */ +struct ipstat_p { + counter_u64_t ips_total; + counter_u64_t ips_badsum; + counter_u64_t ips_tooshort; + counter_u64_t ips_toosmall; + counter_u64_t ips_badhlen; + counter_u64_t ips_badlen; + counter_u64_t ips_fragments; + counter_u64_t ips_fragdropped; + counter_u64_t ips_fragtimeout; + counter_u64_t ips_forward; + counter_u64_t ips_fastforward; + counter_u64_t ips_cantforward; + counter_u64_t ips_redirectsent; + counter_u64_t ips_noproto; + counter_u64_t ips_delivered; + counter_u64_t ips_localout; + counter_u64_t ips_odropped; + counter_u64_t ips_reassembled; + counter_u64_t ips_fragmented; + counter_u64_t ips_ofragments; + counter_u64_t ips_cantfrag; + counter_u64_t ips_badoptions; + counter_u64_t ips_noroute; + counter_u64_t ips_badvers; + counter_u64_t ips_rawout; + counter_u64_t ips_toolong; + counter_u64_t ips_notmember; + counter_u64_t ips_nogif; + counter_u64_t ips_badaddr; +}; +VNET_DECLARE(struct ipstat_p, ipstatp); +#define V_ipstatp VNET(ipstatp) + /* * In-kernel consumers can use these accessor macros directly to update * stats. 
*/ -#define IPSTAT_ADD(name, val) V_ipstat.name += (val) -#define IPSTAT_SUB(name, val) V_ipstat.name -= (val) +#define IPSTAT_ADD(name, val) counter_u64_inc(V_ipstatp.name, (val)) +#define IPSTAT_SUB(name, val) counter_u64_dec(V_ipstatp.name, (val)) #define IPSTAT_INC(name) IPSTAT_ADD(name, 1) #define IPSTAT_DEC(name) IPSTAT_SUB(name, 1) @@ -146,10 +182,10 @@ struct ipstat { */ void kmod_ipstat_inc(int statnum); #define KMOD_IPSTAT_INC(name) \ - kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(u_long)) + kmod_ipstat_inc(offsetof(struct ipstat_p, name) / sizeof(counter_u64_t)) void kmod_ipstat_dec(int statnum); #define KMOD_IPSTAT_DEC(name) \ - kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(u_long)) + kmod_ipstat_dec(offsetof(struct ipstat_p, name) / sizeof(counter_u64_t)) /* flags passed to ip_output as last parameter */ #define IP_FORWARDING 0x1 /* most of ip header exists */ @@ -176,7 +212,6 @@ struct inpcb; struct route; struct sockopt; -VNET_DECLARE(struct ipstat, ipstat); VNET_DECLARE(u_short, ip_id); /* ip packet ctr, for ids */ VNET_DECLARE(int, ip_defttl); /* default IP ttl */ VNET_DECLARE(int, ipforwarding); /* ip forwarding */ @@ -192,7 +227,6 @@ VNET_DECLARE(int, rsvp_on); VNET_DECLARE(int, drop_redirect); extern struct pr_usrreqs rip_usrreqs; -#define V_ipstat VNET(ipstat) #define V_ip_id VNET(ip_id) #define V_ip_defttl VNET(ip_defttl) #define V_ipforwarding VNET(ipforwarding)