Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 29 Aug 2013 14:51:41 -0700
From:      "T.C. Gubatayao" <tgubatayao@barracuda.com>
To:        Alan Somers <asomers@freebsd.org>
Cc:        Jack F Vogel <jfv@freebsd.org>, "Justin T. Gibbs" <gibbs@freebsd.org>, Andre Oppermann <andre@freebsd.org>, "net@freebsd.org" <net@freebsd.org>
Subject:   Re: Flow ID, LACP, and igb
Message-ID:  <49170157-EFC7-44A3-B881-12B4F2644F59@barracuda.com>
In-Reply-To: <C209B12F-A404-47EC-8225-3F5E4123E05E@barracuda.com>
References:  <D01A0CB2-B1E3-4F4B-97FA-4C821C0E3FD2@FreeBSD.org> <521BBD21.4070304@freebsd.org> <CAOtMX2jvKGY==t9i-a_8RtMAPH2p1VDj950nMHHouryoz3nbsA@mail.gmail.com> <521EE8DA.3060107@freebsd.org> <BCC2C62D4FE171479E2F1C2593FE508B0BE24383@BN-SCL-MBX03.Cudanet.local> <CAOtMX2h5SGh5eYV50y%2BQB_s367V9iattGU862wwXcONDV%2BTG8g@mail.gmail.com> <0771FC4F-BCDD-4985-A33F-09951806AD99@barracuda.com> <CAOtMX2i5BXqm4_gP67MEmN8szCabp8_QRKfZM0tqFtbEKS31SA@mail.gmail.com> <C209B12F-A404-47EC-8225-3F5E4123E05E@barracuda.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On Aug 29, 2013, at 5:40 PM, T.C. Gubatayao <tgubatayao@barracuda.com> wrote:
> On Aug 29, 2013, at 4:21 PM, Alan Somers <asomers@freebsd.org> wrote:
>
>> They're faster, but even with this change, jenkins_hash is still 6 times
>> slower than FNV hash.
>
> Actually, I think your test isn't accurately simulating memory access, which
> might be skewing the results.
>
> For example, from net/if_lagg.c:
>
>                p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
>                p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
>
> These two calls can't both be aligned, since ETHER_ADDR_LEN is 6 octets.  The
> same is true for the other hashed fields in the IP and TCP/UDP headers.
> Assuming the mbuf data pointer is aligned, the IP addresses and ports are both
> on 2-byte alignments (without VLAN or IP options).  In your test, they're all
> aligned and in the same cache line.
>
> When I modify the test to simulate an mbuf, lookup3 beats FNV and hash32, and
> SipHash is only 2-3 times slower.
>
>> Also, your technique of copying the hashable fields into a separate buffer
>> would need modification to work with different types of packet and different
>> LAGG_F_HASH[234] flags.  Because different packets have different hashable
>> fields, struct key would need to be expanded to include the vlan tag, IPV6
>> addresses, and IPv6 flowid.  lagg_hashmbuf would then have to zero the unused
>> fields.
>
> Agreed, but this is relatively simple with a buffer on the stack, and does not
> require zeroes or padding.  See my modified test, attached.
>
> T.C.

Attachment was stripped.

--- a/lagg_hash.c       2013-08-29 14:21:17.255307349 -0400
+++ b/lagg_hash.c       2013-08-29 17:26:14.055404918 -0400
@@ -7,35 +7,63 @@
 #include <sys/hash.h>
 #include <sys/fnv_hash.h>
 #include <sys/time.h>
-
-uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t);
+#include <string.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
=20
 #define ITERATIONS     100000000
=20
-typedef uint32_t do_hash_t(void);
+typedef uint32_t do_hash_t(uint32_t);
+
+/*
+ * Simulate mbuf data for a packet.
+ * No VLAN tagging and no IP options.
+ */
+struct _mbuf {
+       struct ether_header eh;
+       struct ip ip;
+       struct tcphdr th;
+} __attribute__((packed)) m =3D {
+       {
+               .ether_dhost =3D { 181, 16, 73, 9, 219, 22 },
+               .ether_shost =3D { 69, 170, 210, 11, 24, 120 },
+               .ether_type =3D 0x008
+       },
+       {
+               .ip_src.s_addr =3D 1329258245,
+               .ip_dst.s_addr =3D 1319097119,
+               .ip_p =3D 0x06
+       },
+       {
+               .th_sport =3D 12506,
+               .th_dport =3D 47804
+       }
+};
=20
-// Pad the MACs with 0s because jenkins_hash operates on 32-bit inputs
-const uint8_t ether_shost[] =3D {181, 16, 73, 9, 219, 22, 0, 0};
-const uint8_t ether_dhost[] =3D {69, 170, 210, 111, 24, 120, 0, 0};
-const struct in_addr ip_src =3D {.s_addr =3D 1329258245};
-const struct in_addr ip_dst =3D {.s_addr =3D 1319097119};
-const uint32_t ports =3D 3132895450;
 const uint8_t sipkey[16] =3D {7, 239, 255, 43, 68, 53, 56, 225,
                            98, 81, 177, 80, 92, 235, 242, 39};
=20
+#define LAGG_F_HASHL2  0x1
+#define LAGG_F_HASHL3  0x2
+#define LAGG_F_HASHL4  0x4
+#define LAGG_F_HASHALL (LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4)
+
 /*
  * Simulate how lagg_hashmbuf uses FNV hash for a TCP/IP packet
  * No VLAN tagging
  */
-uint32_t do_fnv(void)
+uint32_t do_fnv(uint32_t flags)
 {
        uint32_t p =3D FNV1_32_INIT;
=20
-       p =3D fnv_32_buf(ether_shost, 6, p);
-       p =3D fnv_32_buf(ether_dhost, 6, p);
-       p =3D fnv_32_buf(&ip_src, sizeof(struct in_addr), p);
-       p =3D fnv_32_buf(&ip_dst, sizeof(struct in_addr), p);
-       p =3D fnv_32_buf(&ports, sizeof(ports), p);
+       if (flags & LAGG_F_HASHL2)
+               p =3D fnv_32_buf(&m.eh.ether_dhost, 12, p);
+       if (flags & LAGG_F_HASHL3)
+               p =3D fnv_32_buf(&m.ip.ip_src, 8, p);
+       if (flags & LAGG_F_HASHL4)
+               p =3D fnv_32_buf(&m.th.th_sport, 4, p);
+
        return (p);
 }
=20
@@ -43,59 +71,74 @@
  * Simulate how lagg_hashmbuf uses hash32 for a TCP/IP packet
  * No VLAN tagging
  */
-uint32_t do_hash32(void)
+uint32_t do_hash32(uint32_t flags)
 {
        // Actually, if_lagg used a pseudorandom number determined at inter=
face
        // creation time.  But this should have the same timing
        // characteristics.
        uint32_t p =3D HASHINIT;
=20
-       p =3D hash32_buf(ether_shost, 6, p);
-       p =3D hash32_buf(ether_dhost, 6, p);
-       p =3D hash32_buf(&ip_src, sizeof(struct in_addr), p);
-       p =3D hash32_buf(&ip_dst, sizeof(struct in_addr), p);
-       p =3D hash32_buf(&ports, sizeof(ports), p);
+       if (flags & LAGG_F_HASHL2)
+               p =3D hash32_buf(&m.eh.ether_dhost, 12, p);
+       if (flags & LAGG_F_HASHL3)
+               p =3D hash32_buf(&m.ip.ip_src, 8, p);
+       if (flags & LAGG_F_HASHL4)
+               p =3D hash32_buf(&m.th.th_sport, 4, p);
+
        return (p);
 }
=20
+/* Simulate copying the info out of the mbuf. */
+static __inline size_t init_key(char *key, uint32_t flags)
+{
+       uint16_t etype;
+       size_t len =3D 0;
+
+       if (flags & LAGG_F_HASHL2) {
+               memcpy(key + len, &m.eh.ether_dhost, 12);
+               len +=3D 12;
+       }
+
+       if (flags & LAGG_F_HASHL3) {
+               memcpy(key + len, &m.ip.ip_src, 8);
+               len +=3D 8;
+       }
+
+       if (flags & LAGG_F_HASHL4) {
+               memcpy(key + len, &m.th.th_sport, 4);
+               len +=3D 4;
+       }
+
+       return (len);
+}
+
 /*
  * Simulate how lagg_hashmbuf would use siphash24 for a TCP/IP packet
  * No VLAN tagging
  */
-uint32_t do_siphash24(void)
+uint32_t do_siphash24(uint32_t flags)
 {
        SIPHASH_CTX ctx;
+       char key[26];
+       size_t len;
=20
-       SipHash24_Init(&ctx);
-       SipHash_SetKey(&ctx, sipkey);
+       len =3D init_key(key, flags);
=20
-       SipHash_Update(&ctx, ether_shost, 6);
-       SipHash_Update(&ctx, ether_dhost, 6);
-       SipHash_Update(&ctx, &ip_src, sizeof(struct in_addr));
-       SipHash_Update(&ctx, &ip_dst, sizeof(struct in_addr));
-       SipHash_Update(&ctx, &ports, sizeof(ports));
-       return (SipHash_End(&ctx) & 0xFFFFFFFF);
+       return (SipHash24(&ctx, sipkey, key, len) & 0xFFFFFFFF);
 }
=20
 /*
  * Simulate how lagg_hashmbuf would use lookup3 aka jenkins_hash
  * No VLAN tagging
  */
-uint32_t do_jenkins(void)
+uint32_t do_jenkins(uint32_t flags)
 {
-       /* Jenkins hash does not recommend any specific initializer */
-       uint32_t p =3D FNV1_32_INIT;
+       char key[26];
+       size_t len;
=20
-       /*=20
-        * jenkins_hash uses 32-bit inputs, so we need to present the MACs =
as
-        * arrays of 2 32-bit values
-        */
-       p =3D jenkins_hash32((uint32_t*)ether_shost, 2, p);
-       p =3D jenkins_hash32((uint32_t*)ether_dhost, 2, p);
-       p =3D jenkins_hash32((uint32_t*)&ip_src, sizeof(struct in_addr) / 4=
, p);
-       p =3D jenkins_hash32((uint32_t*)&ip_dst, sizeof(struct in_addr) / 4=
, p);
-       p =3D jenkins_hash32(&ports, sizeof(ports) / 4, p);
-       return (p);
+       len =3D init_key(key, flags);
+
+       return (jenkins_hash(key, len, FNV1_32_INIT));
 }
=20
=20
@@ -120,7 +163,7 @@
=20
                gettimeofday(&tv_old, NULL);
                for (j=3D0; j<ITERATIONS; j++)
-                       funcs[i].f();
+                       funcs[i].f(LAGG_F_HASHALL);
                gettimeofday(&tv_new, NULL);
                timersub(&tv_new, &tv_old, &tv_diff);
                t =3D tv_diff.tv_sec + tv_diff.tv_usec / 1000000.;




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?49170157-EFC7-44A3-B881-12B4F2644F59>