Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 Aug 2017 10:31:33 +0200
From:      Hans Petter Selasky <hps@selasky.org>
To:        Ben RUBSON <ben.rubson@gmail.com>, FreeBSD Net <freebsd-net@freebsd.org>,  jch <jch@FreeBSD.org>, hiren <hiren@strugglingcoder.info>, Slawa Olhovchenkov <slw@zxy.spb.ru>, FreeBSD Stable <freebsd-stable@freebsd.org>
Subject:   Re: mlx4en, timer irq @100%... (11.0 stuck on high network load ???)
Message-ID:  <c05c2b1c-b5a8-c39c-6dff-e6cc0d8642bf@selasky.org>
In-Reply-To: <3ECCFBF1-18D9-4E33-8F39-0C366C3BB8B4@gmail.com>
References:  <BF3A3E47-A726-49FB-B83F-666EFC5C0FF1@gmail.com> <c3fdf0bb-833d-915f-c246-9459819cd825@selasky.org> <4DF74CB8-23D2-4CCF-B699-5B86DAEA65E5@gmail.com> <40602CEA-D417-4E5B-8C68-916958D49A0B@gmail.com> <9c306f10-7c05-d28d-e551-a930603aaafa@selasky.org> <896dd782-cb2c-0259-65d1-b00daae452de@FreeBSD.org> <0DB9F6FF-8BC9-48F5-B359-AC1905B9EB06@gmail.com> <7f14c95d-1ef8-bf82-c469-e6566c3aba66@selasky.org> <76A5EE7E-1D2E-46B4-86F1-F219C3DCE6EA@gmail.com> <e6f9df1c-8b55-8a3b-9f44-e67c26561543@selasky.org> <4C91C6E5-0725-42E7-9813-1F3ACF3DDD6E@gmail.com> <5840c25e-7472-3276-6df9-1ed4183078ad@selasky.org> <2ADA8C57-2C2D-4F97-9F0B-82D53EDDC649@gmail.com> <061cdf72-6285-8239-5380-58d9d19a1ef7@selasky.org> <92BEE83D-498F-47D5-A53C-39DCDC00A0FD@gmail.com> <5d8960d8-e1ff-8719-320f-d3ae84054714@selasky.org> <6B4A35F7-5694-4945-9575-19ADB678F9FA@gmail.com> <297a784a-3d80-b1a6-652e-a78621fe5a8b@selasky.org> <3ECCFBF1-18D9-4E33-8F39-0C366C3BB8B4@gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------FE67B731424B1568ADC4B17D
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 7bit

On 08/08/17 10:06, Ben RUBSON wrote:
>> On 08 Aug 2017, at 10:02, Hans Petter Selasky <hps@selasky.org> wrote:
>>
>> On 08/08/17 10:00, Ben RUBSON wrote:
>>> kgdb) print *twq_2msl.tqh_first
>>> $2 = {
>>>    tw_inpcb = 0xfffff8031c570740,
>>
>> print *twq_2msl.tqh_first->tw_inpcb
> 
> (kgdb) print *twq_2msl.tqh_first->tw_inpcb
> $3 = {
>    inp_hash = {
>      le_next = 0x0,
>      le_prev = 0xfffffe000f78adb8
>    },
>    inp_pcbgrouphash = {
>      le_next = 0x0,
>      le_prev = 0x0
>    },
>    inp_list = {
>      le_next = 0xfffff80c2a07f570,
>      le_prev = 0xffffffff81e15e20
>    },
>    inp_ppcb = 0xfffff80d1bf12210,
>    inp_pcbinfo = 0xffffffff81e15e28,
>    inp_pcbgroup = 0x0,
>    inp_pcbgroup_wild = {
>      le_next = 0x0,
>      le_prev = 0x0
>    },
>    inp_socket = 0x0,
>    inp_cred = 0xfffff804ae6ca400,
>    inp_flow = 0,
>    inp_flags = 92274688,
>    inp_flags2 = 16,
>    inp_vflag = 0 '\0',
>    inp_ip_ttl = 64 '@',
>    inp_ip_p = 0 '\0',
>    inp_ip_minttl = 0 '\0',
>    inp_flowid = 946611505,
>    inp_refcount = 2,
>    inp_pspare = 0xfffff8031c5707c0,
>    inp_flowtype = 191,
>    inp_rss_listen_bucket = 0,
>    inp_ispare = 0xfffff8031c5707f0,
>    inp_inc = {
>      inc_flags = 0 '\0',
>      inc_len = 0 '\0',
>      inc_fibnum = 0,
>      inc_ie = {
>        ie_fport = 53987,
>        ie_lport = 47873,
>        ie_dependfaddr = {
>          ie46_foreign = {
>            ia46_pad32 = 0xfffff8031c570808,
>            ia46_addr4 = {
>              s_addr = 3011802202
>            }
>          },
>          ie6_foreign = {
>            __u6_addr = {
>              __u6_addr8 = 0xfffff8031c570808 "",
>              __u6_addr16 = 0xfffff8031c570808,
>              __u6_addr32 = 0xfffff8031c570808
>            }
>          }
>        },
>        ie_dependladdr = {
>          ie46_local = {
>            ia46_pad32 = 0xfffff8031c570818,
>            ia46_addr4 = {
>              s_addr = 4068705883
>            }
>          },
>          ie6_local = {
>            __u6_addr = {
>              __u6_addr8 = 0xfffff8031c570818 "",
>              __u6_addr16 = 0xfffff8031c570818,
>              __u6_addr32 = 0xfffff8031c570818
>            }
>          }
>        },
>        ie6_zoneid = 0
>      }
>    },
>    inp_label = 0x0,
>    inp_sp = 0x0,
>    inp_depend4 = {
>      inp4_ip_tos = 0 '\0',
>      inp4_options = 0x0,
>      inp4_moptions = 0x0
>    },
>    inp_depend6 = {
>      inp6_options = 0x0,
>      inp6_outputopts = 0x0,
>      inp6_moptions = 0x0,
>      inp6_icmp6filt = 0x0,
>      inp6_cksum = 0,
>      inp6_hops = 0
>    },
>    inp_portlist = {
>      le_next = 0xfffff80274298ae0,
>      le_prev = 0xfffff800454999b0
>    },
>    inp_phd = 0xfffff800454999a0,
>    inp_gencnt = 2119756,
>    inp_lle = 0x0,
>    inp_lock = {
>      lock_object = {
>        lo_name = 0xffffffff814e6940 "tcpinp",
>        lo_flags = 90898432,
>        lo_data = 0,
>        lo_witness = 0x0
>      },
>      rw_lock = 18446735277871559936
>    },
>    inp_rt_cookie = 10,
>    inp_rtu = {
>      inpu_route = {
>        ro_rt = 0x0,
>        ro_lle = 0x0,
>        ro_prepend = 0x0,
>        ro_plen = 0,
>        ro_flags = 384,
>        ro_mtu = 0,
>        spare = 0,
>        ro_dst = {
>          sa_len = 16 '\020',
>          sa_family = 2 '\002',
>          sa_data = 0xfffff8031c5708f2 ""
>        }
>      },
>      inpu_route6 = {
>        ro_rt = 0x0,
>        ro_lle = 0x0,
>        ro_prepend = 0x0,
>        ro_plen = 0,
>        ro_flags = 384,
>        ro_mtu = 0,
>        spare = 0,
>        ro_dst = {
>          sin6_len = 16 '\020',
>          sin6_family = 2 '\002',
>          sin6_port = 0,
>          sin6_flowinfo = 3011802202,
>          sin6_addr = {
>            __u6_addr = {
>              __u6_addr8 = 0xfffff8031c5708f8 "",
>              __u6_addr16 = 0xfffff8031c5708f8,
>              __u6_addr32 = 0xfffff8031c5708f8
>            }
>          },
>          sin6_scope_id = 0
>        }
>      }
>    }
> }
> (kgdb)
> 

Hi,

Here is the conclusion:

The following code is going in an infinite loop:


>         for (;;) {
>                 TW_RLOCK(V_tw_lock);
>                 tw = TAILQ_FIRST(&V_twq_2msl);
>                 if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
>                         TW_RUNLOCK(V_tw_lock);
>                         break;
>                 }
>                 KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
>                     __func__));
> 
>                 inp = tw->tw_inpcb;
>                 in_pcbref(inp);
>                 TW_RUNLOCK(V_tw_lock);
> 
>                 if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
> 
>                         INP_WLOCK(inp);
>                         tw = intotw(inp);
>                         if (in_pcbrele_wlocked(inp)) {

in_pcbrele_wlocked() returns (1) because INP_FREED (16) is set in 
inp->inp_flags2. I guess you have invariants disabled, because the 
KASSERT() below should have caused a panic.

>                                 KASSERT(tw == NULL, ("%s: held last inp "
>                                     "reference but tw not NULL", __func__));
>                                 INP_INFO_RUNLOCK(&V_tcbinfo);
>                                 continue;
>                         }

This is a regression issue after:

> commit 5630210a7f1dbbd903b77b2aef939cd47c63da58
> Author: jch <jch@FreeBSD.org>
> Date:   Thu Oct 30 08:53:56 2014 +0000
> 
>     Fix a race condition in TCP timewait between tcp_tw_2msl_reuse() and
>     tcp_tw_2msl_scan().  This race condition drives unplanned timewait
>     timeout cancellation.  Also simplify implementation by holding inpcb
>     reference and removing tcptw reference counting.

Suggested fix attached.

--HPS

--------------FE67B731424B1568ADC4B17D
Content-Type: text/x-patch;
 name="tcp_timewait.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="tcp_timewait.diff"

Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c	(revision 321981)
+++ sys/netinet/tcp_timewait.c	(working copy)
@@ -709,10 +709,11 @@
 			INP_WLOCK(inp);
 			tw = intotw(inp);
 			if (in_pcbrele_wlocked(inp)) {
-				KASSERT(tw == NULL, ("%s: held last inp "
-				    "reference but tw not NULL", __func__));
 				INP_INFO_RUNLOCK(&V_tcbinfo);
-				continue;
+				if (tw == NULL)
+					continue;
+				else
+					break;	/* INP_FREED flag is set */
 			}
 
 			if (tw == NULL) {

--------------FE67B731424B1568ADC4B17D--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?c05c2b1c-b5a8-c39c-6dff-e6cc0d8642bf>