Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 1 Sep 2010 10:31:15 -0700
From:      Jack Vogel <jfvogel@gmail.com>
To:        John Baldwin <jhb@freebsd.org>
Cc:        pluknet <pluknet@gmail.com>, freebsd-stable@freebsd.org
Subject:   Re: page fault in e1000_clear_hw_cntrs_base_generic() during SIOCAIFADDR
Message-ID:  <AANLkTinLAkLL7aWRan8Y%2BViM_MavsWcEuQVEJzKFNuOs@mail.gmail.com>
In-Reply-To: <201009011324.59934.jhb@freebsd.org>
References:  <AANLkTi=cQF0Ta5scpMBZ6Ba_uj_Zqxu9=2qUv2g=14fp@mail.gmail.com> <201009011206.15494.jhb@freebsd.org> <AANLkTimD6K0UKu80Xf3hu6Mx5MEth2Gaqr-Dxm44fqSi@mail.gmail.com> <201009011324.59934.jhb@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
LOL, if it's the VF, it's pretty new code. PLEASE, anyone, if this is the case,
make it clear in the title somewhere, ok? Thanks.

Jack


On Wed, Sep 1, 2010 at 10:24 AM, John Baldwin <jhb@freebsd.org> wrote:

> On Wednesday, September 01, 2010 1:11:31 pm pluknet wrote:
> > On 1 September 2010 20:06, John Baldwin <jhb@freebsd.org> wrote:
> > > On Wednesday, September 01, 2010 11:53:09 am pluknet wrote:
> > >> Hi.
> > >>
> > >> This is reproducible from time to time on boot when
> > >> handling SIOCAIFADDR called from ifconfig on igb
> > >> on fresh (and not so fresh) 8-STABLE.
> > >>
> > >> How can I help with debugging?
> > >>
> > >> Kernel page fault with the following non-sleepable locks held:
> > >> exclusive sleep mutex igb0 (IGB Core Lock) r = 0 (0xc2655534) locked @
> > >> /usr/src/sys/modules/igb/../../dev/e1000/if_igb.c:965
> > >> KDB: stack backtrace:
> > >> db_trace_self_wrapper(c08b5055,cce577b8,c060db15,3c5,0,...) at
> > >> db_trace_self_wrapper+0x26
> > >> kdb_backtrace(3c5,0,ffffffff,c0a94864,cce577f0,...) at
> kdb_backtrace+0x29
> > >> _witness_debugger(c08b74fe,cce57804,4,1,0,...) at
> _witness_debugger+0x25
> > >> witness_warn(5,0,c08e3140,cce5782c,c2956000,...) at witness_warn+0x1fe
> > >> trap(cce57890) at trap+0x195
> > >> calltrap() at calltrap+0x6
> > >> --- trap 0xc, eip = 0xc3192477, esp = 0xcce578d0, ebp = 0xcce578e0 ---
> > >> e1000_clear_hw_cntrs_base_generic(c2651004,64,c3185850,c2651000,0,...)
> > >> at e1000_clear_hw_cntrs_base_generic+0x3e7
> > >
> > > Can you use gdb on your kernel.debug to map this to a source file and
> > > line?
> > >
> >
> > Here it is (btw, it took about 10-15 reboots to reproduce after adding
> > swap and dumpon setup).
> > Hmm.. don't see where it might access an invalid pointer.
> >
> > #0  doadump () at pcpu.h:231
> > #1  0xc04a3679 in db_fncall (dummy1=3D1, dummy2=3D0, dummy3=3D-10621221=
44,
> >     dummy4=3D0xcce636a8 "") at /usr/src/sys/ddb/db_command.c:548
> > #2  0xc04a3a71 in db_command (last_cmdp=3D0xc093d19c, cmd_table=3D0x0,
> dopager=3D1)
> >     at /usr/src/sys/ddb/db_command.c:445
> > #3  0xc04a3bca in db_command_loop () at /usr/src/sys/ddb/db_command.c:4=
98
> > #4  0xc04a5aed in db_trap (type=3D12, code=3D0) at
> /usr/src/sys/ddb/db_main.c:229
> > #5  0xc05fa64e in kdb_trap (type=3D12, code=3D0, tf=3D0xcce63890)
> >     at /usr/src/sys/kern/subr_kdb.c:535
> > #6  0xc084dcdf in trap_fatal (frame=3D0xcce63890, eva=3D3428511744)
> >     at /usr/src/sys/i386/i386/trap.c:929
> > #7  0xc084e553 in trap (frame=3D0xcce63890) at
> /usr/src/sys/i386/i386/trap.c:328
> > #8  0xc082f66c in calltrap () at /usr/src/sys/i386/i386/exception.s:166
> > #9  0xc318c477 in e1000_clear_hw_cntrs_base_generic (hw=3D0xc2655004)
> >     at /usr/src/sys/modules/igb/../../dev/e1000/e1000_mac.c:643
> > #10 0xc317ec82 in igb_init_locked (adapter=3D0xc2655000)
> >     at /usr/src/sys/modules/igb/../../dev/e1000/if_igb.c:1202
> > #11 0xc31801e5 in igb_ioctl (ifp=3D0xc2943c00, command=3D2149607692,
> >     data=3D0xc29db600 "=E2=95=A2=E2=95=A4\235=D0=B1=D0=B4=E2=95=A4\235=
=D0=B1=D1=82=E2=95=A4\235=D0=B1")
> >     at /usr/src/sys/modules/igb/../../dev/e1000/if_igb.c:966
> > #12 0xc0696c4e in in_ifinit (ifp=3D0xc2943c00, ia=3D0xc29db600,
> > sin=3DVariable "sin" is not available.
> > )
> >     at /usr/src/sys/netinet/in.c:848
> > #13 0xc06980cb in in_control (so=3D0xc2a5d9a8, cmd=3D2151704858,
> >     data=3D0xc2649400 "igb0", ifp=3D0xc2943c00, td=3D0xc29b8280)
> > ---Type <return> to continue, or q <return> to quit---
> >     at /usr/src/sys/netinet/in.c:563
> > #14 0xc067c860 in ifioctl (so=3D0xc2a5d9a8, cmd=3D2151704858,
> >     data=3D0xc2649400 "igb0", td=3D0xc29b8280) at /usr/src/sys/net/if.c=
:2523
> > #15 0xc0617395 in soo_ioctl (fp=3D0xc29ce310, cmd=3D2151704858,
> data=3D0xc2649400,
> >     active_cred=3D0xc254b100, td=3D0xc29b8280)
> >     at /usr/src/sys/kern/sys_socket.c:212
> > #16 0xc06113dd in kern_ioctl (td=3D0xc29b8280, fd=3D3, com=3D2151704858=
,
> >     data=3D0xc2649400 "igb0") at file.h:262
> > #17 0xc0611564 in ioctl (td=3D0xc29b8280, uap=3D0xcce63cf8)
> >     at /usr/src/sys/kern/sys_generic.c:678
> > #18 0xc084e160 in syscall (frame=3D0xcce63d38)
> >     at /usr/src/sys/i386/i386/trap.c:1111
> > #19 0xc082f6d1 in Xint0x80_syscall ()
> >     at /usr/src/sys/i386/i386/exception.s:264
> > #20 0x00000033 in ?? ()
> > Previous frame inner to this frame (corrupt stack?)
> >
> > (kgdb) f 9
> > #9  0xc318c477 in e1000_clear_hw_cntrs_base_generic (hw=3D0xc2655004)
> >     at /usr/src/sys/modules/igb/../../dev/e1000/e1000_mac.c:643
> > 643             E1000_READ_REG(hw, E1000_SYMERRS);
> > (kgdb) list
> > 638     void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw)
> > 639     {
> > 640             DEBUGFUNC("e1000_clear_hw_cntrs_base_generic");
> > 641
> > 642             E1000_READ_REG(hw, E1000_CRCERRS);
> > 643             E1000_READ_REG(hw, E1000_SYMERRS);
> > 644             E1000_READ_REG(hw, E1000_MPC);
> > 645             E1000_READ_REG(hw, E1000_SCC);
> > 646             E1000_READ_REG(hw, E1000_ECOL);
> > 647             E1000_READ_REG(hw, E1000_MCC);
> >
> > (kgdb) p *(struct e1000_osdep *)hw->back
> > $6 = {mem_bus_space_tag = 1, mem_bus_space_handle = 3428495360,
> >   io_bus_space_tag = 0, io_bus_space_handle = 0, flash_bus_space_tag = 0,
> >   flash_bus_space_handle = 0, dev = 0xc261a600}
> >
> > (kgdb) p *hw
> > [...]
> >       power_down =3D 0xc3186340 <e1000_null_phy_generic>}, type =3D
> e1000_phy_vf,
> > [...]
> >
> > (kgdb) p (struct e1000_mac_info *)hw->mac.type
> > $8 = (struct e1000_mac_info *) 0x1a
> >
> > (kgdb) p *(struct e1000_mac_info *)hw->mac
> > $10 =3D {ops =3D {init_params =3D 0x8be58955, id_led_init =3D 0x80c7084=
5,
> >     blink_led =3D 0x390, check_for_link =3D 0, check_mng_mode =3D 0x2d4=
80c7,
> >     cleanup_led =3D 0, clear_hw_cntrs =3D 0x80c70000, clear_vfta =3D 0x=
2d0,
> >     get_bus_info =3D 0, set_lan_id =3D 0x2c880c7, get_link_up_info =3D =
0,
> >     led_on =3D 0xc7660000, led_off =3D 0xbe80, update_mc_addr_list =3D
> 0x66008000,
> >     reset_hw =3D 0x2c480c7, init_hw =3D 0x10000, shutdown_serdes =3D
> 0xf05840c7,
> >     power_up_serdes =3D 0xc7c31a4f, setup_link =3D 0x50003040,
> >     setup_physical_interface =3D 0x40c7c31a, setup_led =3D 0x1a539048,
> >     write_vfta =3D 0x4c40c7c3,
> >     config_collision_dist =3D 0xc31a5360 <e1000_init_hw_vf>,
> >     rar_set =3D 0x901c40c7, read_mac_addr =3D 0xc7c31a55,
> >     validate_mdi_setting =3D 0x55103840, mng_host_if_write =3D 0x40c7c3=
1a,
> >     mng_write_cmd_header =3D 0x1a502044, mng_enable_host_if =3D 0x6c40c=
7c3,
> >     wait_autoneg =3D 0xc31a52a0 <e1000_rar_set_vf>}, addr =3D "=D0=B3@p=
@R\032",
> >   perm_addr =3D "=D1=861=D1=8E]=D1=86\215", type =3D 182, collision_del=
ta =3D 666668288,
> >   ledctl_default =3D 0, ledctl_mode1 =3D 2347075925, ledctl_mode2 =3D
> 1086785605,
> >   mc_filter_type =3D 441389072, tx_packet_delta =3D 3095447491,
> >   txcw =3D 3758096387, current_ifs_val =3D 6734, ifs_max_val =3D 51139,
> >   ifs_min_val =3D 5248, ifs_ratio =3D 3, ifs_step_size =3D 45056,
> >   mta_reg_count =3D 6734, uta_reg_count =3D 51139, mta_shadow =3D {1879=
048196,
> >     1573067352, 7769539, 4294883413, 3851026431, 3062743901, 0,
> 1575323989,
> >     645172675, 666668288, 0, 2311074133, 2311282149, 666668534, 0,
> >     2347075925, 2160527429, 1016, 4, 66879687, 393216, 3224436736,
> >     4136223581, 1474660693, 3968029526, 209554260, 504397187, 117070194=
2,
> > ---Type <return> to continue, or q <return> to quit---
> >     503317428, 273008384, 30, 3339212171, 45125, 3071213568, 48770,
> 98615552,
> >     838817933, 4294672841, 3187671040, 8, 3526433396, 2298593923,
> 3400929240,
> >     4282247379, 1962934272, 573167, 3458793472, 88585743, 72857103,
> >     4052345043, 3490314963, 565204363, 4280641240, 1149855247,
> 1301002325,
> >     29524752, 967857545, 2199679946, 2817197767, 3239069067, 3364032736=
,
> >     3024455939, 2232436107, 2038763456, 2348810239, 1166870613,
> 608487348,
> >     12, 608487168, 4104, 608471296, 605325572, 68719359, 3296919552,
> >     1600019284, 3029189469, 38, 666668288, 0, 2212858197, 1300961516,
> >     1169624848, 139823884, 83379655, 2231369728, 4232415689, 1170671476=
,
> >     16778488, 4165307648, 203703495, 0, 136594631, 2, 69485705,
> 4280554633,
> >     268434, 2311309568, 666668534, 0, 2212858197, 3071219948, 116674056=
5,
> >     4165322504, 5, 203703495, 0, 2382124425, 1153955925, 133156,
> 1418264576,
> >     76088356, 412155684, 3372220420, 649366979, 0, 2212858197,
> 1435180268,
> >     4166879500, 2299026827, 1170734197, 1780, 33194752, 812861556,
> >     3354686861, 795716, 3338665984, 17310788, 2298478592},
> >   rar_entry_count =3D 9332, forced_speed_duplex =3D 4 '\004',
> >   adaptive_ifs =3D -1811995620, has_fwsm =3D 1048,
> >   arc_subsystem_valid =3D 745848965, asf_firmware_present =3D -19466573=
97,
> >   autoneg =3D -326501259, autoneg_failed =3D -158743715,
> >   get_link_status =3D 1946352259, in_ifs_mode =3D 66749261,
> >   report_tx_early =3D -1096, serdes_link_state =3D 3353703935,
> >   serdes_has_link =3D 455749, tx_pkt_filtering =3D 1300299778}
> >
> > (kgdb) p *hw
> > $13 =3D {back =3D 0xc2659498, hw_addr =3D 0xc265949c "", flash_address =
=3D 0x0,
> >   io_base =3D 0, mac =3D {ops =3D {
> >       init_params =3D 0xc31a4f10 <e1000_init_mac_params_vf>, id_led_ini=
t =3D
> 0,
>
> Hmm, this is a VF interface.  I know that VF adapters have a different set
> of stats than all the other e1000 adapters.  Perhaps that is related?  Maybe
> make the call to e1000_clear_hw_cntrs_base_generic() in igb_init_locked()
> conditional on 'if (adapter->hw.mac.type != e1000_vfadapt)'?
>
> --
> John Baldwin
> _______________________________________________
> freebsd-stable@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-stable
> To unsubscribe, send any mail to "freebsd-stable-unsubscribe@freebsd.org"
>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?AANLkTinLAkLL7aWRan8Y%2BViM_MavsWcEuQVEJzKFNuOs>