From owner-svn-src-user@FreeBSD.ORG Thu Jun 4 21:40:36 2009 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id B6FF91065676; Thu, 4 Jun 2009 21:40:36 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id A4BEC8FC1D; Thu, 4 Jun 2009 21:40:36 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n54Leab7004916; Thu, 4 Jun 2009 21:40:36 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n54Leai2004915; Thu, 4 Jun 2009 21:40:36 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <200906042140.n54Leai2004915@svn.freebsd.org> From: Kip Macy Date: Thu, 4 Jun 2009 21:40:36 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r193460 - user/kmacy/releng_7_2_fcs/sys/dev/cxgb X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 04 Jun 2009 21:40:37 -0000 Author: kmacy Date: Thu Jun 4 21:40:36 2009 New Revision: 193460 URL: http://svn.freebsd.org/changeset/base/193460 Log: reduce memory barrier usage Modified: user/kmacy/releng_7_2_fcs/sys/dev/cxgb/cxgb_sge.c Modified: user/kmacy/releng_7_2_fcs/sys/dev/cxgb/cxgb_sge.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/dev/cxgb/cxgb_sge.c Thu Jun 4 21:31:03 2009 (r193459) +++ user/kmacy/releng_7_2_fcs/sys/dev/cxgb/cxgb_sge.c Thu Jun 4 21:40:36 2009 (r193460) @@ -212,6 +212,24 @@ static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); static void sge_txq_reclaim_handler(void *arg, int ncount); +#ifdef __LP64__ +static void +set_wr_hdr(struct work_request_hdr *wrp, uint64_t wr_hi, uint64_t wr_lo) +{ + + wrp->wrh_hilo = (wr_hi<<32)|wr_lo; +} +#else +static void +set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) +{ + + wrp->wrh_hi = wr_hi; + wmb(); + wrp->wrh_lo = wr_lo; +} +#endif + /** * reclaim_completed_tx - reclaims completed Tx descriptors * @adapter: the adapter @@ -675,7 +693,7 @@ recycle_rx_buf(adapter_t *adap, struct s q->sdesc[q->pidx] = q->sdesc[idx]; to->addr_lo = from->addr_lo; // already big endian to->addr_hi = from->addr_hi; // likewise - wmb(); + wmb(); /* necessary ? */ to->len_gen = htobe32(V_FLD_GEN1(q->gen)); to->gen2 = htobe32(V_FLD_GEN2(q->gen)); q->credits++; @@ -1096,7 +1114,7 @@ make_sgl(struct sg_ent *sgp, bus_dma_seg * @adap: the adapter * @q: the Tx queue * - * Ring the doorbel if a Tx queue is asleep. There is a natural race, + * Ring the doorbell if a Tx queue is asleep. There is a natural race, * where the HW is going to sleep just after we checked, however, * then the interrupt handler will detect the outstanding TX packet * and ring the doorbell for us. @@ -1159,11 +1177,10 @@ write_wr_hdr_sgl(unsigned int ndesc, str struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; if (__predict_true(ndesc == 1)) { - wrp->wrh_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | - V_WR_SGLSFLT(flits)) | wr_hi; - wmb(); - wrp->wrh_lo = htonl(V_WR_LEN(flits + sgl_flits) | - V_WR_GEN(txqs->gen)) | wr_lo; + set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | + V_WR_SGLSFLT(flits)) | wr_hi, + wrp->wrh_lo = htonl(V_WR_LEN(flits + sgl_flits) | + V_WR_GEN(txqs->gen)) | wr_lo); /* XXX gen? */ wr_gen2(txd, txqs->gen); @@ -1210,9 +1227,8 @@ write_wr_hdr_sgl(unsigned int ndesc, str wr_gen2(txd, txqs->gen); flits = 1; } - wrp->wrh_hi |= htonl(F_WR_EOP); - wmb(); - wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; + set_wr_hdr(wrp, wrp->wrh_hi |= htonl(F_WR_EOP), + htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo); wr_gen2((struct tx_desc *)wp, ogen); } } @@ -1250,8 +1266,6 @@ t3_encap(struct sge_qset *qs, struct mbu struct tx_desc *txd; - DPRINTF("t3_encap cpu=%d ", curcpu); - pi = qs->port; sc = pi->adapter; txq = &qs->txq[TXQ_ETH]; @@ -1309,11 +1323,12 @@ t3_encap(struct sge_qset *qs, struct mbu txd->flit[fidx] |= htobe64(1 << 24); } - wrp->wrh_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | + + wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); - wmb(); - wrp->wrh_lo = htonl(V_WR_LEN(flits) | + wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); + set_wr_hdr(wrp, wr_hi, wr_lo); /* XXX gen? */ wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq); @@ -1382,13 +1397,12 @@ t3_encap(struct sge_qset *qs, struct mbu txq_prod(txq, 1, &txqs); m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); flits = (mlen + 7) / 8 + 3; - hdr->wr.wrh_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | + wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); - wmb(); - hdr->wr.wrh_lo = htonl(V_WR_LEN(flits) | + wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); - + set_wr_hdr(&hdr->wr, wr_hi, wr_lo); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq); return (0); @@ -1411,19 +1425,17 @@ t3_encap(struct sge_qset *qs, struct mbu txq_prod(txq, 1, &txqs); m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); flits = (mlen + 7) / 8 + 2; - cpl->wr.wrh_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | - V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | + + wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | + V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); - wmb(); - cpl->wr.wrh_lo = htonl(V_WR_LEN(flits) | + wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); - + set_wr_hdr(&cpl->wr, wr_hi, wr_lo); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq); - DPRINTF("pio buf\n"); return (0); } - DPRINTF("regular buf\n"); flits = 2; } wrp = (struct work_request_hdr *)txd; @@ -1435,7 +1447,6 @@ t3_encap(struct sge_qset *qs, struct mbu sgl_flits = sgl_len(nsegs); - DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); txq_prod(txq, ndesc, &txqs); wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_TID(txq->token)); @@ -1598,6 +1609,7 @@ write_imm(struct tx_desc *d, struct mbuf { struct work_request_hdr *from = mtod(m, struct work_request_hdr *); struct work_request_hdr *to = (struct work_request_hdr *)d; + uint32_t wr_hi, wr_lo; if (len > WR_LEN) panic("len too big %d\n", len); @@ -1605,11 +1617,11 @@ write_imm(struct tx_desc *d, struct mbuf panic("len too small %d", len); memcpy(&to[1], &from[1], len - sizeof(*from)); - to->wrh_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | + wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | V_WR_BCNTLFLT(len & 7)); - wmb(); - to->wrh_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | + wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); + set_wr_hdr(to, wr_hi, wr_lo); wr_gen2(d, gen); /* @@ -1657,11 +1669,7 @@ addq_exit: mbufq_tail(&q->sendq, m); struct sge_qset *qs = txq_to_qset(q, qid); - printf("stopping q\n"); - setbit(&qs->txq_stopped, qid); - smp_mb(); - if (should_restart_tx(q) && test_and_clear_bit(qid, &qs->txq_stopped)) return 2; @@ -2207,8 +2215,6 @@ again: cleaned = reclaim_completed_tx(qs if (__predict_false(q->size - q->in_use < ndesc)) { setbit(&qs->txq_stopped, TXQ_OFLD); - smp_mb(); - if (should_restart_tx(q) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) goto again; @@ -2418,11 +2424,6 @@ t3_sge_alloc_qset(adapter_t *sc, u_int i } for (i = 0; i < ntxq; ++i) { - /* - * The control queue always uses immediate data so does not - * need to keep track of any mbufs. - * XXX Placeholder for future TOE support. - */ size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); if ((ret = alloc_ring(sc, p->txq_size[i], @@ -2861,7 +2862,6 @@ process_responses(adapter_t *adap, struc ethpad = 2; } else { - DPRINTF("pure response\n"); rspq->pure_rsps++; } skip: @@ -2881,8 +2881,6 @@ process_responses(adapter_t *adap, struc refill_rspq(adap, rspq, rspq->credits); rspq->credits = 0; } - DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags); - if (!eth && eop) { rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; /* @@ -2928,7 +2926,6 @@ process_responses(adapter_t *adap, struc struct ifnet *ifp = m->m_pkthdr.rcvif; (*ifp->if_input)(ifp, m); } - DPRINTF("received tunnel packet\n"); rspq->rspq_mh.mh_head = NULL; }