Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 30 May 2019 13:29:24 +0000 (UTC)
From:      Marcin Wojtas <mw@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r348402 - head/sys/dev/ena
Message-ID:  <201905301329.x4UDTOww066571@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mw
Date: Thu May 30 13:29:24 2019
New Revision: 348402
URL: https://svnweb.freebsd.org/changeset/base/348402

Log:
  Lock optimization in ENA
  
  Handle IO interrupts using filter routine. That way, the main cleanup
  task could be moved to the separate thread using taskqueue.
  
  The deferred Rx cleanup task was removed, and now the cleanup task is
  begin called instead. That way, the Rx lock could be removed.
  
  In addition, Queue management (wake up and stop TX ring) was added, so
  the TX cleanup task can be performed mostly lockless.
  
  Submitted by:  Michal Krawczyk <mk@semihalf.com>
  Obtained from: Semihalf
  Sponsored by:  Amazon, Inc.

Modified:
  head/sys/dev/ena/ena.c
  head/sys/dev/ena/ena.h
  head/sys/dev/ena/ena_sysctl.c

Modified: head/sys/dev/ena/ena.c
==============================================================================
--- head/sys/dev/ena/ena.c	Thu May 30 13:28:03 2019	(r348401)
+++ head/sys/dev/ena/ena.c	Thu May 30 13:29:24 2019	(r348402)
@@ -120,7 +120,6 @@ static void	ena_destroy_all_rx_queues(struct ena_adapt
 static void	ena_destroy_all_io_queues(struct ena_adapter *);
 static int	ena_create_io_queues(struct ena_adapter *);
 static int	ena_tx_cleanup(struct ena_ring *);
-static void	ena_deferred_rx_cleanup(void *, int);
 static int	ena_rx_cleanup(struct ena_ring *);
 static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
 static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
@@ -129,7 +128,8 @@ static struct mbuf* ena_rx_mbuf(struct ena_ring *, str
     struct ena_com_rx_ctx *, uint16_t *);
 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
     struct mbuf *);
-static void	ena_handle_msix(void *);
+static void	ena_cleanup(void *arg, int pending);
+static int	ena_handle_msix(void *);
 static int	ena_enable_msix(struct ena_adapter *);
 static void	ena_setup_mgmnt_intr(struct ena_adapter *);
 static void	ena_setup_io_intr(struct ena_adapter *);
@@ -450,7 +450,6 @@ ena_init_io_rings(struct ena_adapter *adapter)
 		    device_get_nameunit(adapter->pdev), i);
 
 		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
-		mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF);
 
 		que = &adapter->que[i];
 		que->adapter = adapter;
@@ -481,7 +480,6 @@ ena_free_io_ring_resources(struct ena_adapter *adapter
 	ENA_RING_MTX_UNLOCK(txr);
 
 	mtx_destroy(&txr->ring_mtx);
-	mtx_destroy(&rxr->ring_mtx);
 }
 
 static void
@@ -627,6 +625,8 @@ ena_setup_tx_resources(struct ena_adapter *adapter, in
 		goto err_buf_info_unmap;
 	}
 
+	tx_ring->running = true;
+
 	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
 	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
 
@@ -811,14 +811,6 @@ ena_setup_rx_resources(struct ena_adapter *adapter, un
 		}
 	}
 
-	/* Allocate taskqueues */
-	TASK_INIT(&rx_ring->cmpl_task, 0, ena_deferred_rx_cleanup, rx_ring);
-	rx_ring->cmpl_tq = taskqueue_create_fast("ena RX completion", M_WAITOK,
-	    taskqueue_thread_enqueue, &rx_ring->cmpl_tq);
-
-	taskqueue_start_threads(&rx_ring->cmpl_tq, 1, PI_NET,
-	    "%s rx_ring cmpl %d", device_get_nameunit(adapter->pdev), que->cpu);
-
 	return (0);
 
 err_buf_info_unmap:
@@ -846,11 +838,6 @@ ena_free_rx_resources(struct ena_adapter *adapter, uns
 {
 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 
-	while (taskqueue_cancel(rx_ring->cmpl_tq, &rx_ring->cmpl_task, NULL) != 0)
-		taskqueue_drain(rx_ring->cmpl_tq, &rx_ring->cmpl_task);
-
-	taskqueue_free(rx_ring->cmpl_tq);
-
 	/* Free buffer DMA maps, */
 	for (int i = 0; i < rx_ring->ring_size; i++) {
 		bus_dmamap_sync(adapter->rx_buf_tag,
@@ -1176,6 +1163,18 @@ ena_destroy_all_rx_queues(struct ena_adapter *adapter)
 static void
 ena_destroy_all_io_queues(struct ena_adapter *adapter)
 {
+	struct ena_que *queue;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		queue = &adapter->que[i];
+		while (taskqueue_cancel(queue->cleanup_tq,
+		    &queue->cleanup_task, NULL))
+			taskqueue_drain(queue->cleanup_tq,
+			    &queue->cleanup_task);
+		taskqueue_free(queue->cleanup_tq);
+	}
+
 	ena_destroy_all_tx_queues(adapter);
 	ena_destroy_all_rx_queues(adapter);
 }
@@ -1206,6 +1205,7 @@ ena_create_io_queues(struct ena_adapter *adapter)
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	struct ena_com_create_io_ctx ctx;
 	struct ena_ring *ring;
+	struct ena_que *queue;
 	uint16_t ena_qid;
 	uint32_t msix_vector;
 	int rc, i;
@@ -1267,6 +1267,18 @@ ena_create_io_queues(struct ena_adapter *adapter)
 		}
 	}
 
+	for (i = 0; i < adapter->num_queues; i++) {
+		queue = &adapter->que[i];
+
+		TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
+		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
+		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
+
+		taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
+		    "%s queue %d cleanup",
+		    device_get_nameunit(adapter->pdev), i);
+	}
+
 	return (0);
 
 err_rx:
@@ -1304,6 +1316,7 @@ ena_tx_cleanup(struct ena_ring *tx_ring)
 	int commit = TX_COMMIT;
 	int budget = TX_BUDGET;
 	int work_done;
+	bool above_thresh;
 
 	adapter = tx_ring->que->adapter;
 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
@@ -1372,8 +1385,28 @@ ena_tx_cleanup(struct ena_ring *tx_ring)
 		ena_com_update_dev_comp_head(io_cq);
 	}
 
-	taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
+	/*
+	 * Need to make the rings circular update visible to
+	 * ena_xmit_mbuf() before checking for tx_ring->running.
+	 */
+	mb();
 
+	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+	    ENA_TX_RESUME_THRESH);
+	if (unlikely(!tx_ring->running && above_thresh)) {
+		ENA_RING_MTX_LOCK(tx_ring);
+		above_thresh =
+		    ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+		    ENA_TX_RESUME_THRESH);
+		if (!tx_ring->running && above_thresh) {
+			tx_ring->running = true;
+			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
+			taskqueue_enqueue(tx_ring->enqueue_tq,
+			    &tx_ring->enqueue_task);
+		}
+		ENA_RING_MTX_UNLOCK(tx_ring);
+	}
+
 	return (work_done);
 }
 
@@ -1581,24 +1614,6 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct ena_c
 	}
 }
 
-static void
-ena_deferred_rx_cleanup(void *arg, int pending)
-{
-	struct ena_ring *rx_ring = arg;
-	int budget = CLEAN_BUDGET;
-
-	ENA_RING_MTX_LOCK(rx_ring);
-	/*
-	 * If deferred task was executed, perform cleanup of all awaiting
-	 * descs (or until given budget is depleted to avoid infinite loop).
-	 */
-	while (likely(budget--)) {
-		if (ena_rx_cleanup(rx_ring) == 0)
-			break;
-	}
-	ENA_RING_MTX_UNLOCK(rx_ring);
-}
-
 /**
  * ena_rx_cleanup - handle rx irq
  * @arg: ring for which irq is being handled
@@ -1753,12 +1768,8 @@ ena_intr_msix_mgmnt(void *arg)
 		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
 }
 
-/**
- * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
- * @arg: interrupt number
- **/
 static void
-ena_handle_msix(void *arg)
+ena_cleanup(void *arg, int pending)
 {
 	struct ena_que	*que = arg;
 	struct ena_adapter *adapter = que->adapter;
@@ -1785,22 +1796,8 @@ ena_handle_msix(void *arg)
 	rx_ring->first_interrupt = true;
 
 	for (i = 0; i < CLEAN_BUDGET; ++i) {
-		/*
-		 * If lock cannot be acquired, then deferred cleanup task was
-		 * being executed and rx ring is being cleaned up in
-		 * another thread.
-		 */
-		if (likely(ENA_RING_MTX_TRYLOCK(rx_ring) != 0)) {
-			rxc = ena_rx_cleanup(rx_ring);
-			ENA_RING_MTX_UNLOCK(rx_ring);
-		} else {
-			rxc = 0;
-		}
-
-		/* Protection from calling ena_tx_cleanup from ena_start_xmit */
-		ENA_RING_MTX_LOCK(tx_ring);
+		rxc = ena_rx_cleanup(rx_ring);
 		txc = ena_tx_cleanup(tx_ring);
-		ENA_RING_MTX_UNLOCK(tx_ring);
 
 		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
 			return;
@@ -1817,7 +1814,26 @@ ena_handle_msix(void *arg)
 	ena_com_unmask_intr(io_cq, &intr_reg);
 }
 
+/**
+ * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
+ * @arg: queue
+ **/
 static int
+ena_handle_msix(void *arg)
+{
+	struct ena_que *queue = arg;
+	struct ena_adapter *adapter = queue->adapter;
+	if_t ifp = adapter->ifp;
+
+	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+		return (FILTER_STRAY);
+
+	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
+
+	return (FILTER_HANDLED);
+}
+
+static int
 ena_enable_msix(struct ena_adapter *adapter)
 {
 	device_t dev = adapter->pdev;
@@ -1997,8 +2013,8 @@ ena_request_io_irq(struct ena_adapter *adapter)
 		}
 
 		rc = bus_setup_intr(adapter->pdev, irq->res,
-		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
-		    irq->handler, irq->data, &irq->cookie);
+		    INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
+		    irq->data, &irq->cookie);
 		 if (unlikely(rc != 0)) {
 			device_printf(adapter->pdev, "failed to register "
 			    "interrupt handler for irq %ju: %d\n",
@@ -2834,6 +2850,34 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **
 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
 	    tx_ring->ring_size);
 
+	/* stop the queue when no more space available, the packet can have up
+	 * to sgl_size + 2. one for the meta descriptor and one for header
+	 * (if the header is larger than tx_max_header_size).
+	 */
+	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+	    adapter->max_tx_sgl_size + 2))) {
+		ena_trace(ENA_DBG | ENA_TXPTH, "Stop queue %d\n",
+		    tx_ring->que->id);
+
+		tx_ring->running = false;
+		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
+
+		/* There is a rare condition where this function decides to
+		 * stop the queue but meanwhile tx_cleanup() updates
+		 * next_to_completion and terminates.
+		 * The queue will remain stopped forever.
+		 * To solve this issue this function performs mb(), checks
+		 * the wakeup condition and wakes up the queue if needed.
+		 */
+		mb();
+
+		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+		    ENA_TX_RESUME_THRESH)) {
+			tx_ring->running = true;
+			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
+		}
+	}
+
 	bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map,
 	    BUS_DMASYNC_PREWRITE);
 
@@ -2870,9 +2914,10 @@ ena_start_xmit(struct ena_ring *tx_ring)
 		    " header csum flags %#jx",
 		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
 
-		if (unlikely(ena_com_free_desc(io_sq) <=
-		    ENA_TX_CLEANUP_THRESHOLD))
-			ena_tx_cleanup(tx_ring);
+		if (unlikely(!tx_ring->running)) {
+			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
+			break;
+		}
 
 		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
 			if (ret == ENA_COM_NO_MEM) {
@@ -2914,8 +2959,9 @@ ena_start_xmit(struct ena_ring *tx_ring)
 		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
 	}
 
-	if (ena_com_free_desc(io_sq) <= ENA_TX_CLEANUP_THRESHOLD)
-		ena_tx_cleanup(tx_ring);
+	if (unlikely(!tx_ring->running))
+		taskqueue_enqueue(tx_ring->que->cleanup_tq,
+		    &tx_ring->que->cleanup_task);
 }
 
 static void
@@ -2925,6 +2971,7 @@ ena_deferred_mq_start(void *arg, int pending)
 	struct ifnet *ifp = tx_ring->adapter->ifp;
 
 	while (!drbr_empty(ifp, tx_ring->br) &&
+	    tx_ring->running &&
 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
 		ENA_RING_MTX_LOCK(tx_ring);
 		ena_start_xmit(tx_ring);
@@ -3575,7 +3622,7 @@ check_for_missing_completions(struct ena_adapter *adap
 	adapter->next_monitored_tx_qid = i % adapter->num_queues;
 }
 
-/* trigger deferred rx cleanup after 2 consecutive detections */
+/* trigger rx cleanup after 2 consecutive detections */
 #define EMPTY_RX_REFILL 2
 /* For the rare case where the device runs out of Rx descriptors and the
  * msix handler failed to refill new Rx descriptors (due to a lack of memory
@@ -3613,8 +3660,8 @@ check_for_empty_rx_ring(struct ena_adapter *adapter)
 				device_printf(adapter->pdev,
 				    "trigger refill for ring %d\n", i);
 
-				taskqueue_enqueue(rx_ring->cmpl_tq,
-				    &rx_ring->cmpl_task);
+				taskqueue_enqueue(rx_ring->que->cleanup_tq,
+				    &rx_ring->que->cleanup_task);
 				rx_ring->empty_rx_queue = 0;
 			}
 		} else {

Modified: head/sys/dev/ena/ena.h
==============================================================================
--- head/sys/dev/ena/ena.h	Thu May 30 13:28:03 2019	(r348401)
+++ head/sys/dev/ena/ena.h	Thu May 30 13:29:24 2019	(r348402)
@@ -84,7 +84,7 @@
 #define	ENA_MAX_FRAME_LEN		10000
 #define	ENA_MIN_FRAME_LEN 		60
 
-#define ENA_TX_CLEANUP_THRESHOLD	128
+#define ENA_TX_RESUME_THRESH		(ENA_PKT_MAX_BUFS + 2)
 
 #define DB_THRESHOLD	64
 
@@ -163,7 +163,7 @@ typedef struct _ena_vendor_info_t {
 struct ena_irq {
 	/* Interrupt resources */
 	struct resource *res;
-	driver_intr_t *handler;
+	driver_filter_t *handler;
 	void *data;
 	void *cookie;
 	unsigned int vector;
@@ -176,6 +176,10 @@ struct ena_que {
 	struct ena_adapter *adapter;
 	struct ena_ring *tx_ring;
 	struct ena_ring *rx_ring;
+
+	struct task cleanup_task;
+	struct taskqueue *cleanup_tq;
+
 	uint32_t id;
 	int cpu;
 };
@@ -222,6 +226,8 @@ struct ena_stats_tx {
 	counter_u64_t bad_req_id;
 	counter_u64_t collapse;
 	counter_u64_t collapse_err;
+	counter_u64_t queue_wakeup;
+	counter_u64_t queue_stop;
 };
 
 struct ena_stats_rx {
@@ -285,15 +291,9 @@ struct ena_ring {
 	struct mtx ring_mtx;
 	char mtx_name[16];
 
-	union {
-		struct {
-			struct task enqueue_task;
-			struct taskqueue *enqueue_tq;
-		};
-		struct {
-			struct task cmpl_task;
-			struct taskqueue *cmpl_tq;
-		};
+	struct {
+		struct task enqueue_task;
+		struct taskqueue *enqueue_tq;
 	};
 
 	union {
@@ -301,7 +301,11 @@ struct ena_ring {
 		struct ena_stats_rx rx_stats;
 	};
 
-	int empty_rx_queue;
+	union {
+		int empty_rx_queue;
+		/* For Tx ring to indicate if it's running or not */
+		bool running;
+	};
 } __aligned(CACHE_LINE_SIZE);
 
 struct ena_stats_dev {

Modified: head/sys/dev/ena/ena_sysctl.c
==============================================================================
--- head/sys/dev/ena/ena_sysctl.c	Thu May 30 13:28:03 2019	(r348401)
+++ head/sys/dev/ena/ena_sysctl.c	Thu May 30 13:29:24 2019	(r348402)
@@ -190,6 +190,12 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
 		        "mbuf_collapse_err", CTLFLAG_RD,
 		        &tx_stats->collapse_err,
 		        "Mbuf collapse failures");
+		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+		    "queue_wakeups", CTLFLAG_RD,
+		    &tx_stats->queue_wakeup, "Queue wakeups");
+		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+		    "queue_stops", CTLFLAG_RD,
+		    &tx_stats->queue_stop, "Queue stops");
 
 		/* RX specific stats */
 		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905301329.x4UDTOww066571>