Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 20 Aug 2021 12:01:28 GMT
From:      Wei Hu <whu@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: ce110ea12fce - main - Microsoft Azure Network Adapter(MANA) VF support
Message-ID:  <202108201201.17KC1SmP006699@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by whu:

URL: https://cgit.FreeBSD.org/src/commit/?id=ce110ea12fcea71ae437d0a1d0549d3d32055b0e

commit ce110ea12fcea71ae437d0a1d0549d3d32055b0e
Author:     Wei Hu <whu@FreeBSD.org>
AuthorDate: 2021-08-20 08:43:10 +0000
Commit:     Wei Hu <whu@FreeBSD.org>
CommitDate: 2021-08-20 10:44:57 +0000

    Microsoft Azure Network Adapter(MANA) VF support
    
    MANA is the new network adapter from Microsoft which will be available
    in Azure public cloud. It provides SRIOV NIC as virtual function to
    guest OS running on Hyper-V.
    
    The code can be divided into two major parts. Gdma_main.c is the one to
    bring up the hardware board and drives all underlying hardware queue
    infrastructure. Mana_en.c contains all main ethernet driver code.
    It has only tested and supported on amd64 architecture.
    
    PR:             256336
    Reviewed by:    decui@microsoft.com
    Tested by:      whu
    MFC after:      2 week
    Relnotes:       yes
    Sponsored by:   Microsoft
    Differential Revision:  https://reviews.freebsd.org/D31150
---
 sys/conf/files.x86         |    6 +
 sys/dev/mana/gdma.h        |  744 ++++++++++++
 sys/dev/mana/gdma_main.c   | 1961 ++++++++++++++++++++++++++++++++
 sys/dev/mana/gdma_util.c   |   96 ++
 sys/dev/mana/gdma_util.h   |  206 ++++
 sys/dev/mana/hw_channel.c  |  950 ++++++++++++++++
 sys/dev/mana/hw_channel.h  |  222 ++++
 sys/dev/mana/mana.h        |  689 +++++++++++
 sys/dev/mana/mana_en.c     | 2699 ++++++++++++++++++++++++++++++++++++++++++++
 sys/dev/mana/mana_sysctl.c |  219 ++++
 sys/dev/mana/mana_sysctl.h |   48 +
 sys/dev/mana/shm_channel.c |  337 ++++++
 sys/dev/mana/shm_channel.h |   52 +
 sys/modules/Makefile       |    2 +
 sys/modules/mana/Makefile  |   12 +
 15 files changed, 8243 insertions(+)

diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index d0cda2da8580..925a3c5fe889 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -264,6 +264,12 @@ dev/isci/scil/scif_sas_task_request_states.c			optional isci
 dev/isci/scil/scif_sas_timer.c					optional isci
 dev/itwd/itwd.c							optional itwd
 dev/kvm_clock/kvm_clock.c	optional	kvm_clock
+dev/mana/gdma_main.c		optional	mana
+dev/mana/mana_en.c		optional	mana
+dev/mana/mana_sysctl.c		optional	mana
+dev/mana/shm_channel.c		optional	mana
+dev/mana/hw_channel.c		optional	mana
+dev/mana/gdma_util.c		optional	mana
 dev/qat/qat.c			optional qat
 dev/qat/qat_ae.c		optional qat
 dev/qat/qat_c2xxx.c		optional qat
diff --git a/sys/dev/mana/gdma.h b/sys/dev/mana/gdma.h
new file mode 100644
index 000000000000..097b2b65e545
--- /dev/null
+++ b/sys/dev/mana/gdma.h
@@ -0,0 +1,744 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _GDMA_H
+#define _GDMA_H
+
+#include <sys/bus.h>
+#include <sys/bus_dma.h>
+#include <sys/types.h>
+#include <sys/limits.h>
+#include <sys/sx.h>
+
+#include "gdma_util.h"
+#include "shm_channel.h"
+
+/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+#define GDMA_BAR0		0
+
+#define GDMA_IRQNAME_SZ		40
+
+struct gdma_bus {
+	bus_space_handle_t	bar0_h;
+	bus_space_tag_t		bar0_t;
+};
+
+struct gdma_msix_entry {
+	int			entry;
+	int			vector;
+};
+
+enum gdma_request_type {
+	GDMA_VERIFY_VF_DRIVER_VERSION	= 1,
+	GDMA_QUERY_MAX_RESOURCES	= 2,
+	GDMA_LIST_DEVICES		= 3,
+	GDMA_REGISTER_DEVICE		= 4,
+	GDMA_DEREGISTER_DEVICE		= 5,
+	GDMA_GENERATE_TEST_EQE		= 10,
+	GDMA_CREATE_QUEUE		= 12,
+	GDMA_DISABLE_QUEUE		= 13,
+	GDMA_CREATE_DMA_REGION		= 25,
+	GDMA_DMA_REGION_ADD_PAGES	= 26,
+	GDMA_DESTROY_DMA_REGION		= 27,
+};
+
+enum gdma_queue_type {
+	GDMA_INVALID_QUEUE,
+	GDMA_SQ,
+	GDMA_RQ,
+	GDMA_CQ,
+	GDMA_EQ,
+};
+
+enum gdma_work_request_flags {
+	GDMA_WR_NONE			= 0,
+	GDMA_WR_OOB_IN_SGL		= BIT(0),
+	GDMA_WR_PAD_BY_SGE0		= BIT(1),
+};
+
+enum gdma_eqe_type {
+	GDMA_EQE_COMPLETION		= 3,
+	GDMA_EQE_TEST_EVENT		= 64,
+	GDMA_EQE_HWC_INIT_EQ_ID_DB	= 129,
+	GDMA_EQE_HWC_INIT_DATA		= 130,
+	GDMA_EQE_HWC_INIT_DONE		= 131,
+};
+
+enum {
+	GDMA_DEVICE_NONE	= 0,
+	GDMA_DEVICE_HWC		= 1,
+	GDMA_DEVICE_MANA	= 2,
+};
+
+
+struct gdma_resource {
+	/* Protect the bitmap */
+	struct mtx		lock_spin;
+
+	/* The bitmap size in bits. */
+	uint32_t		size;
+
+	/* The bitmap tracks the resources. */
+	unsigned long		*map;
+};
+
+union gdma_doorbell_entry {
+	uint64_t		as_uint64;
+
+	struct {
+		uint64_t id		: 24;
+		uint64_t reserved	: 8;
+		uint64_t tail_ptr	: 31;
+		uint64_t arm		: 1;
+	} cq;
+
+	struct {
+		uint64_t id		: 24;
+		uint64_t wqe_cnt	: 8;
+		uint64_t tail_ptr	: 32;
+	} rq;
+
+	struct {
+		uint64_t id		: 24;
+		uint64_t reserved	: 8;
+		uint64_t tail_ptr	: 32;
+	} sq;
+
+	struct {
+		uint64_t id		: 16;
+		uint64_t reserved	: 16;
+		uint64_t tail_ptr	: 31;
+		uint64_t arm		: 1;
+	} eq;
+}; /* HW DATA */
+
+struct gdma_msg_hdr {
+	uint32_t	hdr_type;
+	uint32_t	msg_type;
+	uint16_t	msg_version;
+	uint16_t	hwc_msg_id;
+	uint32_t	msg_size;
+}; /* HW DATA */
+
+struct gdma_dev_id {
+	union {
+		struct {
+			uint16_t type;
+			uint16_t instance;
+		};
+
+		uint32_t as_uint32;
+	};
+}; /* HW DATA */
+
+struct gdma_req_hdr {
+	struct gdma_msg_hdr	req;
+	struct gdma_msg_hdr	resp; /* The expected response */
+	struct gdma_dev_id	dev_id;
+	uint32_t		activity_id;
+}; /* HW DATA */
+
+struct gdma_resp_hdr {
+	struct gdma_msg_hdr	response;
+	struct gdma_dev_id	dev_id;
+	uint32_t		activity_id;
+	uint32_t		status;
+	uint32_t		reserved;
+}; /* HW DATA */
+
+struct gdma_general_req {
+	struct gdma_req_hdr	hdr;
+}; /* HW DATA */
+
+#define GDMA_MESSAGE_V1 1
+
+struct gdma_general_resp {
+	struct gdma_resp_hdr	hdr;
+}; /* HW DATA */
+
+#define GDMA_STANDARD_HEADER_TYPE	0
+
+static inline void
+mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, uint32_t code,
+    uint32_t req_size, uint32_t resp_size)
+{
+	hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE;
+	hdr->req.msg_type = code;
+	hdr->req.msg_version = GDMA_MESSAGE_V1;
+	hdr->req.msg_size = req_size;
+
+	hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE;
+	hdr->resp.msg_type = code;
+	hdr->resp.msg_version = GDMA_MESSAGE_V1;
+	hdr->resp.msg_size = resp_size;
+}
+
+/* The 16-byte struct is part of the GDMA work queue entry (WQE). */
+struct gdma_sge {
+	uint64_t		address;
+	uint32_t		mem_key;
+	uint32_t		size;
+}; /* HW DATA */
+
+struct gdma_wqe_request {
+	struct gdma_sge		*sgl;
+	uint32_t		num_sge;
+
+	uint32_t		inline_oob_size;
+	const void		*inline_oob_data;
+
+	uint32_t		flags;
+	uint32_t		client_data_unit;
+};
+
+enum gdma_page_type {
+	GDMA_PAGE_TYPE_4K,
+};
+
+#define GDMA_INVALID_DMA_REGION		0
+
+struct gdma_mem_info {
+	device_t		dev;
+
+	bus_dma_tag_t		dma_tag;
+	bus_dmamap_t		dma_map;
+	bus_addr_t		dma_handle;	/* Physical address	*/
+	void			*virt_addr;	/* Virtual address	*/
+	uint64_t		length;
+
+	/* Allocated by the PF driver */
+	uint64_t		gdma_region;
+};
+
+#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
+
+struct gdma_dev {
+	struct gdma_context	*gdma_context;
+
+	struct gdma_dev_id	dev_id;
+
+	uint32_t		pdid;
+	uint32_t		doorbell;
+	uint32_t		gpa_mkey;
+
+	/* GDMA driver specific pointer */
+	void			*driver_data;
+};
+
+#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE
+
+#define GDMA_CQE_SIZE		64
+#define GDMA_EQE_SIZE		16
+#define GDMA_MAX_SQE_SIZE	512
+#define GDMA_MAX_RQE_SIZE	256
+
+#define GDMA_COMP_DATA_SIZE	0x3C
+
+#define GDMA_EVENT_DATA_SIZE	0xC
+
+/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */
+#define GDMA_WQE_BU_SIZE	32
+
+#define INVALID_PDID		UINT_MAX
+#define INVALID_DOORBELL	UINT_MAX
+#define INVALID_MEM_KEY		UINT_MAX
+#define INVALID_QUEUE_ID	UINT_MAX
+#define INVALID_PCI_MSIX_INDEX  UINT_MAX
+
+struct gdma_comp {
+	uint32_t		cqe_data[GDMA_COMP_DATA_SIZE / 4];
+	uint32_t		wq_num;
+	bool			is_sq;
+};
+
+struct gdma_event {
+	uint32_t		details[GDMA_EVENT_DATA_SIZE / 4];
+	uint8_t			type;
+};
+
+struct gdma_queue;
+
+#define CQE_POLLING_BUFFER	512
+
+typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
+    struct gdma_event *e);
+
+typedef void gdma_cq_callback(void *context, struct gdma_queue *q);
+
+/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE
+ * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the
+ * driver increases the 'head' in BUs rather than in bytes, and notifies
+ * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track
+ * the HW head, and increases the 'head' by 1 for every processed EQE/CQE.
+ *
+ * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is
+ * processed, the driver increases the 'tail' to indicate that WQEs have
+ * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ.
+ *
+ * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures
+ * that the EQ/CQ is big enough so they can't overflow, and the driver uses
+ * the owner bits mechanism to detect if the queue has become empty.
+ */
+struct gdma_queue {
+	struct gdma_dev		*gdma_dev;
+
+	enum gdma_queue_type	type;
+	uint32_t		id;
+
+	struct gdma_mem_info	mem_info;
+
+	void			*queue_mem_ptr;
+	uint32_t		queue_size;
+
+	bool			monitor_avl_buf;
+
+	uint32_t		head;
+	uint32_t		tail;
+
+	/* Extra fields specific to EQ/CQ. */
+	union {
+		struct {
+			bool			disable_needed;
+
+			gdma_eq_callback	*callback;
+			void			*context;
+
+			unsigned int		msix_index;
+
+			uint32_t		log2_throttle_limit;
+
+			struct task		cleanup_task;
+			struct taskqueue	*cleanup_tq;
+			int			cpu;
+			bool			do_not_ring_db;
+
+			int			work_done;
+			int			budget;
+		} eq;
+
+		struct {
+			gdma_cq_callback	*callback;
+			void			*context;
+
+			/* For CQ/EQ relationship */
+			struct gdma_queue	*parent;
+		} cq;
+	};
+};
+
+struct gdma_queue_spec {
+	enum gdma_queue_type	type;
+	bool			monitor_avl_buf;
+	unsigned int		queue_size;
+
+	/* Extra fields specific to EQ/CQ. */
+	union {
+		struct {
+			gdma_eq_callback	*callback;
+			void			*context;
+
+			unsigned long		log2_throttle_limit;
+
+			/* Only used by the MANA device. */
+			struct ifnet		*ndev;
+		} eq;
+
+		struct {
+			gdma_cq_callback	*callback;
+			void			*context;
+
+			struct			gdma_queue *parent_eq;
+
+		} cq;
+	};
+};
+
+struct mana_eq {
+	struct gdma_queue	*eq;
+	struct gdma_comp	cqe_poll[CQE_POLLING_BUFFER];
+};
+
+struct gdma_irq_context {
+	struct gdma_msix_entry	msix_e;
+	struct resource		*res;
+	driver_intr_t		*handler;
+	void			*arg;
+	void			*cookie;
+	bool			requested;
+	int			cpu;
+	char			name[GDMA_IRQNAME_SZ];
+};
+
+struct gdma_context {
+	device_t		dev;
+
+	struct gdma_bus		gd_bus;
+
+	/* Per-vPort max number of queues */
+	unsigned int		max_num_queues;
+	unsigned int		max_num_msix;
+	unsigned int		num_msix_usable;
+	struct gdma_resource	msix_resource;
+	struct gdma_irq_context	*irq_contexts;
+
+	/* This maps a CQ index to the queue structure. */
+	unsigned int		max_num_cqs;
+	struct gdma_queue	**cq_table;
+
+	/* Protect eq_test_event and test_event_eq_id  */
+	struct sx		eq_test_event_sx;
+	struct completion	eq_test_event;
+	uint32_t		test_event_eq_id;
+
+	struct resource		*bar0;
+	struct resource		*msix;
+	int			msix_rid;
+	void __iomem		*shm_base;
+	void __iomem		*db_page_base;
+	uint32_t		db_page_size;
+
+	/* Shared memory chanenl (used to bootstrap HWC) */
+	struct shm_channel	shm_channel;
+
+	/* Hardware communication channel (HWC) */
+	struct gdma_dev		hwc;
+
+	/* Azure network adapter */
+	struct gdma_dev		mana;
+};
+
+#define MAX_NUM_GDMA_DEVICES	4
+
+static inline bool mana_gd_is_mana(struct gdma_dev *gd)
+{
+	return gd->dev_id.type == GDMA_DEVICE_MANA;
+}
+
+static inline bool mana_gd_is_hwc(struct gdma_dev *gd)
+{
+	return gd->dev_id.type == GDMA_DEVICE_HWC;
+}
+
+uint8_t *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, uint32_t wqe_offset);
+uint32_t mana_gd_wq_avail_space(struct gdma_queue *wq);
+
+int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq);
+
+int mana_gd_create_hwc_queue(struct gdma_dev *gd,
+    const struct gdma_queue_spec *spec,
+    struct gdma_queue **queue_ptr);
+
+int mana_gd_create_mana_eq(struct gdma_dev *gd,
+    const struct gdma_queue_spec *spec,
+    struct gdma_queue **queue_ptr);
+
+int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
+    const struct gdma_queue_spec *spec,
+    struct gdma_queue **queue_ptr);
+
+void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue);
+
+int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
+
+void mana_gd_arm_cq(struct gdma_queue *cq);
+
+struct gdma_wqe {
+	uint32_t reserved	:24;
+	uint32_t last_vbytes	:8;
+
+	union {
+		uint32_t flags;
+
+		struct {
+			uint32_t num_sge		:8;
+			uint32_t inline_oob_size_div4	:3;
+			uint32_t client_oob_in_sgl	:1;
+			uint32_t reserved1		:4;
+			uint32_t client_data_unit	:14;
+			uint32_t reserved2		:2;
+		};
+	};
+}; /* HW DATA */
+
+#define INLINE_OOB_SMALL_SIZE	8
+#define INLINE_OOB_LARGE_SIZE	24
+
+#define MAX_TX_WQE_SIZE		512
+#define MAX_RX_WQE_SIZE		256
+
+struct gdma_cqe {
+	uint32_t cqe_data[GDMA_COMP_DATA_SIZE / 4];
+
+	union {
+		uint32_t as_uint32;
+
+		struct {
+			uint32_t wq_num		:24;
+			uint32_t is_sq		:1;
+			uint32_t reserved	:4;
+			uint32_t owner_bits	:3;
+		};
+	} cqe_info;
+}; /* HW DATA */
+
+#define GDMA_CQE_OWNER_BITS	3
+
+#define GDMA_CQE_OWNER_MASK	((1 << GDMA_CQE_OWNER_BITS) - 1)
+
+#define SET_ARM_BIT		1
+
+#define GDMA_EQE_OWNER_BITS	3
+
+union gdma_eqe_info {
+	uint32_t as_uint32;
+
+	struct {
+		uint32_t type		: 8;
+		uint32_t reserved1	: 8;
+		uint32_t client_id	: 2;
+		uint32_t reserved2	: 11;
+		uint32_t owner_bits	: 3;
+	};
+}; /* HW DATA */
+
+#define GDMA_EQE_OWNER_MASK	((1 << GDMA_EQE_OWNER_BITS) - 1)
+#define INITIALIZED_OWNER_BIT(log2_num_entries)	(1UL << (log2_num_entries))
+
+struct gdma_eqe {
+	uint32_t details[GDMA_EVENT_DATA_SIZE / 4];
+	uint32_t eqe_info;
+}; /* HW DATA */
+
+#define GDMA_REG_DB_PAGE_OFFSET	8
+#define GDMA_REG_DB_PAGE_SIZE	0x10
+#define GDMA_REG_SHM_OFFSET	0x18
+
+struct gdma_posted_wqe_info {
+	uint32_t wqe_size_in_bu;
+};
+
+/* GDMA_GENERATE_TEST_EQE */
+struct gdma_generate_test_event_req {
+	struct gdma_req_hdr hdr;
+	uint32_t queue_index;
+}; /* HW DATA */
+
+/* GDMA_VERIFY_VF_DRIVER_VERSION */
+enum {
+	GDMA_PROTOCOL_V1	= 1,
+	GDMA_PROTOCOL_FIRST	= GDMA_PROTOCOL_V1,
+	GDMA_PROTOCOL_LAST	= GDMA_PROTOCOL_V1,
+};
+
+struct gdma_verify_ver_req {
+	struct gdma_req_hdr hdr;
+
+	/* Mandatory fields required for protocol establishment */
+	uint64_t protocol_ver_min;
+	uint64_t protocol_ver_max;
+	uint64_t drv_cap_flags1;
+	uint64_t drv_cap_flags2;
+	uint64_t drv_cap_flags3;
+	uint64_t drv_cap_flags4;
+
+	/* Advisory fields */
+	uint64_t drv_ver;
+	uint32_t os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */
+	uint32_t reserved;
+	uint32_t os_ver_major;
+	uint32_t os_ver_minor;
+	uint32_t os_ver_build;
+	uint32_t os_ver_platform;
+	uint64_t reserved_2;
+	uint8_t os_ver_str1[128];
+	uint8_t os_ver_str2[128];
+	uint8_t os_ver_str3[128];
+	uint8_t os_ver_str4[128];
+}; /* HW DATA */
+
+struct gdma_verify_ver_resp {
+	struct gdma_resp_hdr hdr;
+	uint64_t gdma_protocol_ver;
+	uint64_t pf_cap_flags1;
+	uint64_t pf_cap_flags2;
+	uint64_t pf_cap_flags3;
+	uint64_t pf_cap_flags4;
+}; /* HW DATA */
+
+/* GDMA_QUERY_MAX_RESOURCES */
+struct gdma_query_max_resources_resp {
+	struct gdma_resp_hdr hdr;
+	uint32_t status;
+	uint32_t max_sq;
+	uint32_t max_rq;
+	uint32_t max_cq;
+	uint32_t max_eq;
+	uint32_t max_db;
+	uint32_t max_mst;
+	uint32_t max_cq_mod_ctx;
+	uint32_t max_mod_cq;
+	uint32_t max_msix;
+}; /* HW DATA */
+
+/* GDMA_LIST_DEVICES */
+struct gdma_list_devices_resp {
+	struct gdma_resp_hdr hdr;
+	uint32_t num_of_devs;
+	uint32_t reserved;
+	struct gdma_dev_id devs[64];
+}; /* HW DATA */
+
+/* GDMA_REGISTER_DEVICE */
+struct gdma_register_device_resp {
+	struct gdma_resp_hdr hdr;
+	uint32_t pdid;
+	uint32_t gpa_mkey;
+	uint32_t db_id;
+}; /* HW DATA */
+
+/* GDMA_CREATE_QUEUE */
+struct gdma_create_queue_req {
+	struct gdma_req_hdr hdr;
+	uint32_t type;
+	uint32_t reserved1;
+	uint32_t pdid;
+	uint32_t doolbell_id;
+	uint64_t gdma_region;
+	uint32_t reserved2;
+	uint32_t queue_size;
+	uint32_t log2_throttle_limit;
+	uint32_t eq_pci_msix_index;
+	uint32_t cq_mod_ctx_id;
+	uint32_t cq_parent_eq_id;
+	uint8_t  rq_drop_on_overrun;
+	uint8_t  rq_err_on_wqe_overflow;
+	uint8_t  rq_chain_rec_wqes;
+	uint8_t  sq_hw_db;
+	uint32_t reserved3;
+}; /* HW DATA */
+
+struct gdma_create_queue_resp {
+	struct gdma_resp_hdr hdr;
+	uint32_t queue_index;
+}; /* HW DATA */
+
+/* GDMA_DISABLE_QUEUE */
+struct gdma_disable_queue_req {
+	struct gdma_req_hdr hdr;
+	uint32_t type;
+	uint32_t queue_index;
+	uint32_t alloc_res_id_on_creation;
+}; /* HW DATA */
+
+/* GDMA_CREATE_DMA_REGION */
+struct gdma_create_dma_region_req {
+	struct gdma_req_hdr hdr;
+
+	/* The total size of the DMA region */
+	uint64_t length;
+
+	/* The offset in the first page */
+	uint32_t offset_in_page;
+
+	/* enum gdma_page_type */
+	uint32_t gdma_page_type;
+
+	/* The total number of pages */
+	uint32_t page_count;
+
+	/* If page_addr_list_len is smaller than page_count,
+	 * the remaining page addresses will be added via the
+	 * message GDMA_DMA_REGION_ADD_PAGES.
+	 */
+	uint32_t page_addr_list_len;
+	uint64_t page_addr_list[];
+}; /* HW DATA */
+
+struct gdma_create_dma_region_resp {
+	struct gdma_resp_hdr hdr;
+	uint64_t gdma_region;
+}; /* HW DATA */
+
+/* GDMA_DMA_REGION_ADD_PAGES */
+struct gdma_dma_region_add_pages_req {
+	struct gdma_req_hdr hdr;
+
+	uint64_t gdma_region;
+
+	uint32_t page_addr_list_len;
+	uint32_t reserved3;
+
+	uint64_t page_addr_list[];
+}; /* HW DATA */
+
+/* GDMA_DESTROY_DMA_REGION */
+struct gdma_destroy_dma_region_req {
+	struct gdma_req_hdr hdr;
+
+	uint64_t gdma_region;
+}; /* HW DATA */
+
+int mana_gd_verify_vf_version(device_t dev);
+
+int mana_gd_register_device(struct gdma_dev *gd);
+int mana_gd_deregister_device(struct gdma_dev *gd);
+
+int mana_gd_post_work_request(struct gdma_queue *wq,
+    const struct gdma_wqe_request *wqe_req,
+    struct gdma_posted_wqe_info *wqe_info);
+
+int mana_gd_post_and_ring(struct gdma_queue *queue,
+    const struct gdma_wqe_request *wqe,
+    struct gdma_posted_wqe_info *wqe_info);
+
+int mana_gd_alloc_res_map(uint32_t res_avil, struct gdma_resource *r,
+    const char *lock_name);
+void mana_gd_free_res_map(struct gdma_resource *r);
+
+void mana_gd_wq_ring_doorbell(struct gdma_context *gc,
+    struct gdma_queue *queue);
+
+int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length,
+    struct gdma_mem_info *gmi);
+
+void mana_gd_free_memory(struct gdma_mem_info *gmi);
+
+void mana_gd_dma_map_paddr(void *arg, bus_dma_segment_t *segs,
+    int nseg, int error);
+
+int mana_gd_send_request(struct gdma_context *gc, uint32_t req_len,
+    const void *req, uint32_t resp_len, void *resp);
+#endif /* _GDMA_H */
diff --git a/sys/dev/mana/gdma_main.c b/sys/dev/mana/gdma_main.c
new file mode 100644
index 000000000000..910992ce17a4
--- /dev/null
+++ b/sys/dev/mana/gdma_main.c
@@ -0,0 +1,1961 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/time.h>
+#include <sys/eventhandler.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/in_cksum.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include "gdma_util.h"
+#include "mana.h"
+
+
+static mana_vendor_id_t mana_id_table[] = {
+    { PCI_VENDOR_ID_MICROSOFT, PCI_DEV_ID_MANA_VF},
+    /* Last entry */
+    { 0, 0}
+};
+
+static inline uint32_t
+mana_gd_r32(struct gdma_context *g, uint64_t offset)
+{
+	uint32_t v = bus_space_read_4(g->gd_bus.bar0_t,
+	    g->gd_bus.bar0_h, offset);
+	rmb();
+	return (v);
+}
+
+#if defined(__amd64__)
+static inline uint64_t
+mana_gd_r64(struct gdma_context *g, uint64_t offset)
+{
+	uint64_t v = bus_space_read_8(g->gd_bus.bar0_t,
+	    g->gd_bus.bar0_h, offset);
+	rmb();
+	return (v);
+}
+#else
+static inline uint64_t
+mana_gd_r64(struct gdma_context *g, uint64_t offset)
+{
+	uint64_t v;
+	uint32_t *vp = (uint32_t *)&v;
+
+	*vp =  mana_gd_r32(g, offset);
+	*(vp + 1) = mana_gd_r32(g, offset + 4);
+	rmb();
+	return (v);
+}
+#endif
+
+static int
+mana_gd_query_max_resources(device_t dev)
+{
+	struct gdma_context *gc = device_get_softc(dev);
+	struct gdma_query_max_resources_resp resp = {};
+	struct gdma_general_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+	    sizeof(req), sizeof(resp));
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		device_printf(gc->dev,
+		   "Failed to query resource info: %d, 0x%x\n",
+		   err, resp.hdr.status);
+		return err ? err : EPROTO;
+	}
+
+	mana_dbg(NULL, "max_msix %u, max_eq %u, max_cq %u, "
+	    "max_sq %u, max_rq %u\n",
+	    resp.max_msix, resp.max_eq, resp.max_cq,
+	    resp.max_sq, resp.max_rq);
+
+	if (gc->num_msix_usable > resp.max_msix)
+		gc->num_msix_usable = resp.max_msix;
+
+	if (gc->num_msix_usable <= 1)
+		return ENOSPC;
+
+	gc->max_num_queues = mp_ncpus;
+	if (gc->max_num_queues > MANA_MAX_NUM_QUEUES)
+		gc->max_num_queues = MANA_MAX_NUM_QUEUES;
+
+	if (gc->max_num_queues > resp.max_eq)
+		gc->max_num_queues = resp.max_eq;
+
+	if (gc->max_num_queues > resp.max_cq)
+		gc->max_num_queues = resp.max_cq;
+
+	if (gc->max_num_queues > resp.max_sq)
+		gc->max_num_queues = resp.max_sq;
+
+	if (gc->max_num_queues > resp.max_rq)
+		gc->max_num_queues = resp.max_rq;
+
+	return 0;
+}
+
+static int
+mana_gd_detect_devices(device_t dev)
+{
+	struct gdma_context *gc = device_get_softc(dev);
+	struct gdma_list_devices_resp resp = {};
+	struct gdma_general_req req = {};
+	struct gdma_dev_id gd_dev;
+	uint32_t i, max_num_devs;
+	uint16_t dev_type;
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req),
+	    sizeof(resp));
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		device_printf(gc->dev,
+		    "Failed to detect devices: %d, 0x%x\n", err,
+		    resp.hdr.status);
*** 7409 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202108201201.17KC1SmP006699>