From 4ce4a535b66d38657317338d0d79809d11f71781 Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <denis.kirjanov@suse.com>
Date: Tue, 29 Oct 2019 17:31:04 +0100
Subject: [PATCH] RDMA/efa: Add Amazon EFA driver
Git-commit: 01edac3aa2b9002860f405f1af23536386d45db0
Git-commit: 853f56523565c7d3526799d3e2dc503128c336ec
Git-commit: 43eaa49d511cf2f5a9fa56316f02cb80ecf6578b
Git-commit: cd9b3d597054ce5f8e5e19359d7762e253c9019f
Git-commit: 0420e542569b2e56df29d243d09f5974bb6594a4
Git-commit: e9c6c537308895e8b2b98ad15e82938935bdf1cf
Git-commit: 40909f664d279765af430acc5db348a0b71c9b0a
Git-commit: b7f5e880f3774ff1934ce272e1c3b673a3a9c414
Git-commit: f23afd75fc998ce002400b7687f942ce5207909a
Git-commit: 62a38e704d5720e2b73d0e1d6dfc54441ee75985
Git-commit: 4f240dfec6bcc852b124ea7c419fb590949fbd4c
Git-commit: 255efcaeb623dd9777523b98d29aa5a0792d9245
Git-commit: e0e3f39759151fb1b445a0dbc5d6a14f3e1732aa
Git-commit: 4d50e084c560fa952e235d092cf53c86d8bf4b7b
Git-commit: 2367d00e2ca3dfda1b30f89d854cb1669b941e7f
Git-commit: b9560a419bfd498279333387817adcf5faef2825
Git-commit: 72c6ec18eb6161c8fc672ae96ec5c77df4d07405
Git-commit: 7a15414252ae4f1d450462d83f883b2d9d8036ee
Git-commit: a52c8e2469c30cf7ac453d624aed9c168b23d1af
Git-commit: e39afe3d6dbd908d8fd189571a3c1561088a86c2
Git-commit: 529254340c7f16d59b928e36568597c603bae917
Git-commit: 7a5834e456f7fb3eca9b63af2a6bc7f460ae482f
Git-commit: 836a0fbb3e76f704ad65ddfb57f00725245e509b
Git-commit: 40ddb3f020834f9afb7aab31385994811f4db259
Git-commit: b41f75724ab8aeeba1001e63c31db8623432f001
Git-commit: 4b06843d404855063decbccd206d3dc927280b0c
Git-commit: 371bb62158d53c1fc33e2fb9b6aeb9522caf6cf4
Git-commit: bcde9a83b13ede042fd76e4cf0b759b6d6c0abe9
Git-commit: 16e9111e9ee3edfcf6df120080378afc620cb4d3
Git-commit: cfa1f5f27c79997926f89df5dd1b4f8c920b9623
Git-commit: 72a7720fca37fec0daf295923f17ac5d88a613e1
Git-commit: 958b6813f0c077c45a36d0a10b5bdcd27216eabe
Git-commit: 1bc5ba836e3ba02b8c7981a1fb453fe33513526d
Git-commit: a3f4b8e31822650f815f4219a4abde2001fd2fd0
Git-commit: 64c264872b8879e2ab9017eefe9514d4c045c60e
Patch-mainline: v5.4-rc6
References: jsc#SLE-4805

Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
 drivers/infiniband/Kconfig                    |    1 +
 drivers/infiniband/hw/Makefile                |    1 +
 drivers/infiniband/hw/efa/Kconfig             |   15 +
 drivers/infiniband/hw/efa/Makefile            |    9 +
 drivers/infiniband/hw/efa/efa-abi.h           |  143 +
 drivers/infiniband/hw/efa/efa.h               |  252 ++
 .../infiniband/hw/efa/efa_admin_cmds_defs.h   |  794 ++++++
 drivers/infiniband/hw/efa/efa_admin_defs.h    |  136 +
 drivers/infiniband/hw/efa/efa_com.c           | 1101 ++++++++
 drivers/infiniband/hw/efa/efa_com.h           |  144 +
 drivers/infiniband/hw/efa/efa_com_cmd.c       |  765 ++++++
 drivers/infiniband/hw/efa/efa_com_cmd.h       |  293 ++
 drivers/infiniband/hw/efa/efa_common_defs.h   |   18 +
 drivers/infiniband/hw/efa/efa_main.c          |  627 +++++
 drivers/infiniband/hw/efa/efa_regs_defs.h     |  113 +
 drivers/infiniband/hw/efa/efa_verbs.c         | 2416 +++++++++++++++++
 drivers/infiniband/hw/efa/kcompat.h           |  207 ++
 include/uapi/rdma/rdma_user_ioctl_cmds.h      |    1 +
 18 files changed, 7036 insertions(+)
 create mode 100644 drivers/infiniband/hw/efa/Kconfig
 create mode 100644 drivers/infiniband/hw/efa/Makefile
 create mode 100644 drivers/infiniband/hw/efa/efa-abi.h
 create mode 100644 drivers/infiniband/hw/efa/efa.h
 create mode 100644 drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
 create mode 100644 drivers/infiniband/hw/efa/efa_admin_defs.h
 create mode 100644 drivers/infiniband/hw/efa/efa_com.c
 create mode 100644 drivers/infiniband/hw/efa/efa_com.h
 create mode 100644 drivers/infiniband/hw/efa/efa_com_cmd.c
 create mode 100644 drivers/infiniband/hw/efa/efa_com_cmd.h
 create mode 100644 drivers/infiniband/hw/efa/efa_common_defs.h
 create mode 100644 drivers/infiniband/hw/efa/efa_main.c
 create mode 100644 drivers/infiniband/hw/efa/efa_regs_defs.h
 create mode 100644 drivers/infiniband/hw/efa/efa_verbs.c
 create mode 100644 drivers/infiniband/hw/efa/kcompat.h

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 0a3ec7c726ec..0c60d51c1095 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -93,6 +93,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
+source "drivers/infiniband/hw/efa/Kconfig"
 source "drivers/infiniband/hw/i40iw/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/mlx5/Kconfig"
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 34c93abf0fe0..e3ebd17ee175 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_INFINIBAND_MTHCA)		+= mthca/
 obj-$(CONFIG_INFINIBAND_QIB)		+= qib/
 obj-$(CONFIG_INFINIBAND_CXGB3)		+= cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= cxgb4/
+obj-$(CONFIG_INFINIBAND_EFA)            += efa/
 obj-$(CONFIG_INFINIBAND_I40IW)		+= i40iw/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= mlx4/
 obj-$(CONFIG_MLX5_INFINIBAND)		+= mlx5/
diff --git a/drivers/infiniband/hw/efa/Kconfig b/drivers/infiniband/hw/efa/Kconfig
new file mode 100644
index 000000000000..457e18ba1d57
--- /dev/null
+++ b/drivers/infiniband/hw/efa/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# Amazon fabric device configuration
+#
+
+config INFINIBAND_EFA
+	tristate "Amazon Elastic Fabric Adapter (EFA) support"
+	depends on PCI_MSI && 64BIT && !CPU_BIG_ENDIAN
+	depends on INFINIBAND_USER_ACCESS
+	help
+	  This driver supports Amazon Elastic Fabric Adapter (EFA).
+
+	  To compile this driver as a module, choose M here.
+	  The module will be called efa.
diff --git a/drivers/infiniband/hw/efa/Makefile b/drivers/infiniband/hw/efa/Makefile
new file mode 100644
index 000000000000..6e83083af0bc
--- /dev/null
+++ b/drivers/infiniband/hw/efa/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# Makefile for Amazon Elastic Fabric Adapter (EFA) device driver.
+#
+
+obj-$(CONFIG_INFINIBAND_EFA) += efa.o
+
+efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o
diff --git a/drivers/infiniband/hw/efa/efa-abi.h b/drivers/infiniband/hw/efa/efa-abi.h
new file mode 100644
index 000000000000..5e372522960f
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa-abi.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef EFA_ABI_USER_H
+#define EFA_ABI_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define EFA_UVERBS_ABI_VERSION 1
+
+/*
+ * Keep structs aligned to 8 bytes.
+ * Keep reserved fields as arrays of __u8 named reserved_XXX where XXX is the
+ * hex bit offset of the field.
+ */
+
+enum efa_ibv_user_cmds_supp_udata {
+	EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE = 1 << 0,
+	EFA_USER_CMDS_SUPP_UDATA_CREATE_AH    = 1 << 1,
+};
+
+struct efa_ibv_alloc_ucontext_resp {
+	__u32 comp_mask;
+	__u32 cmds_supp_udata_mask;
+	__u16 sub_cqs_per_cq;
+	__u16 inline_buf_size;
+	__u32 max_llq_size; /* bytes */
+};
+
+struct efa_ibv_alloc_pd_resp {
+	__u32 comp_mask;
+	__u16 pdn;
+	__u8 reserved_30[2];
+};
+
+struct efa_ibv_create_cq {
+	__u32 comp_mask;
+	__u32 cq_entry_size;
+	__u16 num_sub_cqs;
+	__u8 reserved_50[6];
+};
+
+struct efa_ibv_create_cq_resp {
+	__u32 comp_mask;
+	__u8 reserved_20[4];
+	__aligned_u64 q_mmap_key;
+	__aligned_u64 q_mmap_size;
+	__u16 cq_idx;
+	__u8 reserved_d0[6];
+};
+
+enum {
+	EFA_QP_DRIVER_TYPE_SRD = 0,
+};
+
+struct efa_ibv_create_qp {
+	__u32 comp_mask;
+	__u32 rq_ring_size; /* bytes */
+	__u32 sq_ring_size; /* bytes */
+	__u32 driver_qp_type;
+};
+
+struct efa_ibv_create_qp_resp {
+	__u32 comp_mask;
+	/* the offset inside the page of the rq db */
+	__u32 rq_db_offset;
+	/* the offset inside the page of the sq db */
+	__u32 sq_db_offset;
+	/* the offset inside the page of descriptors buffer */
+	__u32 llq_desc_offset;
+	__aligned_u64 rq_mmap_key;
+	__aligned_u64 rq_mmap_size;
+	__aligned_u64 rq_db_mmap_key;
+	__aligned_u64 sq_db_mmap_key;
+	__aligned_u64 llq_desc_mmap_key;
+	__u16 send_sub_cq_idx;
+	__u16 recv_sub_cq_idx;
+	__u8 reserved_1e0[4];
+};
+
+struct efa_ibv_create_ah_resp {
+	__u32 comp_mask;
+	__u16 efa_address_handle;
+	__u8 reserved_30[2];
+};
+
+struct efa_ibv_ex_query_device_resp {
+	__u32 comp_mask;
+	__u32 max_sq_wr;
+	__u32 max_rq_wr;
+	__u16 max_sq_sge;
+	__u16 max_rq_sge;
+};
+
+#ifdef HAVE_CUSTOM_COMMANDS
+/******************************************************************************/
+/*                            EFA CUSTOM COMMANDS                             */
+/******************************************************************************/
+#include <rdma/ib_user_verbs.h>
+
+enum efa_everbs_commands {
+	EFA_EVERBS_CMD_GET_AH = 1,
+	EFA_EVERBS_CMD_GET_EX_DEV_ATTRS,
+	EFA_EVERBS_CMD_MAX,
+};
+
+struct efa_everbs_get_ah {
+	__u32 comp_mask;
+	__u16 pdn;
+	__u8 reserved_30[2];
+	__aligned_u64 response;
+	__aligned_u64 user_handle;
+	__u8 gid[16];
+};
+
+struct efa_everbs_get_ah_resp {
+	__u32 comp_mask;
+	__u16 efa_address_handle;
+	__u8 reserved_30[2];
+};
+
+struct efa_everbs_get_ex_dev_attrs {
+	__u32 comp_mask;
+	__u8 reserved_20[4];
+	__aligned_u64 response;
+};
+
+struct efa_everbs_get_ex_dev_attrs_resp {
+	__u32 comp_mask;
+	__u32 max_sq_wr;
+	__u32 max_rq_wr;
+	__u16 max_sq_sge;
+	__u16 max_rq_sge;
+};
+#endif /* HAVE_CUSTOM_COMMANDS */
+
+#endif /* EFA_ABI_USER_H */
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
new file mode 100644
index 000000000000..dcdcbd86b12a
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_H_
+#define _EFA_H_
+
+#include "kcompat.h"
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/version.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "efa-abi.h"
+#include "efa_com_cmd.h"
+
+#define DRV_MODULE_NAME         "efa"
+#define DEVICE_NAME             "Elastic Fabric Adapter (EFA)"
+
+#define EFA_IRQNAME_SIZE        40
+
+/* 1 for AENQ + ADMIN */
+#define EFA_NUM_MSIX_VEC                  1
+#define EFA_MGMNT_MSIX_VEC_IDX            0
+
+struct efa_irq {
+	irq_handler_t handler;
+	void *data;
+	int cpu;
+	u32 vector;
+	cpumask_t affinity_hint_mask;
+	char name[EFA_IRQNAME_SIZE];
+};
+
+struct efa_sw_stats {
+	atomic64_t alloc_pd_err;
+	atomic64_t create_qp_err;
+	atomic64_t create_cq_err;
+	atomic64_t reg_mr_err;
+	atomic64_t alloc_ucontext_err;
+	atomic64_t create_ah_err;
+};
+
+/* Don't use anything other than atomic64 */
+struct efa_stats {
+	struct efa_sw_stats sw_stats;
+	atomic64_t keep_alive_rcvd;
+};
+
+struct efa_dev {
+	struct ib_device ibdev;
+	struct efa_com_dev edev;
+	struct pci_dev *pdev;
+	struct efa_com_get_device_attr_result dev_attr;
+
+	u64 reg_bar_addr;
+	u64 reg_bar_len;
+	u64 mem_bar_addr;
+	u64 mem_bar_len;
+	u64 db_bar_addr;
+	u64 db_bar_len;
+	u8 addr[EFA_GID_SIZE];
+	u32 mtu;
+
+	int admin_msix_vector_idx;
+	struct efa_irq admin_irq;
+
+	struct efa_stats stats;
+};
+
+struct efa_ucontext {
+	struct ib_ucontext ibucontext;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+	struct xarray mmap_xa;
+#else
+	/* Protects ucontext state */
+	struct mutex lock;
+	struct list_head pending_mmaps;
+#endif
+	u32 mmap_xa_page;
+	u16 uarn;
+};
+
+struct efa_pd {
+	struct ib_pd ibpd;
+	u16 pdn;
+};
+
+struct efa_mr {
+	struct ib_mr ibmr;
+	struct ib_umem *umem;
+};
+
+struct efa_cq {
+	struct ib_cq ibcq;
+	struct efa_ucontext *ucontext;
+	dma_addr_t dma_addr;
+	void *cpu_addr;
+	size_t size;
+	u16 cq_idx;
+};
+
+struct efa_qp {
+	struct ib_qp ibqp;
+	dma_addr_t rq_dma_addr;
+	void *rq_cpu_addr;
+	size_t rq_size;
+	enum ib_qp_state state;
+	u32 qp_handle;
+	u32 max_send_wr;
+	u32 max_recv_wr;
+	u32 max_send_sge;
+	u32 max_recv_sge;
+	u32 max_inline_data;
+};
+
+struct efa_ah {
+	struct ib_ah ibah;
+	u16 ah;
+	/* dest_addr */
+	u8 id[EFA_GID_SIZE];
+};
+
+int efa_query_device(struct ib_device *ibdev,
+		     struct ib_device_attr *props,
+		     struct ib_udata *udata);
+int efa_query_port(struct ib_device *ibdev, u8 port,
+		   struct ib_port_attr *props);
+int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+		 int qp_attr_mask,
+		 struct ib_qp_init_attr *qp_init_attr);
+int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
+		  union ib_gid *gid);
+int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+		   u16 *pkey);
+#ifdef HAVE_ALLOC_PD_NO_UCONTEXT
+int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+#else
+int efa_alloc_pd(struct ib_pd *ibpd,
+		 struct ib_ucontext *ibucontext,
+		 struct ib_udata *udata);
+#endif
+#ifdef HAVE_DEALLOC_PD_UDATA
+void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+#elif defined(HAVE_PD_CORE_ALLOCATION)
+void efa_dealloc_pd(struct ib_pd *ibpd);
+#else
+int efa_dealloc_pd(struct ib_pd *ibpd);
+struct ib_pd *efa_kzalloc_pd(struct ib_device *ibdev,
+			     struct ib_ucontext *ibucontext,
+			     struct ib_udata *udata);
+#endif
+#ifdef HAVE_DESTROY_QP_UDATA
+int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+#else
+int efa_destroy_qp(struct ib_qp *ibqp);
+#endif
+struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata);
+#ifdef HAVE_IB_VOID_DESTROY_CQ
+void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+#elif defined(HAVE_DESTROY_CQ_UDATA)
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+#else
+int efa_destroy_cq(struct ib_cq *ibcq);
+#endif
+
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		  struct ib_udata *udata);
+#ifndef HAVE_CQ_CORE_ALLOCATION
+#ifdef HAVE_CREATE_CQ_NO_UCONTEXT
+struct ib_cq *efa_kzalloc_cq(struct ib_device *ibdev,
+			     const struct ib_cq_init_attr *attr,
+			     struct ib_udata *udata);
+#else
+struct ib_cq *efa_kzalloc_cq(struct ib_device *ibdev,
+			     const struct ib_cq_init_attr *attr,
+			     struct ib_ucontext *ibucontext,
+			     struct ib_udata *udata);
+#endif
+#endif
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+			 u64 virt_addr, int access_flags,
+			 struct ib_udata *udata);
+#ifdef HAVE_DEREG_MR_UDATA
+int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+#else
+int efa_dereg_mr(struct ib_mr *ibmr);
+#endif
+int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+			   struct ib_port_immutable *immutable);
+int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata);
+#ifdef HAVE_UCONTEXT_CORE_ALLOCATION
+void efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
+#else
+int efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
+struct ib_ucontext *efa_kzalloc_ucontext(struct ib_device *ibdev,
+					 struct ib_udata *udata);
+#endif
+int efa_mmap(struct ib_ucontext *ibucontext,
+	     struct vm_area_struct *vma);
+int efa_create_ah(struct ib_ah *ibah,
+		  struct rdma_ah_attr *ah_attr,
+		  u32 flags,
+		  struct ib_udata *udata);
+#ifndef HAVE_AH_CORE_ALLOCATION
+#ifdef HAVE_CREATE_DESTROY_AH_FLAGS
+struct ib_ah *efa_kzalloc_ah(struct ib_pd *ibpd,
+			     struct rdma_ah_attr *ah_attr,
+			     u32 flags,
+			     struct ib_udata *udata);
+#else
+struct ib_ah *efa_kzalloc_ah(struct ib_pd *ibpd,
+			     struct rdma_ah_attr *ah_attr,
+			     struct ib_udata *udata);
+#endif
+#endif
+#ifdef HAVE_AH_CORE_ALLOCATION
+void efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+#elif defined(HAVE_CREATE_DESTROY_AH_FLAGS)
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+#else
+int efa_destroy_ah(struct ib_ah *ibah);
+#endif
+#ifndef HAVE_NO_KVERBS_DRIVERS
+int efa_post_send(struct ib_qp *ibqp,
+		  const struct ib_send_wr *wr,
+		  const struct ib_send_wr **bad_wr);
+
+int efa_post_recv(struct ib_qp *ibqp,
+		  const struct ib_recv_wr *wr,
+		  const struct ib_recv_wr **bad_wr);
+
+int efa_poll_cq(struct ib_cq *ibcq, int num_entries,
+		struct ib_wc *wc);
+int efa_req_notify_cq(struct ib_cq *ibcq,
+		      enum ib_cq_notify_flags flags);
+struct ib_mr *efa_get_dma_mr(struct ib_pd *ibpd, int acc);
+#endif
+int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_udata *udata);
+enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
+					 u8 port_num);
+struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num);
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+		     u8 port_num, int index);
+
+#endif /* _EFA_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
new file mode 100644
index 000000000000..2be0469d545f
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -0,0 +1,794 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_ADMIN_CMDS_H_
+#define _EFA_ADMIN_CMDS_H_
+
+#define EFA_ADMIN_API_VERSION_MAJOR          0
+#define EFA_ADMIN_API_VERSION_MINOR          1
+
+/* EFA admin queue opcodes */
+enum efa_admin_aq_opcode {
+	EFA_ADMIN_CREATE_QP                         = 1,
+	EFA_ADMIN_MODIFY_QP                         = 2,
+	EFA_ADMIN_QUERY_QP                          = 3,
+	EFA_ADMIN_DESTROY_QP                        = 4,
+	EFA_ADMIN_CREATE_AH                         = 5,
+	EFA_ADMIN_DESTROY_AH                        = 6,
+	EFA_ADMIN_REG_MR                            = 7,
+	EFA_ADMIN_DEREG_MR                          = 8,
+	EFA_ADMIN_CREATE_CQ                         = 9,
+	EFA_ADMIN_DESTROY_CQ                        = 10,
+	EFA_ADMIN_GET_FEATURE                       = 11,
+	EFA_ADMIN_SET_FEATURE                       = 12,
+	EFA_ADMIN_GET_STATS                         = 13,
+	EFA_ADMIN_ALLOC_PD                          = 14,
+	EFA_ADMIN_DEALLOC_PD                        = 15,
+	EFA_ADMIN_ALLOC_UAR                         = 16,
+	EFA_ADMIN_DEALLOC_UAR                       = 17,
+	EFA_ADMIN_MAX_OPCODE                        = 17,
+};
+
+enum efa_admin_aq_feature_id {
+	EFA_ADMIN_DEVICE_ATTR                       = 1,
+	EFA_ADMIN_AENQ_CONFIG                       = 2,
+	EFA_ADMIN_NETWORK_ATTR                      = 3,
+	EFA_ADMIN_QUEUE_ATTR                        = 4,
+	EFA_ADMIN_HW_HINTS                          = 5,
+	EFA_ADMIN_FEATURES_OPCODE_NUM               = 8,
+};
+
+/* QP transport type */
+enum efa_admin_qp_type {
+	/* Unreliable Datagram */
+	EFA_ADMIN_QP_TYPE_UD                        = 1,
+	/* Scalable Reliable Datagram */
+	EFA_ADMIN_QP_TYPE_SRD                       = 2,
+};
+
+/* QP state */
+enum efa_admin_qp_state {
+	EFA_ADMIN_QP_STATE_RESET                    = 0,
+	EFA_ADMIN_QP_STATE_INIT                     = 1,
+	EFA_ADMIN_QP_STATE_RTR                      = 2,
+	EFA_ADMIN_QP_STATE_RTS                      = 3,
+	EFA_ADMIN_QP_STATE_SQD                      = 4,
+	EFA_ADMIN_QP_STATE_SQE                      = 5,
+	EFA_ADMIN_QP_STATE_ERR                      = 6,
+};
+
+enum efa_admin_get_stats_type {
+	EFA_ADMIN_GET_STATS_TYPE_BASIC              = 0,
+};
+
+enum efa_admin_get_stats_scope {
+	EFA_ADMIN_GET_STATS_SCOPE_ALL               = 0,
+	EFA_ADMIN_GET_STATS_SCOPE_QUEUE             = 1,
+};
+
+enum efa_admin_modify_qp_mask_bits {
+	EFA_ADMIN_QP_STATE_BIT                      = 0,
+	EFA_ADMIN_CUR_QP_STATE_BIT                  = 1,
+	EFA_ADMIN_QKEY_BIT                          = 2,
+	EFA_ADMIN_SQ_PSN_BIT                        = 3,
+	EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT       = 4,
+};
+
+/*
+ * QP allocation sizes, converted by fabric QueuePair (QP) create command
+ * from QP capabilities.
+ */
+struct efa_admin_qp_alloc_size {
+	/* Send descriptor ring size in bytes */
+	u32 send_queue_ring_size;
+
+	/* Max number of WQEs that can be outstanding on send queue. */
+	u32 send_queue_depth;
+
+	/*
+	 * Recv descriptor ring size in bytes, sufficient for user-provided
+	 * number of WQEs
+	 */
+	u32 recv_queue_ring_size;
+
+	/* Max number of WQEs that can be outstanding on recv queue */
+	u32 recv_queue_depth;
+};
+
+struct efa_admin_create_qp_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/* Protection Domain associated with this QP */
+	u16 pd;
+
+	/* QP type */
+	u8 qp_type;
+
+	/*
+	 * 0 : sq_virt - If set, SQ ring base address is
+	 *    virtual (IOVA returned by MR registration)
+	 * 1 : rq_virt - If set, RQ ring base address is
+	 *    virtual (IOVA returned by MR registration)
+	 * 7:2 : reserved - MBZ
+	 */
+	u8 flags;
+
+	/*
+	 * Send queue (SQ) ring base physical address. This field is not
+	 * used if this is a Low Latency Queue(LLQ).
+	 */
+	u64 sq_base_addr;
+
+	/* Receive queue (RQ) ring base address. */
+	u64 rq_base_addr;
+
+	/* Index of CQ to be associated with Send Queue completions */
+	u32 send_cq_idx;
+
+	/* Index of CQ to be associated with Recv Queue completions */
+	u32 recv_cq_idx;
+
+	/*
+	 * Memory registration key for the SQ ring, used only when not in
+	 * LLQ mode and base address is virtual
+	 */
+	u32 sq_l_key;
+
+	/*
+	 * Memory registration key for the RQ ring, used only when base
+	 * address is virtual
+	 */
+	u32 rq_l_key;
+
+	/* Requested QP allocation sizes */
+	struct efa_admin_qp_alloc_size qp_alloc_size;
+
+	/* UAR number */
+	u16 uar;
+
+	/* MBZ */
+	u16 reserved;
+
+	/* MBZ */
+	u32 reserved2;
+};
+
+struct efa_admin_create_qp_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	/* Opaque handle to be used for consequent operations on the QP */
+	u32 qp_handle;
+
+	/* QP number in the given EFA virtual device */
+	u16 qp_num;
+
+	/* MBZ */
+	u16 reserved;
+
+	/* Index of sub-CQ for Send Queue completions */
+	u16 send_sub_cq_idx;
+
+	/* Index of sub-CQ for Receive Queue completions */
+	u16 recv_sub_cq_idx;
+
+	/* SQ doorbell address, as offset to PCIe DB BAR */
+	u32 sq_db_offset;
+
+	/* RQ doorbell address, as offset to PCIe DB BAR */
+	u32 rq_db_offset;
+
+	/*
+	 * low latency send queue ring base address as an offset to PCIe
+	 * MMIO LLQ_MEM BAR
+	 */
+	u32 llq_descriptors_offset;
+};
+
+struct efa_admin_modify_qp_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/*
+	 * Mask indicating which fields should be updated see enum
+	 * efa_admin_modify_qp_mask_bits
+	 */
+	u32 modify_mask;
+
+	/* QP handle returned by create_qp command */
+	u32 qp_handle;
+
+	/* QP state */
+	u32 qp_state;
+
+	/* Override current QP state (before applying the transition) */
+	u32 cur_qp_state;
+
+	/* QKey */
+	u32 qkey;
+
+	/* SQ PSN */
+	u32 sq_psn;
+
+	/* Enable async notification when SQ is drained */
+	u8 sq_drained_async_notify;
+
+	/* MBZ */
+	u8 reserved1;
+
+	/* MBZ */
+	u16 reserved2;
+};
+
+struct efa_admin_modify_qp_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_query_qp_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/* QP handle returned by create_qp command */
+	u32 qp_handle;
+};
+
+struct efa_admin_query_qp_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	/* QP state */
+	u32 qp_state;
+
+	/* QKey */
+	u32 qkey;
+
+	/* SQ PSN */
+	u32 sq_psn;
+
+	/* Indicates that draining is in progress */
+	u8 sq_draining;
+
+	/* MBZ */
+	u8 reserved1;
+
+	/* MBZ */
+	u16 reserved2;
+};
+
+struct efa_admin_destroy_qp_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/* QP handle returned by create_qp command */
+	u32 qp_handle;
+};
+
+struct efa_admin_destroy_qp_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * Create Address Handle command parameters. Must not be called more than
+ * once for the same destination
+ */
+struct efa_admin_create_ah_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/* Destination address in network byte order */
+	u8 dest_addr[16];
+
+	/* PD number */
+	u16 pd;
+
+	u16 reserved;
+};
+
+struct efa_admin_create_ah_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	/* Target interface address handle (opaque) */
+	u16 ah;
+
+	u16 reserved;
+};
+
+struct efa_admin_destroy_ah_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/* Target interface address handle (opaque) */
+	u16 ah;
+
+	/* PD number */
+	u16 pd;
+};
+
+struct efa_admin_destroy_ah_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * Registration of MemoryRegion, required for QP working with Virtual
+ * Addresses. In standard verbs semantics, region length is limited to 2GB
+ * space, but EFA offers larger MR support for large memory space, to ease
+ * on users working with very large datasets (i.e. full GPU memory mapping).
+ */
+struct efa_admin_reg_mr_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/* Protection Domain */
+	u16 pd;
+
+	/* MBZ */
+	u16 reserved16_w1;
+
+	/* Physical Buffer List, each element is page-aligned. */
+	union {
+		/*
+		 * Inline array of guest-physical page addresses of user
+		 * memory pages (optimization for short region
+		 * registrations)
+		 */
+		u64 inline_pbl_array[4];
+
+		/* points to PBL (direct or indirect, chained if needed) */
+		struct efa_admin_ctrl_buff_info pbl;
+	} pbl;
+
+	/* Memory region length, in bytes. */
+	u64 mr_length;
+
+	/*
+	 * flags and page size
+	 * 4:0 : phys_page_size_shift - page size is (1 <<
+	 *    phys_page_size_shift). Page size is used for
+	 *    building the Virtual to Physical address mapping
+	 * 6:5 : reserved - MBZ
+	 * 7 : mem_addr_phy_mode_en - Enable bit for physical
+	 *    memory registration (no translation), can be used
+	 *    only by privileged clients. If set, PBL must
+	 *    contain a single entry.
+	 */
+	u8 flags;
+
+	/*
+	 * permissions
+	 * 0 : local_write_enable - Write permissions: value
+	 *    of 1 needed for RQ buffers and for RDMA write
+	 * 7:1 : reserved1 - remote access flags, etc
+	 */
+	u8 permissions;
+
+	u16 reserved16_w5;
+
+	/* number of pages in PBL (redundant, could be calculated) */
+	u32 page_num;
+
+	/*
+	 * IO Virtual Address associated with this MR. If
+	 * mem_addr_phy_mode_en is set, contains the physical address of
+	 * the region.
+	 */
+	u64 iova;
+};
+
+struct efa_admin_reg_mr_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	/*
+	 * L_Key, to be used in conjunction with local buffer references in
+	 * SQ and RQ WQE, or with virtual RQ/CQ rings
+	 */
+	u32 l_key;
+
+	/*
+	 * R_Key, to be used in RDMA messages to refer to remotely accessed
+	 * memory region
+	 */
+	u32 r_key;
+};
+
+struct efa_admin_dereg_mr_cmd {
+	/* Common Admin Queue descriptor */
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/* L_Key, memory region's l_key */
+	u32 l_key;
+};
+
+struct efa_admin_dereg_mr_resp {
+	/* Common Admin Queue completion descriptor */
+	struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_create_cq_cmd {
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	/*
+	 * 4:0 : reserved5
+	 * 5 : interrupt_mode_enabled - if set, cq operates
+	 *    in interrupt mode (i.e. CQ events and MSI-X are
+	 *    generated), otherwise - polling
+	 * 6 : virt - If set, ring base address is virtual
+	 *    (IOVA returned by MR registration)
+	 * 7 : reserved6
+	 */
+	u8 cq_caps_1;
+
+	/*
+	 * 4:0 : cq_entry_size_words - size of CQ entry in
+	 *    32-bit words, valid values: 4, 8.
+	 * 7:5 : reserved7
+	 */
+	u8 cq_caps_2;
+
+	/* completion queue depth in # of entries. must be power of 2 */
+	u16 cq_depth;
+
+	/* msix vector assigned to this cq */
+	u32 msix_vector_idx;
+
+	/*
+	 * CQ ring base address, virtual or physical depending on 'virt'
+	 * flag
+	 */
+	struct efa_common_mem_addr cq_ba;
+
+	/*
+	 * Memory registration key for the ring, used only when base
+	 * address is virtual
+	 */
+	u32 l_key;
+
+	/*
+	 * number of sub cqs - must be equal to sub_cqs_per_cq of queue
+	 *    attributes.
+	 */
+	u16 num_sub_cqs;
+
+	/* UAR number */
+	u16 uar;
+};
+
+struct efa_admin_create_cq_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	u16 cq_idx;
+
+	/* actual cq depth in number of entries */
+	u16 cq_actual_depth;
+};
+
+struct efa_admin_destroy_cq_cmd {
+	struct efa_admin_aq_common_desc aq_common_desc;
+
+	u16 cq_idx;
+
+	u16 reserved1;
+};
+
+struct efa_admin_destroy_cq_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * EFA AQ Get Statistics command. Extended statistics are placed in control
+ * buffer pointed by AQ entry
+ */
+struct efa_admin_aq_get_stats_cmd {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+
+	union {
+		/* command specific inline data */
+		u32 inline_data_w1[3];
+
+		struct efa_admin_ctrl_buff_info control_buffer;
+	} u;
+
+	/* stats type as defined in enum efa_admin_get_stats_type */
+	u8 type;
+
+	/* stats scope defined in enum efa_admin_get_stats_scope */
+	u8 scope;
+
+	u16 scope_modifier;
+};
+
+struct efa_admin_basic_stats {
+	u64 tx_bytes;
+
+	u64 tx_pkts;
+
+	u64 rx_bytes;
+
+	u64 rx_pkts;
+
+	u64 rx_drops;
+};
+
+struct efa_admin_acq_get_stats_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	struct efa_admin_basic_stats basic_stats;
+};
+
+struct efa_admin_get_set_feature_common_desc {
+	/*
+	 * 1:0 : select - 0x1 - current value; 0x3 - default
+	 *    value
+	 * 7:3 : reserved3
+	 */
+	u8 flags;
+
+	/* as appears in efa_admin_aq_feature_id */
+	u8 feature_id;
+
+	/* MBZ */
+	u16 reserved16;
+};
+
+struct efa_admin_feature_device_attr_desc {
+	/* Bitmap of efa_admin_aq_feature_id */
+	u64 supported_features;
+
+	/* Bitmap of supported page sizes in MR registrations */
+	u64 page_size_cap;
+
+	u32 fw_version;
+
+	u32 admin_api_version;
+
+	u32 device_version;
+
+	/* Bar used for SQ and RQ doorbells */
+	u16 db_bar;
+
+	/* Indicates how many bits are used physical address access */
+	u8 phys_addr_width;
+
+	/* Indicates how many bits are used virtual address access */
+	u8 virt_addr_width;
+};
+
+struct efa_admin_feature_queue_attr_desc {
+	/* The maximum number of queue pairs supported */
+	u32 max_qp;
+
+	u32 max_sq_depth;
+
+	/* max send wr used in inline-buf */
+	u32 inline_buf_size;
+
+	u32 max_rq_depth;
+
+	/* The maximum number of completion queues supported per VF */
+	u32 max_cq;
+
+	u32 max_cq_depth;
+
+	/* Number of sub-CQs to be created for each CQ */
+	u16 sub_cqs_per_cq;
+
+	u16 reserved;
+
+	/*
+	 * Maximum number of SGEs (buffs) allowed for a single send work
+	 *    queue element (WQE)
+	 */
+	u16 max_wr_send_sges;
+
+	/* Maximum number of SGEs allowed for a single recv WQE */
+	u16 max_wr_recv_sges;
+
+	/* The maximum number of memory regions supported */
+	u32 max_mr;
+
+	/* The maximum number of pages can be registered */
+	u32 max_mr_pages;
+
+	/* The maximum number of protection domains supported */
+	u32 max_pd;
+
+	/* The maximum number of address handles supported */
+	u32 max_ah;
+
+	/* The maximum size of LLQ in bytes */
+	u32 max_llq_size;
+};
+
+struct efa_admin_feature_aenq_desc {
+	/* bitmask for AENQ groups the device can report */
+	u32 supported_groups;
+
+	/* bitmask for AENQ groups to report */
+	u32 enabled_groups;
+};
+
+struct efa_admin_feature_network_attr_desc {
+	/* Raw address data in network byte order */
+	u8 addr[16];
+
+	u32 mtu;
+};
+
+/*
+ * When hint value is 0, hints capabilities are not supported or driver
+ * should use its own predefined value
+ */
+struct efa_admin_hw_hints {
+	/* value in ms */
+	u16 mmio_read_timeout;
+
+	/* value in ms */
+	u16 driver_watchdog_timeout;
+
+	/* value in ms */
+	u16 admin_completion_timeout;
+
+	/* poll interval in ms */
+	u16 poll_interval;
+};
+
+struct efa_admin_get_feature_cmd {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+
+	struct efa_admin_ctrl_buff_info control_buffer;
+
+	struct efa_admin_get_set_feature_common_desc feature_common;
+
+	u32 raw[11];
+};
+
+struct efa_admin_get_feature_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	union {
+		u32 raw[14];
+
+		struct efa_admin_feature_device_attr_desc device_attr;
+
+		struct efa_admin_feature_aenq_desc aenq;
+
+		struct efa_admin_feature_network_attr_desc network_attr;
+
+		struct efa_admin_feature_queue_attr_desc queue_attr;
+
+		struct efa_admin_hw_hints hw_hints;
+	} u;
+};
+
+struct efa_admin_set_feature_cmd {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+
+	struct efa_admin_ctrl_buff_info control_buffer;
+
+	struct efa_admin_get_set_feature_common_desc feature_common;
+
+	union {
+		u32 raw[11];
+
+		/* AENQ configuration */
+		struct efa_admin_feature_aenq_desc aenq;
+	} u;
+};
+
+struct efa_admin_set_feature_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	union {
+		u32 raw[14];
+	} u;
+};
+
+struct efa_admin_alloc_pd_cmd {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+};
+
+struct efa_admin_alloc_pd_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	/* PD number */
+	u16 pd;
+
+	/* MBZ */
+	u16 reserved;
+};
+
+struct efa_admin_dealloc_pd_cmd {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+
+	/* PD number */
+	u16 pd;
+
+	/* MBZ */
+	u16 reserved;
+};
+
+struct efa_admin_dealloc_pd_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_alloc_uar_cmd {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+};
+
+struct efa_admin_alloc_uar_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+
+	/* UAR number */
+	u16 uar;
+
+	/* MBZ */
+	u16 reserved;
+};
+
+struct efa_admin_dealloc_uar_cmd {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+
+	/* UAR number */
+	u16 uar;
+
+	/* MBZ */
+	u16 reserved;
+};
+
+struct efa_admin_dealloc_uar_resp {
+	struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/* asynchronous event notification groups */
+enum efa_admin_aenq_group {
+	EFA_ADMIN_FATAL_ERROR                       = 1,
+	EFA_ADMIN_WARNING                           = 2,
+	EFA_ADMIN_NOTIFICATION                      = 3,
+	EFA_ADMIN_KEEP_ALIVE                        = 4,
+	EFA_ADMIN_AENQ_GROUPS_NUM                   = 5,
+};
+
+enum efa_admin_aenq_notification_syndrom {
+	EFA_ADMIN_SUSPEND                           = 0,
+	EFA_ADMIN_RESUME                            = 1,
+	EFA_ADMIN_UPDATE_HINTS                      = 2,
+};
+
+struct efa_admin_mmio_req_read_less_resp {
+	u16 req_id;
+
+	u16 reg_off;
+
+	/* value is valid when poll is cleared */
+	u32 reg_val;
+};
+
+/* create_qp_cmd */
+#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK                BIT(0)
+#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT               1
+#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK                BIT(1)
+
+/* reg_mr_cmd */
+#define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK      GENMASK(4, 0)
+#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT     7
+#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK      BIT(7)
+#define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK        BIT(0)
+
+/* create_cq_cmd */
+#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
+#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
+#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT                  6
+#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK                   BIT(6)
+#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK    GENMASK(4, 0)
+
+/* get_set_feature_common_desc */
+#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK   GENMASK(1, 0)
+
+#endif /* _EFA_ADMIN_CMDS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
new file mode 100644
index 000000000000..c8e0c8b905be
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_ADMIN_H_
+#define _EFA_ADMIN_H_
+
+enum efa_admin_aq_completion_status {
+	EFA_ADMIN_SUCCESS                           = 0,
+	EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE       = 1,
+	EFA_ADMIN_BAD_OPCODE                        = 2,
+	EFA_ADMIN_UNSUPPORTED_OPCODE                = 3,
+	EFA_ADMIN_MALFORMED_REQUEST                 = 4,
+	/* Additional status is provided in ACQ entry extended_status */
+	EFA_ADMIN_ILLEGAL_PARAMETER                 = 5,
+	EFA_ADMIN_UNKNOWN_ERROR                     = 6,
+	EFA_ADMIN_RESOURCE_BUSY                     = 7,
+};
+
+struct efa_admin_aq_common_desc {
+	/*
+	 * 11:0 : command_id
+	 * 15:12 : reserved12
+	 */
+	u16 command_id;
+
+	/* as appears in efa_admin_aq_opcode */
+	u8 opcode;
+
+	/*
+	 * 0 : phase
+	 * 1 : ctrl_data - control buffer address valid
+	 * 2 : ctrl_data_indirect - control buffer address
+	 *    points to list of pages with addresses of control
+	 *    buffers
+	 * 7:3 : reserved3
+	 */
+	u8 flags;
+};
+
+/*
+ * used in efa_admin_aq_entry. Can point directly to control data, or to a
+ * page list chunk. Used also at the end of indirect mode page list chunks,
+ * for chaining.
+ */
+struct efa_admin_ctrl_buff_info {
+	u32 length;
+
+	struct efa_common_mem_addr address;
+};
+
+struct efa_admin_aq_entry {
+	struct efa_admin_aq_common_desc aq_common_descriptor;
+
+	union {
+		u32 inline_data_w1[3];
+
+		struct efa_admin_ctrl_buff_info control_buffer;
+	} u;
+
+	u32 inline_data_w4[12];
+};
+
+struct efa_admin_acq_common_desc {
+	/*
+	 * command identifier to associate it with the aq descriptor
+	 * 11:0 : command_id
+	 * 15:12 : reserved12
+	 */
+	u16 command;
+
+	u8 status;
+
+	/*
+	 * 0 : phase
+	 * 7:1 : reserved1
+	 */
+	u8 flags;
+
+	u16 extended_status;
+
+	/*
+	 * indicates to the driver which AQ entry has been consumed by the
+	 *    device and could be reused
+	 */
+	u16 sq_head_indx;
+};
+
+struct efa_admin_acq_entry {
+	struct efa_admin_acq_common_desc acq_common_descriptor;
+
+	u32 response_specific_data[14];
+};
+
+struct efa_admin_aenq_common_desc {
+	u16 group;
+
+	u16 syndrom;
+
+	/*
+	 * 0 : phase
+	 * 7:1 : reserved - MBZ
+	 */
+	u8 flags;
+
+	u8 reserved1[3];
+
+	u32 timestamp_low;
+
+	u32 timestamp_high;
+};
+
+struct efa_admin_aenq_entry {
+	struct efa_admin_aenq_common_desc aenq_common_desc;
+
+	/* command specific inline data */
+	u32 inline_data_w4[12];
+};
+
+/* aq_common_desc */
+#define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK            GENMASK(11, 0)
+#define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK                 BIT(0)
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT            1
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK             BIT(1)
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT   2
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK    BIT(2)
+
+/* acq_common_desc */
+#define EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK           GENMASK(11, 0)
+#define EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK                BIT(0)
+
+/* aenq_common_desc */
+#define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK               BIT(0)
+
+#endif /* _EFA_ADMIN_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
new file mode 100644
index 000000000000..0778f4f7dccd
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -0,0 +1,1101 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "efa_com.h"
+#include "efa_regs_defs.h"
+
+#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */
+
+#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
+#define EFA_MMIO_READ_INVALID 0xffffffff
+
+#define EFA_POLL_INTERVAL_MS 100 /* msecs */
+
+#define EFA_ASYNC_QUEUE_DEPTH 16
+#define EFA_ADMIN_QUEUE_DEPTH 32
+
+#define MIN_EFA_VER\
+	((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
+	 (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))
+
+#define EFA_CTRL_MAJOR          0
+#define EFA_CTRL_MINOR          0
+#define EFA_CTRL_SUB_MINOR      1
+
+#define MIN_EFA_CTRL_VER \
+	(((EFA_CTRL_MAJOR) << \
+	(EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
+	((EFA_CTRL_MINOR) << \
+	(EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
+	(EFA_CTRL_SUB_MINOR))
+
+#define EFA_DMA_ADDR_TO_UINT32_LOW(x)   ((u32)((u64)(x)))
+#define EFA_DMA_ADDR_TO_UINT32_HIGH(x)  ((u32)(((u64)(x)) >> 32))
+
+#define EFA_REGS_ADMIN_INTR_MASK 1
+
+enum efa_cmd_status {
+	EFA_CMD_SUBMITTED,
+	EFA_CMD_COMPLETED,
+};
+
+struct efa_comp_ctx {
+	struct completion wait_event;
+	struct efa_admin_acq_entry *user_cqe;
+	u32 comp_size;
+	enum efa_cmd_status status;
+	/* status from the device */
+	u8 comp_status;
+	u8 cmd_opcode;
+	u8 occupied;
+};
+
+static const char *efa_com_cmd_str(u8 cmd)
+{
+#define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
+
+	switch (cmd) {
+	EFA_CMD_STR_CASE(CREATE_QP);
+	EFA_CMD_STR_CASE(MODIFY_QP);
+	EFA_CMD_STR_CASE(QUERY_QP);
+	EFA_CMD_STR_CASE(DESTROY_QP);
+	EFA_CMD_STR_CASE(CREATE_AH);
+	EFA_CMD_STR_CASE(DESTROY_AH);
+	EFA_CMD_STR_CASE(REG_MR);
+	EFA_CMD_STR_CASE(DEREG_MR);
+	EFA_CMD_STR_CASE(CREATE_CQ);
+	EFA_CMD_STR_CASE(DESTROY_CQ);
+	EFA_CMD_STR_CASE(GET_FEATURE);
+	EFA_CMD_STR_CASE(SET_FEATURE);
+	EFA_CMD_STR_CASE(GET_STATS);
+	EFA_CMD_STR_CASE(ALLOC_PD);
+	EFA_CMD_STR_CASE(DEALLOC_PD);
+	EFA_CMD_STR_CASE(ALLOC_UAR);
+	EFA_CMD_STR_CASE(DEALLOC_UAR);
+	default: return "unknown command opcode";
+	}
+#undef EFA_CMD_STR_CASE
+}
+
+static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
+{
+	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+	struct efa_admin_mmio_req_read_less_resp *read_resp;
+	unsigned long exp_time;
+	u32 mmio_read_reg;
+	u32 err;
+
+	read_resp = mmio_read->read_resp;
+
+	spin_lock(&mmio_read->lock);
+	mmio_read->seq_num++;
+
+	/* trash DMA req_id to identify when hardware is done */
+	read_resp->req_id = mmio_read->seq_num + 0x9aL;
+	mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
+			EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
+	mmio_read_reg |= mmio_read->seq_num &
+			 EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+
+	writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
+
+	exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
+	do {
+		if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
+			break;
+		udelay(1);
+	} while (time_is_after_jiffies(exp_time));
+
+	if (read_resp->req_id != mmio_read->seq_num) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
+			mmio_read->seq_num, offset, read_resp->req_id,
+			read_resp->reg_off);
+		err = EFA_MMIO_READ_INVALID;
+		goto out;
+	}
+
+	if (read_resp->reg_off != offset) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Reading register failed: wrong offset provided\n");
+		err = EFA_MMIO_READ_INVALID;
+		goto out;
+	}
+
+	err = read_resp->reg_val;
+out:
+	spin_unlock(&mmio_read->lock);
+	return err;
+}
+
+static int efa_com_admin_init_sq(struct efa_com_dev *edev)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_com_admin_sq *sq = &aq->sq;
+	u16 size = aq->depth * sizeof(*sq->entries);
+	u32 addr_high;
+	u32 addr_low;
+	u32 aq_caps;
+
+	sq->entries =
+		dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
+	if (!sq->entries)
+		return -ENOMEM;
+
+	spin_lock_init(&sq->lock);
+
+	sq->cc = 0;
+	sq->pc = 0;
+	sq->phase = 1;
+
+	sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
+
+	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
+	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
+
+	writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
+	writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
+
+	aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
+	aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
+			EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
+			EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+
+	writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
+
+	return 0;
+}
+
+static int efa_com_admin_init_cq(struct efa_com_dev *edev)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_com_admin_cq *cq = &aq->cq;
+	u16 size = aq->depth * sizeof(*cq->entries);
+	u32 addr_high;
+	u32 addr_low;
+	u32 acq_caps;
+
+	cq->entries =
+		dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
+	if (!cq->entries)
+		return -ENOMEM;
+
+	spin_lock_init(&cq->lock);
+
+	cq->cc = 0;
+	cq->phase = 1;
+
+	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
+	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
+
+	writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
+	writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
+
+	acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
+	acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
+			EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
+			EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
+	acq_caps |= (aq->msix_vector_idx <<
+			EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
+			EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
+
+	writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
+
+	return 0;
+}
+
+static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
+				   struct efa_aenq_handlers *aenq_handlers)
+{
+	struct efa_com_aenq *aenq = &edev->aenq;
+	u32 addr_low, addr_high, aenq_caps;
+	u16 size;
+
+	if (!aenq_handlers) {
+		ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
+		return -EINVAL;
+	}
+
+	size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
+	aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
+					   GFP_KERNEL);
+	if (!aenq->entries)
+		return -ENOMEM;
+
+	aenq->aenq_handlers = aenq_handlers;
+	aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
+	aenq->cc = 0;
+	aenq->phase = 1;
+
+	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
+	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+
+	writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
+	writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
+
+	aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
+	aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
+		EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
+		EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
+	aenq_caps |= (aenq->msix_vector_idx
+		      << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
+		     EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
+	writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
+
+	/*
+	 * Init cons_db to mark that all entries in the queue
+	 * are initially available
+	 */
+	writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
+
+	return 0;
+}
+
+/* ID to be used with efa_com_get_comp_ctx */
+static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
+{
+	u16 ctx_id;
+
+	spin_lock(&aq->comp_ctx_lock);
+	ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
+	aq->comp_ctx_pool_next++;
+	spin_unlock(&aq->comp_ctx_lock);
+
+	return ctx_id;
+}
+
+static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
+				   u16 ctx_id)
+{
+	spin_lock(&aq->comp_ctx_lock);
+	aq->comp_ctx_pool_next--;
+	aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
+	spin_unlock(&aq->comp_ctx_lock);
+}
+
+static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
+					struct efa_comp_ctx *comp_ctx)
+{
+	u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command &
+		     EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+	u16 ctx_id = cmd_id & (aq->depth - 1);
+
+	ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
+	comp_ctx->occupied = 0;
+	efa_com_dealloc_ctx_id(aq, ctx_id);
+}
+
+static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
+						 u16 cmd_id, bool capture)
+{
+	u16 ctx_id = cmd_id & (aq->depth - 1);
+
+	if (aq->comp_ctx[ctx_id].occupied && capture) {
+		ibdev_err_ratelimited(
+			aq->efa_dev,
+			"Completion context for command_id %#x is occupied\n",
+			cmd_id);
+		return NULL;
+	}
+
+	if (capture) {
+		aq->comp_ctx[ctx_id].occupied = 1;
+		ibdev_dbg(aq->efa_dev,
+			  "Take completion ctxt for command_id %#x\n", cmd_id);
+	}
+
+	return &aq->comp_ctx[ctx_id];
+}
+
+static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+						       struct efa_admin_aq_entry *cmd,
+						       size_t cmd_size_in_bytes,
+						       struct efa_admin_acq_entry *comp,
+						       size_t comp_size_in_bytes)
+{
+	struct efa_admin_aq_entry *aqe;
+	struct efa_comp_ctx *comp_ctx;
+	u16 queue_size_mask;
+	u16 cmd_id;
+	u16 ctx_id;
+	u16 pi;
+
+	queue_size_mask = aq->depth - 1;
+	pi = aq->sq.pc & queue_size_mask;
+
+	ctx_id = efa_com_alloc_ctx_id(aq);
+
+	/* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
+	cmd_id = ctx_id & queue_size_mask;
+	cmd_id |= aq->sq.pc & ~queue_size_mask;
+	cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+	cmd->aq_common_descriptor.command_id = cmd_id;
+	cmd->aq_common_descriptor.flags |= aq->sq.phase &
+		EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+
+	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
+	if (!comp_ctx) {
+		efa_com_dealloc_ctx_id(aq, ctx_id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	comp_ctx->status = EFA_CMD_SUBMITTED;
+	comp_ctx->comp_size = comp_size_in_bytes;
+	comp_ctx->user_cqe = comp;
+	comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+
+	reinit_completion(&comp_ctx->wait_event);
+
+	aqe = &aq->sq.entries[pi];
+	memset(aqe, 0, sizeof(*aqe));
+	memcpy(aqe, cmd, cmd_size_in_bytes);
+
+	aq->sq.pc++;
+	atomic64_inc(&aq->stats.submitted_cmd);
+
+	if ((aq->sq.pc & queue_size_mask) == 0)
+		aq->sq.phase = !aq->sq.phase;
+
+	/* barrier not needed in case of writel */
+	writel(aq->sq.pc, aq->sq.db_addr);
+
+	return comp_ctx;
+}
+
+static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
+{
+	size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
+	size_t size = aq->depth * sizeof(struct efa_comp_ctx);
+	struct efa_comp_ctx *comp_ctx;
+	u16 i;
+
+	aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
+	aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
+	if (!aq->comp_ctx || !aq->comp_ctx_pool) {
+		devm_kfree(aq->dmadev, aq->comp_ctx_pool);
+		devm_kfree(aq->dmadev, aq->comp_ctx);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < aq->depth; i++) {
+		comp_ctx = efa_com_get_comp_ctx(aq, i, false);
+		if (comp_ctx)
+			init_completion(&comp_ctx->wait_event);
+
+		aq->comp_ctx_pool[i] = i;
+	}
+
+	spin_lock_init(&aq->comp_ctx_lock);
+
+	aq->comp_ctx_pool_next = 0;
+
+	return 0;
+}
+
+static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+						     struct efa_admin_aq_entry *cmd,
+						     size_t cmd_size_in_bytes,
+						     struct efa_admin_acq_entry *comp,
+						     size_t comp_size_in_bytes)
+{
+	struct efa_comp_ctx *comp_ctx;
+
+	spin_lock(&aq->sq.lock);
+	if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
+		ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
+		spin_unlock(&aq->sq.lock);
+		return ERR_PTR(-ENODEV);
+	}
+
+	comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
+					      comp_size_in_bytes);
+	spin_unlock(&aq->sq.lock);
+	if (IS_ERR(comp_ctx))
+		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+	return comp_ctx;
+}
+
+static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
+						   struct efa_admin_acq_entry *cqe)
+{
+	struct efa_comp_ctx *comp_ctx;
+	u16 cmd_id;
+
+	cmd_id = cqe->acq_common_descriptor.command &
+		 EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+
+	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
+	if (!comp_ctx) {
+		ibdev_err(aq->efa_dev,
+			  "comp_ctx is NULL. Changing the admin queue running state\n");
+		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+		return;
+	}
+
+	comp_ctx->status = EFA_CMD_COMPLETED;
+	comp_ctx->comp_status = cqe->acq_common_descriptor.status;
+	if (comp_ctx->user_cqe)
+		memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
+
+	if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
+		complete(&comp_ctx->wait_event);
+}
+
+static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
+{
+	struct efa_admin_acq_entry *cqe;
+	u16 queue_size_mask;
+	u16 comp_num = 0;
+	u8 phase;
+	u16 ci;
+
+	queue_size_mask = aq->depth - 1;
+
+	ci = aq->cq.cc & queue_size_mask;
+	phase = aq->cq.phase;
+
+	cqe = &aq->cq.entries[ci];
+
+	/* Go over all the completions */
+	while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
+		EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+		/*
+		 * Do not read the rest of the completion entry before the
+		 * phase bit was validated
+		 */
+		dma_rmb();
+		efa_com_handle_single_admin_completion(aq, cqe);
+
+		ci++;
+		comp_num++;
+		if (ci == aq->depth) {
+			ci = 0;
+			phase = !phase;
+		}
+
+		cqe = &aq->cq.entries[ci];
+	}
+
+	aq->cq.cc += comp_num;
+	aq->cq.phase = phase;
+	aq->sq.cc += comp_num;
+	atomic64_add(comp_num, &aq->stats.completed_cmd);
+}
+
+static int efa_com_comp_status_to_errno(u8 comp_status)
+{
+	switch (comp_status) {
+	case EFA_ADMIN_SUCCESS:
+		return 0;
+	case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
+		return -ENOMEM;
+	case EFA_ADMIN_UNSUPPORTED_OPCODE:
+		return -EOPNOTSUPP;
+	case EFA_ADMIN_BAD_OPCODE:
+	case EFA_ADMIN_MALFORMED_REQUEST:
+	case EFA_ADMIN_ILLEGAL_PARAMETER:
+	case EFA_ADMIN_UNKNOWN_ERROR:
+		return -EINVAL;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
+						     struct efa_com_admin_queue *aq)
+{
+	unsigned long timeout;
+	unsigned long flags;
+	int err;
+
+	timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
+
+	while (1) {
+		spin_lock_irqsave(&aq->cq.lock, flags);
+		efa_com_handle_admin_completion(aq);
+		spin_unlock_irqrestore(&aq->cq.lock, flags);
+
+		if (comp_ctx->status != EFA_CMD_SUBMITTED)
+			break;
+
+		if (time_is_before_jiffies(timeout)) {
+			ibdev_err_ratelimited(
+				aq->efa_dev,
+				"Wait for completion (polling) timeout\n");
+			/* EFA didn't have any completion */
+			atomic64_inc(&aq->stats.no_completion);
+
+			clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+			err = -ETIME;
+			goto out;
+		}
+
+		msleep(aq->poll_interval);
+	}
+
+	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+out:
+	efa_com_put_comp_ctx(aq, comp_ctx);
+	return err;
+}
+
+static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
+							struct efa_com_admin_queue *aq)
+{
+	unsigned long flags;
+	int err;
+
+	wait_for_completion_timeout(&comp_ctx->wait_event,
+				    usecs_to_jiffies(aq->completion_timeout));
+
+	/*
+	 * In case the command wasn't completed find out the root cause.
+	 * There might be 2 kinds of errors
+	 * 1) No completion (timeout reached)
+	 * 2) There is completion but the device didn't get any msi-x interrupt.
+	 */
+	if (comp_ctx->status == EFA_CMD_SUBMITTED) {
+		spin_lock_irqsave(&aq->cq.lock, flags);
+		efa_com_handle_admin_completion(aq);
+		spin_unlock_irqrestore(&aq->cq.lock, flags);
+
+		atomic64_inc(&aq->stats.no_completion);
+
+		if (comp_ctx->status == EFA_CMD_COMPLETED)
+			ibdev_err_ratelimited(
+				aq->efa_dev,
+				"The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+				efa_com_cmd_str(comp_ctx->cmd_opcode),
+				comp_ctx->cmd_opcode, comp_ctx->status,
+				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+		else
+			ibdev_err_ratelimited(
+				aq->efa_dev,
+				"The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+				efa_com_cmd_str(comp_ctx->cmd_opcode),
+				comp_ctx->cmd_opcode, comp_ctx->status,
+				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+
+		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+		err = -ETIME;
+		goto out;
+	}
+
+	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+out:
+	efa_com_put_comp_ctx(aq, comp_ctx);
+	return err;
+}
+
+/*
+ * There are two ways to wait for a completion:
+ * Polling mode - poll the completion queue until the completion is available.
+ * Interrupt mode - sleep on a wait queue until the completion is ready
+ * (or the timeout expires). In this mode the interrupt handler is expected
+ * to call efa_com_handle_admin_completion() to mark the completions.
+ */
+static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
+					     struct efa_com_admin_queue *aq)
+{
+	if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
+		return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
+
+	return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
+}
+
+/**
+ * efa_com_cmd_exec - Execute admin command
+ * @aq: admin queue.
+ * @cmd: the admin command to execute.
+ * @cmd_size: the command size.
+ * @comp: command completion return entry.
+ * @comp_size: command completion size.
+ * Submit an admin command and then wait until the device returns a
+ * completion.
+ * The completion will be copied into comp.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
+		     struct efa_admin_aq_entry *cmd,
+		     size_t cmd_size,
+		     struct efa_admin_acq_entry *comp,
+		     size_t comp_size)
+{
+	struct efa_comp_ctx *comp_ctx;
+	int err;
+
+	might_sleep();
+
+	/* Will block if the admin queue is full */
+	down(&aq->avail_cmds);
+
+	ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
+		  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+		  cmd->aq_common_descriptor.opcode);
+	comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
+	if (IS_ERR(comp_ctx)) {
+		ibdev_err_ratelimited(
+			aq->efa_dev,
+			"Failed to submit command %s (opcode %u) err %ld\n",
+			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+			cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
+
+		up(&aq->avail_cmds);
+		return PTR_ERR(comp_ctx);
+	}
+
+	err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
+	if (err)
+		ibdev_err_ratelimited(
+			aq->efa_dev,
+			"Failed to process command %s (opcode %u) comp_status %d err %d\n",
+			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+			cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
+			err);
+
+	up(&aq->avail_cmds);
+
+	return err;
+}
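+
+/*
+ * A minimal usage sketch, mirroring the command wrappers in efa_com_cmd.c:
+ * build the admin command on the stack, set the opcode, and cast both the
+ * command and the response to the generic admin entry types, e.g.:
+ *
+ *	struct efa_admin_alloc_pd_cmd cmd = {};
+ *	struct efa_admin_alloc_pd_resp resp;
+ *
+ *	cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_PD;
+ *	err = efa_com_cmd_exec(aq,
+ *			       (struct efa_admin_aq_entry *)&cmd,
+ *			       sizeof(cmd),
+ *			       (struct efa_admin_acq_entry *)&resp,
+ *			       sizeof(resp));
+ */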
+
+/**
+ * efa_com_admin_destroy - Destroy the admin and the async events queues.
+ * @edev: EFA communication layer struct
+ */
+void efa_com_admin_destroy(struct efa_com_dev *edev)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_com_aenq *aenq = &edev->aenq;
+	struct efa_com_admin_cq *cq = &aq->cq;
+	struct efa_com_admin_sq *sq = &aq->sq;
+	u16 size;
+
+	clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+	devm_kfree(edev->dmadev, aq->comp_ctx_pool);
+	devm_kfree(edev->dmadev, aq->comp_ctx);
+
+	size = aq->depth * sizeof(*sq->entries);
+	dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
+
+	size = aq->depth * sizeof(*cq->entries);
+	dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
+
+	size = aenq->depth * sizeof(*aenq->entries);
+	dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
+}
+
+/**
+ * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
+ * @edev: EFA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the admin completion mode.
+ */
+void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
+{
+	u32 mask_value = 0;
+
+	if (polling)
+		mask_value = EFA_REGS_ADMIN_INTR_MASK;
+
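+	/*
+	 * A non-zero value masks the admin interrupt in the device's
+	 * interrupt mask register, so completions must be polled; zero
+	 * unmasks it for interrupt-driven operation.
+	 */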
+	writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
+	if (polling)
+		set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
+	else
+		clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
+}
+
+static void efa_com_stats_init(struct efa_com_dev *edev)
+{
+	atomic64_t *s = (atomic64_t *)&edev->aq.stats;
+	int i;
+
+	for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
+		atomic64_set(s, 0);
+}
+
+/**
+ * efa_com_admin_init - Init the admin and the async queues
+ * @edev: EFA communication layer struct
+ * @aenq_handlers: The handlers to be called upon events.
+ *
+ * Initialize the admin submission and completion queues.
+ * Initialize the asynchronous events notification queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_admin_init(struct efa_com_dev *edev,
+		       struct efa_aenq_handlers *aenq_handlers)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	u32 timeout;
+	u32 dev_sts;
+	u32 cap;
+	int err;
+
+	dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+	if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
+		ibdev_err(edev->efa_dev,
+			  "Device isn't ready, abort com init %#x\n", dev_sts);
+		return -ENODEV;
+	}
+
+	aq->depth = EFA_ADMIN_QUEUE_DEPTH;
+
+	aq->dmadev = edev->dmadev;
+	aq->efa_dev = edev->efa_dev;
+	set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
+
+	sema_init(&aq->avail_cmds, aq->depth);
+
+	efa_com_stats_init(edev);
+
+	err = efa_com_init_comp_ctxt(aq);
+	if (err)
+		return err;
+
+	err = efa_com_admin_init_sq(edev);
+	if (err)
+		goto err_destroy_comp_ctxt;
+
+	err = efa_com_admin_init_cq(edev);
+	if (err)
+		goto err_destroy_sq;
+
+	efa_com_set_admin_polling_mode(edev, false);
+
+	err = efa_com_admin_init_aenq(edev, aenq_handlers);
+	if (err)
+		goto err_destroy_cq;
+
+	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+	timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
+		  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+	if (timeout)
+		/* the resolution of timeout reg is 100ms */
+		aq->completion_timeout = timeout * 100000;
+	else
+		aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
+	aq->poll_interval = EFA_POLL_INTERVAL_MS;
+
+	set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+	return 0;
+
+err_destroy_cq:
+	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
+			  aq->cq.entries, aq->cq.dma_addr);
+err_destroy_sq:
+	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
+			  aq->sq.entries, aq->sq.dma_addr);
+err_destroy_comp_ctxt:
+	devm_kfree(edev->dmadev, aq->comp_ctx);
+
+	return err;
+}
+
+/**
+ * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
+ * @edev: EFA communication layer struct
+ *
+ * This method goes over the admin completion queue and wakes up
+ * all the pending threads that wait on their command's completion event.
+ *
+ * @note: Should be called after receiving an MSI-X interrupt.
+ */
+void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&edev->aq.cq.lock, flags);
+	efa_com_handle_admin_completion(&edev->aq);
+	spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
+}
+
+/*
+ * efa_com_get_specific_aenq_cb:
+ * return the handler that is relevant to the specific event group
+ */
+static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
+						     u16 group)
+{
+	struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
+
+	if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
+		return aenq_handlers->handlers[group];
+
+	return aenq_handlers->unimplemented_handler;
+}
+
+/**
+ * efa_com_aenq_intr_handler - AENQ interrupt handler
+ * @edev: EFA communication layer struct
+ * @data: Data of interrupt handler.
+ *
+ * Go over the async event notification queue and call the proper aenq handler.
+ */
+void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
+{
+	struct efa_admin_aenq_common_desc *aenq_common;
+	struct efa_com_aenq *aenq = &edev->aenq;
+	struct efa_admin_aenq_entry *aenq_e;
+	efa_aenq_handler handler_cb;
+	u32 processed = 0;
+	u8 phase;
+	u32 ci;
+
+	ci = aenq->cc & (aenq->depth - 1);
+	phase = aenq->phase;
+	aenq_e = &aenq->entries[ci]; /* Get first entry */
+	aenq_common = &aenq_e->aenq_common_desc;
+
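+	/*
+	 * An entry is ready for processing when its phase bit matches the
+	 * driver's expected phase; the expected phase flips each time the
+	 * queue wraps around.
+	 */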
+	/* Go over all the events */
+	while ((READ_ONCE(aenq_common->flags) &
+		EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
+		/*
+		 * Do not read the rest of the completion entry before the
+		 * phase bit has been validated
+		 */
+		dma_rmb();
+
+		/* Handle specific event */
+		handler_cb = efa_com_get_specific_aenq_cb(edev,
+							  aenq_common->group);
+		handler_cb(data, aenq_e); /* call the actual event handler */
+
+		/* Get next event entry */
+		ci++;
+		processed++;
+
+		if (ci == aenq->depth) {
+			ci = 0;
+			phase = !phase;
+		}
+		aenq_e = &aenq->entries[ci];
+		aenq_common = &aenq_e->aenq_common_desc;
+	}
+
+	aenq->cc += processed;
+	aenq->phase = phase;
+
+	/* Don't update aenq doorbell if there weren't any processed events */
+	if (!processed)
+		return;
+
+	/* barrier not needed in case of writel */
+	writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
+}
+
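+/*
+ * Program the DMA address of the readless register read response buffer
+ * into the device, so that register read responses can be delivered by
+ * the device via DMA writes to host memory.
+ */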
+static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
+{
+	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+	u32 addr_high;
+	u32 addr_low;
+
+	/* dma_addr_bits is unknown at this point */
+	addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
+	addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
+
+	writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
+	writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
+}
+
+int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
+{
+	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+
+	spin_lock_init(&mmio_read->lock);
+	mmio_read->read_resp =
+		dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
+				   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+	if (!mmio_read->read_resp)
+		return -ENOMEM;
+
+	efa_com_mmio_reg_read_resp_addr_init(edev);
+
+	mmio_read->read_resp->req_id = 0;
+	mmio_read->seq_num = 0;
+	mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
+
+	return 0;
+}
+
+void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
+{
+	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+
+	dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
+			  mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+}
+
+int efa_com_validate_version(struct efa_com_dev *edev)
+{
+	u32 ctrl_ver_masked;
+	u32 ctrl_ver;
+	u32 ver;
+
+	/*
+	 * Make sure the EFA version and the controller version are at least
+	 * as high as the versions the driver expects
+	 */
+	ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
+	ctrl_ver = efa_com_reg_read32(edev,
+				      EFA_REGS_CONTROLLER_VERSION_OFF);
+
+	ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
+		  (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
+			  EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
+		  ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
+
+	if (ver < MIN_EFA_VER) {
+		ibdev_err(edev->efa_dev,
+			  "EFA version is lower than the minimal version the driver supports\n");
+		return -EOPNOTSUPP;
+	}
+
+	ibdev_dbg(edev->efa_dev,
+		  "efa controller version: %d.%d.%d implementation version %d\n",
+		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
+			  EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
+		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
+			  EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
+		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
+		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
+			  EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+
+	ctrl_ver_masked =
+		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
+		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
+		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
+
+	/* Validate the ctrl version without the implementation ID */
+	if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
+		ibdev_err(edev->efa_dev,
+			  "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/**
+ * efa_com_get_dma_width - Retrieve the physical DMA address width the
+ * device supports.
+ * @edev: EFA communication layer struct
+ *
+ * Retrieve the maximum number of physical address bits the device can handle.
+ *
+ * @return - the DMA address width on success, negative value on failure.
+ */
+int efa_com_get_dma_width(struct efa_com_dev *edev)
+{
+	u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+	int width;
+
+	width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
+		EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+
+	ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
+
+	if (width < 32 || width > 64) {
+		ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
+		return -EINVAL;
+	}
+
+	edev->dma_addr_bits = width;
+
+	return width;
+}
+
+static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
+				u16 exp_state)
+{
+	u32 val, i;
+
+	for (i = 0; i < timeout; i++) {
+		val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+
+		if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
+		    exp_state)
+			return 0;
+
+		ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
+		msleep(EFA_POLL_INTERVAL_MS);
+	}
+
+	return -ETIME;
+}
+
+/**
+ * efa_com_dev_reset - Perform an FLR (Function Level Reset) on the device.
+ * @edev: EFA communication layer struct
+ * @reset_reason: The reason that triggered the reset, in case of an error.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_dev_reset(struct efa_com_dev *edev,
+		      enum efa_regs_reset_reason_types reset_reason)
+{
+	u32 stat, timeout, cap, reset_val;
+	int err;
+
+	stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+
+	if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
+		ibdev_err(edev->efa_dev,
+			  "Device isn't ready, can't reset device\n");
+		return -EINVAL;
+	}
+
+	timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
+		  EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+	if (!timeout) {
+		ibdev_err(edev->efa_dev, "Invalid timeout value\n");
+		return -EINVAL;
+	}
+
+	/* start reset */
+	reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
+	reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
+		     EFA_REGS_DEV_CTL_RESET_REASON_MASK;
+	writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
+
+	/* reset clears the mmio readless address, restore it */
+	efa_com_mmio_reg_read_resp_addr_init(edev);
+
+	err = wait_for_reset_state(edev, timeout,
+				   EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+	if (err) {
+		ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
+		return err;
+	}
+
+	/* reset done */
+	writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
+	err = wait_for_reset_state(edev, timeout, 0);
+	if (err) {
+		ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
+		return err;
+	}
+
+	timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
+		  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+	if (timeout)
+		/* the resolution of timeout reg is 100ms */
+		edev->aq.completion_timeout = timeout * 100000;
+	else
+		edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
new file mode 100644
index 000000000000..3243a29c7eba
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COM_H_
+#define _EFA_COM_H_
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/semaphore.h>
+#include <linux/sched.h>
+
+#include <rdma/ib_verbs.h>
+#include "kcompat.h"
+
+#include "efa_common_defs.h"
+#include "efa_admin_defs.h"
+#include "efa_admin_cmds_defs.h"
+#include "efa_regs_defs.h"
+
+#define EFA_MAX_HANDLERS 256
+
+struct efa_com_admin_cq {
+	struct efa_admin_acq_entry *entries;
+	dma_addr_t dma_addr;
+	spinlock_t lock; /* Protects ACQ */
+
+	u16 cc; /* consumer counter */
+	u8 phase;
+};
+
+struct efa_com_admin_sq {
+	struct efa_admin_aq_entry *entries;
+	dma_addr_t dma_addr;
+	spinlock_t lock; /* Protects ASQ */
+
+	u32 __iomem *db_addr;
+
+	u16 cc; /* consumer counter */
+	u16 pc; /* producer counter */
+	u8 phase;
+};
+
+/* Don't use anything other than atomic64 */
+struct efa_com_stats_admin {
+	atomic64_t submitted_cmd;
+	atomic64_t completed_cmd;
+	atomic64_t no_completion;
+};
+
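+/* Admin queue state bits, used with struct efa_com_admin_queue::state */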
+enum {
+	EFA_AQ_STATE_RUNNING_BIT = 0,
+	EFA_AQ_STATE_POLLING_BIT = 1,
+};
+
+struct efa_com_admin_queue {
+	void *dmadev;
+	void *efa_dev;
+	struct efa_comp_ctx *comp_ctx;
+	u32 completion_timeout; /* usecs */
+	u16 poll_interval; /* msecs */
+	u16 depth;
+	struct efa_com_admin_cq cq;
+	struct efa_com_admin_sq sq;
+	u16 msix_vector_idx;
+
+	unsigned long state;
+
+	/* Count the number of available admin commands */
+	struct semaphore avail_cmds;
+
+	struct efa_com_stats_admin stats;
+
+	spinlock_t comp_ctx_lock; /* Protects completion context pool */
+	u32 *comp_ctx_pool;
+	u16 comp_ctx_pool_next;
+};
+
+struct efa_aenq_handlers;
+
+struct efa_com_aenq {
+	struct efa_admin_aenq_entry *entries;
+	struct efa_aenq_handlers *aenq_handlers;
+	dma_addr_t dma_addr;
+	u32 cc; /* consumer counter */
+	u16 msix_vector_idx;
+	u16 depth;
+	u8 phase;
+};
+
+struct efa_com_mmio_read {
+	struct efa_admin_mmio_req_read_less_resp *read_resp;
+	dma_addr_t read_resp_dma_addr;
+	u16 seq_num;
+	u16 mmio_read_timeout; /* usecs */
+	/* serializes mmio reads */
+	spinlock_t lock;
+};
+
+struct efa_com_dev {
+	struct efa_com_admin_queue aq;
+	struct efa_com_aenq aenq;
+	u8 __iomem *reg_bar;
+	void *dmadev;
+	void *efa_dev;
+	u32 supported_features;
+	u32 dma_addr_bits;
+
+	struct efa_com_mmio_read mmio_read;
+};
+
+typedef void (*efa_aenq_handler)(void *data,
+	      struct efa_admin_aenq_entry *aenq_e);
+
+/* Holds aenq handlers. Indexed by AENQ event group */
+struct efa_aenq_handlers {
+	efa_aenq_handler handlers[EFA_MAX_HANDLERS];
+	efa_aenq_handler unimplemented_handler;
+};
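+
+/*
+ * Example registration (taken from efa_main.c in this patch): handlers are
+ * indexed by AENQ group, and unhandled groups fall back to the
+ * unimplemented_handler callback:
+ *
+ *	static struct efa_aenq_handlers aenq_handlers = {
+ *		.handlers = {
+ *			[EFA_ADMIN_KEEP_ALIVE] = efa_keep_alive,
+ *		},
+ *		.unimplemented_handler = unimplemented_aenq_handler
+ *	};
+ */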
+
+int efa_com_admin_init(struct efa_com_dev *edev,
+		       struct efa_aenq_handlers *aenq_handlers);
+void efa_com_admin_destroy(struct efa_com_dev *edev);
+int efa_com_dev_reset(struct efa_com_dev *edev,
+		      enum efa_regs_reset_reason_types reset_reason);
+void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling);
+void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev);
+int efa_com_mmio_reg_read_init(struct efa_com_dev *edev);
+void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev);
+
+int efa_com_validate_version(struct efa_com_dev *edev);
+int efa_com_get_dma_width(struct efa_com_dev *edev);
+
+int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
+		     struct efa_admin_aq_entry *cmd,
+		     size_t cmd_size,
+		     struct efa_admin_acq_entry *comp,
+		     size_t comp_size);
+void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data);
+
+#endif /* _EFA_COM_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
new file mode 100644
index 000000000000..c079f1332082
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -0,0 +1,765 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "efa_com.h"
+#include "efa_com_cmd.h"
+
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low)
+{
+	*addr_low = lower_32_bits(addr);
+	*addr_high = upper_32_bits(addr);
+}
+
+int efa_com_create_qp(struct efa_com_dev *edev,
+		      struct efa_com_create_qp_params *params,
+		      struct efa_com_create_qp_result *res)
+{
+	struct efa_admin_create_qp_cmd create_qp_cmd = {};
+	struct efa_admin_create_qp_resp cmd_completion;
+	struct efa_com_admin_queue *aq = &edev->aq;
+	int err;
+
+	create_qp_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_QP;
+
+	create_qp_cmd.pd = params->pd;
+	create_qp_cmd.qp_type = params->qp_type;
+	create_qp_cmd.rq_base_addr = params->rq_base_addr;
+	create_qp_cmd.send_cq_idx = params->send_cq_idx;
+	create_qp_cmd.recv_cq_idx = params->recv_cq_idx;
+	create_qp_cmd.qp_alloc_size.send_queue_ring_size =
+		params->sq_ring_size_in_bytes;
+	create_qp_cmd.qp_alloc_size.send_queue_depth =
+			params->sq_depth;
+	create_qp_cmd.qp_alloc_size.recv_queue_ring_size =
+			params->rq_ring_size_in_bytes;
+	create_qp_cmd.qp_alloc_size.recv_queue_depth =
+			params->rq_depth;
+	create_qp_cmd.uar = params->uarn;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&create_qp_cmd,
+			       sizeof(create_qp_cmd),
+			       (struct efa_admin_acq_entry *)&cmd_completion,
+			       sizeof(cmd_completion));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to create qp [%d]\n", err);
+		return err;
+	}
+
+	res->qp_handle = cmd_completion.qp_handle;
+	res->qp_num = cmd_completion.qp_num;
+	res->sq_db_offset = cmd_completion.sq_db_offset;
+	res->rq_db_offset = cmd_completion.rq_db_offset;
+	res->llq_descriptors_offset = cmd_completion.llq_descriptors_offset;
+	res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx;
+	res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx;
+
+	return 0;
+}
+
+int efa_com_modify_qp(struct efa_com_dev *edev,
+		      struct efa_com_modify_qp_params *params)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_modify_qp_cmd cmd = {};
+	struct efa_admin_modify_qp_resp resp;
+	int err;
+
+	cmd.aq_common_desc.opcode = EFA_ADMIN_MODIFY_QP;
+	cmd.modify_mask = params->modify_mask;
+	cmd.qp_handle = params->qp_handle;
+	cmd.qp_state = params->qp_state;
+	cmd.cur_qp_state = params->cur_qp_state;
+	cmd.qkey = params->qkey;
+	cmd.sq_psn = params->sq_psn;
+	cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (err) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Failed to modify qp-%u modify_mask[%#x] [%d]\n",
+			cmd.qp_handle, cmd.modify_mask, err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_com_query_qp(struct efa_com_dev *edev,
+		     struct efa_com_query_qp_params *params,
+		     struct efa_com_query_qp_result *result)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_query_qp_cmd cmd = {};
+	struct efa_admin_query_qp_resp resp;
+	int err;
+
+	cmd.aq_common_desc.opcode = EFA_ADMIN_QUERY_QP;
+	cmd.qp_handle = params->qp_handle;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to query qp-%u [%d]\n",
+				      cmd.qp_handle, err);
+		return err;
+	}
+
+	result->qp_state = resp.qp_state;
+	result->qkey = resp.qkey;
+	result->sq_draining = resp.sq_draining;
+	result->sq_psn = resp.sq_psn;
+
+	return 0;
+}
+
+int efa_com_destroy_qp(struct efa_com_dev *edev,
+		       struct efa_com_destroy_qp_params *params)
+{
+	struct efa_admin_destroy_qp_resp cmd_completion;
+	struct efa_admin_destroy_qp_cmd qp_cmd = {};
+	struct efa_com_admin_queue *aq = &edev->aq;
+	int err;
+
+	qp_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_QP;
+	qp_cmd.qp_handle = params->qp_handle;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&qp_cmd,
+			       sizeof(qp_cmd),
+			       (struct efa_admin_acq_entry *)&cmd_completion,
+			       sizeof(cmd_completion));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to destroy qp-%u [%d]\n",
+				      qp_cmd.qp_handle, err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_com_create_cq(struct efa_com_dev *edev,
+		      struct efa_com_create_cq_params *params,
+		      struct efa_com_create_cq_result *result)
+{
+	struct efa_admin_create_cq_resp cmd_completion;
+	struct efa_admin_create_cq_cmd create_cmd = {};
+	struct efa_com_admin_queue *aq = &edev->aq;
+	int err;
+
+	create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ;
+	create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) &
+				EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
+	create_cmd.cq_depth = params->cq_depth;
+	create_cmd.num_sub_cqs = params->num_sub_cqs;
+	create_cmd.uar = params->uarn;
+
+	efa_com_set_dma_addr(params->dma_addr,
+			     &create_cmd.cq_ba.mem_addr_high,
+			     &create_cmd.cq_ba.mem_addr_low);
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&create_cmd,
+			       sizeof(create_cmd),
+			       (struct efa_admin_acq_entry *)&cmd_completion,
+			       sizeof(cmd_completion));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to create cq[%d]\n", err);
+		return err;
+	}
+
+	result->cq_idx = cmd_completion.cq_idx;
+	result->actual_depth = params->cq_depth;
+
+	return 0;
+}
+
+int efa_com_destroy_cq(struct efa_com_dev *edev,
+		       struct efa_com_destroy_cq_params *params)
+{
+	struct efa_admin_destroy_cq_cmd destroy_cmd = {};
+	struct efa_admin_destroy_cq_resp destroy_resp;
+	struct efa_com_admin_queue *aq = &edev->aq;
+	int err;
+
+	destroy_cmd.cq_idx = params->cq_idx;
+	destroy_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_CQ;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&destroy_cmd,
+			       sizeof(destroy_cmd),
+			       (struct efa_admin_acq_entry *)&destroy_resp,
+			       sizeof(destroy_resp));
+
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to destroy CQ-%u [%d]\n",
+				      params->cq_idx, err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_com_register_mr(struct efa_com_dev *edev,
+			struct efa_com_reg_mr_params *params,
+			struct efa_com_reg_mr_result *result)
+{
+	struct efa_admin_reg_mr_resp cmd_completion;
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_reg_mr_cmd mr_cmd = {};
+	int err;
+
+	mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR;
+	mr_cmd.pd = params->pd;
+	mr_cmd.mr_length = params->mr_length_in_bytes;
+	mr_cmd.flags |= params->page_shift &
+		EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK;
+	mr_cmd.iova = params->iova;
+	mr_cmd.permissions |= params->permissions &
+			      EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK;
+
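+	/*
+	 * Short registrations use the PBL array embedded inline in the
+	 * command; larger ones reference a separate (possibly indirect)
+	 * physical buffer list through the control-buffer flags.
+	 */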
+	if (params->inline_pbl) {
+		memcpy(mr_cmd.pbl.inline_pbl_array,
+		       params->pbl.inline_pbl_array,
+		       sizeof(mr_cmd.pbl.inline_pbl_array));
+	} else {
+		mr_cmd.pbl.pbl.length = params->pbl.pbl.length;
+		mr_cmd.pbl.pbl.address.mem_addr_low =
+			params->pbl.pbl.address.mem_addr_low;
+		mr_cmd.pbl.pbl.address.mem_addr_high =
+			params->pbl.pbl.address.mem_addr_high;
+		mr_cmd.aq_common_desc.flags |=
+			EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK;
+		if (params->indirect)
+			mr_cmd.aq_common_desc.flags |=
+				EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	}
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&mr_cmd,
+			       sizeof(mr_cmd),
+			       (struct efa_admin_acq_entry *)&cmd_completion,
+			       sizeof(cmd_completion));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to register mr [%d]\n", err);
+		return err;
+	}
+
+	result->l_key = cmd_completion.l_key;
+	result->r_key = cmd_completion.r_key;
+
+	return 0;
+}
+
+int efa_com_dereg_mr(struct efa_com_dev *edev,
+		     struct efa_com_dereg_mr_params *params)
+{
+	struct efa_admin_dereg_mr_resp cmd_completion;
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_dereg_mr_cmd mr_cmd = {};
+	int err;
+
+	mr_cmd.aq_common_desc.opcode = EFA_ADMIN_DEREG_MR;
+	mr_cmd.l_key = params->l_key;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&mr_cmd,
+			       sizeof(mr_cmd),
+			       (struct efa_admin_acq_entry *)&cmd_completion,
+			       sizeof(cmd_completion));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to de-register mr(lkey-%u) [%d]\n",
+				      mr_cmd.l_key, err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_com_create_ah(struct efa_com_dev *edev,
+		      struct efa_com_create_ah_params *params,
+		      struct efa_com_create_ah_result *result)
+{
+	struct efa_admin_create_ah_resp cmd_completion;
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_create_ah_cmd ah_cmd = {};
+	int err;
+
+	ah_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_AH;
+
+	memcpy(ah_cmd.dest_addr, params->dest_addr, sizeof(ah_cmd.dest_addr));
+	ah_cmd.pd = params->pdn;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&ah_cmd,
+			       sizeof(ah_cmd),
+			       (struct efa_admin_acq_entry *)&cmd_completion,
+			       sizeof(cmd_completion));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to create ah for %pI6 [%d]\n",
+				      ah_cmd.dest_addr, err);
+		return err;
+	}
+
+	result->ah = cmd_completion.ah;
+
+	return 0;
+}
+
+int efa_com_destroy_ah(struct efa_com_dev *edev,
+		       struct efa_com_destroy_ah_params *params)
+{
+	struct efa_admin_destroy_ah_resp cmd_completion;
+	struct efa_admin_destroy_ah_cmd ah_cmd = {};
+	struct efa_com_admin_queue *aq = &edev->aq;
+	int err;
+
+	ah_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_AH;
+	ah_cmd.ah = params->ah;
+	ah_cmd.pd = params->pdn;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&ah_cmd,
+			       sizeof(ah_cmd),
+			       (struct efa_admin_acq_entry *)&cmd_completion,
+			       sizeof(cmd_completion));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to destroy ah-%d pd-%d [%d]\n",
+				      ah_cmd.ah, ah_cmd.pd, err);
+		return err;
+	}
+
+	return 0;
+}
+
+static bool
+efa_com_check_supported_feature_id(struct efa_com_dev *edev,
+				   enum efa_admin_aq_feature_id feature_id)
+{
+	u32 feature_mask = 1 << feature_id;
+
+	/* Device attributes are always supported */
+	if (feature_id != EFA_ADMIN_DEVICE_ATTR &&
+	    !(edev->supported_features & feature_mask))
+		return false;
+
+	return true;
+}
+
+static int efa_com_get_feature_ex(struct efa_com_dev *edev,
+				  struct efa_admin_get_feature_resp *get_resp,
+				  enum efa_admin_aq_feature_id feature_id,
+				  dma_addr_t control_buf_dma_addr,
+				  u32 control_buff_size)
+{
+	struct efa_admin_get_feature_cmd get_cmd = {};
+	struct efa_com_admin_queue *aq;
+	int err;
+
+	if (!efa_com_check_supported_feature_id(edev, feature_id)) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Feature %d isn't supported\n",
+				      feature_id);
+		return -EOPNOTSUPP;
+	}
+
+	aq = &edev->aq;
+
+	get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE;
+
+	if (control_buff_size)
+		get_cmd.aq_common_descriptor.flags =
+			EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+
+	efa_com_set_dma_addr(control_buf_dma_addr,
+			     &get_cmd.control_buffer.address.mem_addr_high,
+			     &get_cmd.control_buffer.address.mem_addr_low);
+
+	get_cmd.control_buffer.length = control_buff_size;
+	get_cmd.feature_common.feature_id = feature_id;
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&get_cmd,
+			       sizeof(get_cmd),
+			       (struct efa_admin_acq_entry *)get_resp,
+			       sizeof(*get_resp));
+
+	if (err) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Failed to submit get_feature command %d [%d]\n",
+			feature_id, err);
+		return err;
+	}
+
+	return 0;
+}
+
+static int efa_com_get_feature(struct efa_com_dev *edev,
+			       struct efa_admin_get_feature_resp *get_resp,
+			       enum efa_admin_aq_feature_id feature_id)
+{
+	return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0);
+}
+
+int efa_com_get_network_attr(struct efa_com_dev *edev,
+			     struct efa_com_get_network_attr_result *result)
+{
+	struct efa_admin_get_feature_resp resp;
+	int err;
+
+	err = efa_com_get_feature(edev, &resp,
+				  EFA_ADMIN_NETWORK_ATTR);
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to get network attributes %d\n",
+				      err);
+		return err;
+	}
+
+	memcpy(result->addr, resp.u.network_attr.addr,
+	       sizeof(resp.u.network_attr.addr));
+	result->mtu = resp.u.network_attr.mtu;
+
+	return 0;
+}
+
+int efa_com_get_device_attr(struct efa_com_dev *edev,
+			    struct efa_com_get_device_attr_result *result)
+{
+	struct efa_admin_get_feature_resp resp;
+	int err;
+
+	err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR);
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to get device attributes %d\n",
+				      err);
+		return err;
+	}
+
+	result->page_size_cap = resp.u.device_attr.page_size_cap;
+	result->fw_version = resp.u.device_attr.fw_version;
+	result->admin_api_version = resp.u.device_attr.admin_api_version;
+	result->device_version = resp.u.device_attr.device_version;
+	result->supported_features = resp.u.device_attr.supported_features;
+	result->phys_addr_width = resp.u.device_attr.phys_addr_width;
+	result->virt_addr_width = resp.u.device_attr.virt_addr_width;
+	result->db_bar = resp.u.device_attr.db_bar;
+
+	if (result->admin_api_version < 1) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Failed to get device attr api version [%u < 1]\n",
+			result->admin_api_version);
+		return -EINVAL;
+	}
+
+	edev->supported_features = resp.u.device_attr.supported_features;
+	err = efa_com_get_feature(edev, &resp,
+				  EFA_ADMIN_QUEUE_ATTR);
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to get queue attributes %d\n",
+				      err);
+		return err;
+	}
+
+	result->max_qp = resp.u.queue_attr.max_qp;
+	result->max_sq_depth = resp.u.queue_attr.max_sq_depth;
+	result->max_rq_depth = resp.u.queue_attr.max_rq_depth;
+	result->max_cq = resp.u.queue_attr.max_cq;
+	result->max_cq_depth = resp.u.queue_attr.max_cq_depth;
+	result->inline_buf_size = resp.u.queue_attr.inline_buf_size;
+	result->max_sq_sge = resp.u.queue_attr.max_wr_send_sges;
+	result->max_rq_sge = resp.u.queue_attr.max_wr_recv_sges;
+	result->max_mr = resp.u.queue_attr.max_mr;
+	result->max_mr_pages = resp.u.queue_attr.max_mr_pages;
+	result->max_pd = resp.u.queue_attr.max_pd;
+	result->max_ah = resp.u.queue_attr.max_ah;
+	result->max_llq_size = resp.u.queue_attr.max_llq_size;
+	result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
+
+	return 0;
+}
+
+int efa_com_get_hw_hints(struct efa_com_dev *edev,
+			 struct efa_com_get_hw_hints_result *result)
+{
+	struct efa_admin_get_feature_resp resp;
+	int err;
+
+	err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS);
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to get hw hints %d\n", err);
+		return err;
+	}
+
+	result->admin_completion_timeout = resp.u.hw_hints.admin_completion_timeout;
+	result->driver_watchdog_timeout = resp.u.hw_hints.driver_watchdog_timeout;
+	result->mmio_read_timeout = resp.u.hw_hints.mmio_read_timeout;
+	result->poll_interval = resp.u.hw_hints.poll_interval;
+
+	return 0;
+}
+
+static int efa_com_set_feature_ex(struct efa_com_dev *edev,
+				  struct efa_admin_set_feature_resp *set_resp,
+				  struct efa_admin_set_feature_cmd *set_cmd,
+				  enum efa_admin_aq_feature_id feature_id,
+				  dma_addr_t control_buf_dma_addr,
+				  u32 control_buff_size)
+{
+	struct efa_com_admin_queue *aq;
+	int err;
+
+	if (!efa_com_check_supported_feature_id(edev, feature_id)) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Feature %d isn't supported\n",
+				      feature_id);
+		return -EOPNOTSUPP;
+	}
+
+	aq = &edev->aq;
+
+	set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE;
+	if (control_buff_size) {
+		set_cmd->aq_common_descriptor.flags =
+			EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+		efa_com_set_dma_addr(control_buf_dma_addr,
+				     &set_cmd->control_buffer.address.mem_addr_high,
+				     &set_cmd->control_buffer.address.mem_addr_low);
+	}
+
+	set_cmd->control_buffer.length = control_buff_size;
+	set_cmd->feature_common.feature_id = feature_id;
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)set_cmd,
+			       sizeof(*set_cmd),
+			       (struct efa_admin_acq_entry *)set_resp,
+			       sizeof(*set_resp));
+
+	if (err) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Failed to submit set_feature command %d error: %d\n",
+			feature_id, err);
+		return err;
+	}
+
+	return 0;
+}
+
+static int efa_com_set_feature(struct efa_com_dev *edev,
+			       struct efa_admin_set_feature_resp *set_resp,
+			       struct efa_admin_set_feature_cmd *set_cmd,
+			       enum efa_admin_aq_feature_id feature_id)
+{
+	return efa_com_set_feature_ex(edev, set_resp, set_cmd, feature_id,
+				      0, 0);
+}
+
+int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
+{
+	struct efa_admin_get_feature_resp get_resp;
+	struct efa_admin_set_feature_resp set_resp;
+	struct efa_admin_set_feature_cmd cmd = {};
+	int err;
+
+	ibdev_dbg(edev->efa_dev, "Configuring aenq with groups[%#x]\n", groups);
+
+	err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG);
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to get aenq attributes: %d\n",
+				      err);
+		return err;
+	}
+
+	ibdev_dbg(edev->efa_dev,
+		  "Get aenq groups: supported[%#x] enabled[%#x]\n",
+		  get_resp.u.aenq.supported_groups,
+		  get_resp.u.aenq.enabled_groups);
+
+	if ((get_resp.u.aenq.supported_groups & groups) != groups) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
+			groups, get_resp.u.aenq.supported_groups);
+		return -EOPNOTSUPP;
+	}
+
+	cmd.u.aenq.enabled_groups = groups;
+	err = efa_com_set_feature(edev, &set_resp, &cmd,
+				  EFA_ADMIN_AENQ_CONFIG);
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to set aenq attributes: %d\n",
+				      err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_com_alloc_pd(struct efa_com_dev *edev,
+		     struct efa_com_alloc_pd_result *result)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_alloc_pd_cmd cmd = {};
+	struct efa_admin_alloc_pd_resp resp;
+	int err;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_PD;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to allocate pd[%d]\n", err);
+		return err;
+	}
+
+	result->pdn = resp.pd;
+
+	return 0;
+}
+
+int efa_com_dealloc_pd(struct efa_com_dev *edev,
+		       struct efa_com_dealloc_pd_params *params)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_dealloc_pd_cmd cmd = {};
+	struct efa_admin_dealloc_pd_resp resp;
+	int err;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_PD;
+	cmd.pd = params->pdn;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to deallocate pd-%u [%d]\n",
+				      cmd.pd, err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_com_alloc_uar(struct efa_com_dev *edev,
+		      struct efa_com_alloc_uar_result *result)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_alloc_uar_cmd cmd = {};
+	struct efa_admin_alloc_uar_resp resp;
+	int err;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_UAR;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to allocate uar[%d]\n", err);
+		return err;
+	}
+
+	result->uarn = resp.uar;
+
+	return 0;
+}
+
+int efa_com_dealloc_uar(struct efa_com_dev *edev,
+			struct efa_com_dealloc_uar_params *params)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_dealloc_uar_cmd cmd = {};
+	struct efa_admin_dealloc_uar_resp resp;
+	int err;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_UAR;
+	cmd.uar = params->uarn;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to deallocate uar-%u [%d]\n",
+				      cmd.uar, err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_com_get_stats(struct efa_com_dev *edev,
+		      struct efa_com_get_stats_params *params,
+		      union efa_com_get_stats_result *result)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_aq_get_stats_cmd cmd = {};
+	struct efa_admin_acq_get_stats_resp resp;
+	int err;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_STATS;
+	cmd.type = params->type;
+	cmd.scope = params->scope;
+	cmd.scope_modifier = params->scope_modifier;
+
+	err = efa_com_cmd_exec(aq,
+			       (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (err) {
+		ibdev_err_ratelimited(
+			edev->efa_dev,
+			"Failed to get stats type-%u scope-%u.%u [%d]\n",
+			cmd.type, cmd.scope, cmd.scope_modifier, err);
+		return err;
+	}
+
+	result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes;
+	result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts;
+	result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes;
+	result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts;
+	result->basic_stats.rx_drops = resp.basic_stats.rx_drops;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
new file mode 100644
index 000000000000..7f6c13052f49
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COM_CMD_H_
+#define _EFA_COM_CMD_H_
+
+#include "efa_com.h"
+
+#define EFA_GID_SIZE 16
+
+struct efa_com_create_qp_params {
+	u64 rq_base_addr;
+	u32 send_cq_idx;
+	u32 recv_cq_idx;
+	/*
+	 * Send descriptor ring size in bytes,
+	 * sufficient for user-provided number of WQEs and SGL size
+	 */
+	u32 sq_ring_size_in_bytes;
+	/* Max number of WQEs that will be posted on send queue */
+	u32 sq_depth;
+	/* Recv descriptor ring size in bytes */
+	u32 rq_ring_size_in_bytes;
+	u32 rq_depth;
+	u16 pd;
+	u16 uarn;
+	u8 qp_type;
+};
+
+struct efa_com_create_qp_result {
+	u32 qp_handle;
+	u32 qp_num;
+	u32 sq_db_offset;
+	u32 rq_db_offset;
+	u32 llq_descriptors_offset;
+	u16 send_sub_cq_idx;
+	u16 recv_sub_cq_idx;
+};
+
+struct efa_com_modify_qp_params {
+	u32 modify_mask;
+	u32 qp_handle;
+	u32 qp_state;
+	u32 cur_qp_state;
+	u32 qkey;
+	u32 sq_psn;
+	u8 sq_drained_async_notify;
+};
+
+struct efa_com_query_qp_params {
+	u32 qp_handle;
+};
+
+struct efa_com_query_qp_result {
+	u32 qp_state;
+	u32 qkey;
+	u32 sq_draining;
+	u32 sq_psn;
+};
+
+struct efa_com_destroy_qp_params {
+	u32 qp_handle;
+};
+
+struct efa_com_create_cq_params {
+	/* cq physical base address in OS memory */
+	dma_addr_t dma_addr;
+	/* completion queue depth in # of entries */
+	u16 cq_depth;
+	u16 num_sub_cqs;
+	u16 uarn;
+	u8 entry_size_in_bytes;
+};
+
+struct efa_com_create_cq_result {
+	/* cq identifier */
+	u16 cq_idx;
+	/* actual cq depth in # of entries */
+	u16 actual_depth;
+};
+
+struct efa_com_destroy_cq_params {
+	u16 cq_idx;
+};
+
+struct efa_com_create_ah_params {
+	u16 pdn;
+	/* Destination address in network byte order */
+	u8 dest_addr[EFA_GID_SIZE];
+};
+
+struct efa_com_create_ah_result {
+	u16 ah;
+};
+
+struct efa_com_destroy_ah_params {
+	u16 ah;
+	u16 pdn;
+};
+
+struct efa_com_get_network_attr_result {
+	u8 addr[EFA_GID_SIZE];
+	u32 mtu;
+};
+
+struct efa_com_get_device_attr_result {
+	u64 page_size_cap;
+	u64 max_mr_pages;
+	u32 fw_version;
+	u32 admin_api_version;
+	u32 device_version;
+	u32 supported_features;
+	u32 phys_addr_width;
+	u32 virt_addr_width;
+	u32 max_qp;
+	u32 max_sq_depth; /* wqes */
+	u32 max_rq_depth; /* wqes */
+	u32 max_cq;
+	u32 max_cq_depth; /* cqes */
+	u32 inline_buf_size;
+	u32 max_mr;
+	u32 max_pd;
+	u32 max_ah;
+	u32 max_llq_size;
+	u16 sub_cqs_per_cq;
+	u16 max_sq_sge;
+	u16 max_rq_sge;
+	u8 db_bar;
+};
+
+struct efa_com_get_hw_hints_result {
+	u16 mmio_read_timeout;
+	u16 driver_watchdog_timeout;
+	u16 admin_completion_timeout;
+	u16 poll_interval;
+	u32 reserved[4];
+};
+
+struct efa_com_mem_addr {
+	u32 mem_addr_low;
+	u32 mem_addr_high;
+};
+
+/* Used in indirect mode for chaining page list chunks */
+struct efa_com_ctrl_buff_info {
+	/* indicates the length of the buffer pointed to by address */
+	u32 length;
+	/* points to control buffer (direct or indirect) */
+	struct efa_com_mem_addr address;
+};
+
+struct efa_com_reg_mr_params {
+	/* Memory region length, in bytes. */
+	u64 mr_length_in_bytes;
+	/* IO Virtual Address associated with this MR. */
+	u64 iova;
+	/* words 8:15: Physical Buffer List, each element is page-aligned. */
+	union {
+		/*
+		 * Inline array of physical addresses of app pages
+		 * (optimization for short region reservations)
+		 */
+		u64 inline_pbl_array[4];
+		/*
+		 * Describes the next physically contiguous chunk of indirect
+		 * page list. A page list contains physical addresses of command
+		 * data pages. Data pages are 4KB; page list chunks are
+		 * variable-sized.
+		 */
+		struct efa_com_ctrl_buff_info pbl;
+	} pbl;
+	/* number of pages in PBL (redundant, could be calculated) */
+	u32 page_num;
+	/* Protection Domain */
+	u16 pd;
+	/*
+	 * phys_page_size_shift - page size is (1 << phys_page_size_shift)
+	 * Page size is used for building the Virtual to Physical
+	 * address mapping
+	 */
+	u8 page_shift;
+	/*
+	 * permissions
+	 * bit 0: local_write_enable - write permission; a value of 1 is
+	 * needed for RQ buffers and for RDMA write
+	 * remaining bits: reserved - remote access flags, etc.
+	 */
+	u8 permissions;
+	u8 inline_pbl;
+	u8 indirect;
+};
+
+struct efa_com_reg_mr_result {
+	/*
+	 * To be used in conjunction with local buffers references in SQ and
+	 * RQ WQE
+	 */
+	u32 l_key;
+	/*
+	 * To be used in incoming RDMA semantics messages to refer to remotely
+	 * accessed memory region
+	 */
+	u32 r_key;
+};
+
+struct efa_com_dereg_mr_params {
+	u32 l_key;
+};
+
+struct efa_com_alloc_pd_result {
+	u16 pdn;
+};
+
+struct efa_com_dealloc_pd_params {
+	u16 pdn;
+};
+
+struct efa_com_alloc_uar_result {
+	u16 uarn;
+};
+
+struct efa_com_dealloc_uar_params {
+	u16 uarn;
+};
+
+struct efa_com_get_stats_params {
+	/* see enum efa_admin_get_stats_type */
+	u8 type;
+	/* see enum efa_admin_get_stats_scope */
+	u8 scope;
+	u16 scope_modifier;
+};
+
+struct efa_com_basic_stats {
+	u64 tx_bytes;
+	u64 tx_pkts;
+	u64 rx_bytes;
+	u64 rx_pkts;
+	u64 rx_drops;
+};
+
+union efa_com_get_stats_result {
+	struct efa_com_basic_stats basic_stats;
+};
+
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
+int efa_com_create_qp(struct efa_com_dev *edev,
+		      struct efa_com_create_qp_params *params,
+		      struct efa_com_create_qp_result *res);
+int efa_com_modify_qp(struct efa_com_dev *edev,
+		      struct efa_com_modify_qp_params *params);
+int efa_com_query_qp(struct efa_com_dev *edev,
+		     struct efa_com_query_qp_params *params,
+		     struct efa_com_query_qp_result *result);
+int efa_com_destroy_qp(struct efa_com_dev *edev,
+		       struct efa_com_destroy_qp_params *params);
+int efa_com_create_cq(struct efa_com_dev *edev,
+		      struct efa_com_create_cq_params *params,
+		      struct efa_com_create_cq_result *result);
+int efa_com_destroy_cq(struct efa_com_dev *edev,
+		       struct efa_com_destroy_cq_params *params);
+int efa_com_register_mr(struct efa_com_dev *edev,
+			struct efa_com_reg_mr_params *params,
+			struct efa_com_reg_mr_result *result);
+int efa_com_dereg_mr(struct efa_com_dev *edev,
+		     struct efa_com_dereg_mr_params *params);
+int efa_com_create_ah(struct efa_com_dev *edev,
+		      struct efa_com_create_ah_params *params,
+		      struct efa_com_create_ah_result *result);
+int efa_com_destroy_ah(struct efa_com_dev *edev,
+		       struct efa_com_destroy_ah_params *params);
+int efa_com_get_network_attr(struct efa_com_dev *edev,
+			     struct efa_com_get_network_attr_result *result);
+int efa_com_get_device_attr(struct efa_com_dev *edev,
+			    struct efa_com_get_device_attr_result *result);
+int efa_com_get_hw_hints(struct efa_com_dev *edev,
+			 struct efa_com_get_hw_hints_result *result);
+int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups);
+int efa_com_alloc_pd(struct efa_com_dev *edev,
+		     struct efa_com_alloc_pd_result *result);
+int efa_com_dealloc_pd(struct efa_com_dev *edev,
+		       struct efa_com_dealloc_pd_params *params);
+int efa_com_alloc_uar(struct efa_com_dev *edev,
+		      struct efa_com_alloc_uar_result *result);
+int efa_com_dealloc_uar(struct efa_com_dev *edev,
+			struct efa_com_dealloc_uar_params *params);
+int efa_com_get_stats(struct efa_com_dev *edev,
+		      struct efa_com_get_stats_params *params,
+		      union efa_com_get_stats_result *result);
+
+#endif /* _EFA_COM_CMD_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h
new file mode 100644
index 000000000000..c559ec08898e
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_common_defs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COMMON_H_
+#define _EFA_COMMON_H_
+
+#define EFA_COMMON_SPEC_VERSION_MAJOR        2
+#define EFA_COMMON_SPEC_VERSION_MINOR        0
+
+struct efa_common_mem_addr {
+	u32 mem_addr_low;
+
+	u32 mem_addr_high;
+};
+
+#endif /* _EFA_COMMON_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
new file mode 100644
index 000000000000..726532c6238a
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <rdma/ib_user_verbs.h>
+
+#include "efa.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
+#define PCI_VENDOR_ID_AMAZON 0x1d0f
+#endif
+#define PCI_DEV_ID_EFA_VF 0xefa0
+
+static const struct pci_device_id efa_pci_tbl[] = {
+	{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) },
+	{ }
+};
+
+MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION(DEVICE_NAME);
+MODULE_DEVICE_TABLE(pci, efa_pci_tbl);
+
+#define EFA_REG_BAR 0
+#define EFA_MEM_BAR 2
+#define EFA_BASE_BAR_MASK (BIT(EFA_REG_BAR) | BIT(EFA_MEM_BAR))
+
+#define EFA_AENQ_ENABLED_GROUPS \
+	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
+	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
+
+static void efa_update_network_attr(struct efa_dev *dev,
+				    struct efa_com_get_network_attr_result *network_attr)
+{
+	memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr));
+	dev->mtu = network_attr->mtu;
+
+	dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr);
+}
+
+/* This handler will be called for unknown event groups or unimplemented handlers */
+static void unimplemented_aenq_handler(void *data,
+				       struct efa_admin_aenq_entry *aenq_e)
+{
+	struct efa_dev *dev = (struct efa_dev *)data;
+
+	ibdev_err(&dev->ibdev,
+		  "Unknown event was received or event with unimplemented handler\n");
+}
+
+static void efa_keep_alive(void *data, struct efa_admin_aenq_entry *aenq_e)
+{
+	struct efa_dev *dev = (struct efa_dev *)data;
+
+	atomic64_inc(&dev->stats.keep_alive_rcvd);
+}
+
+static struct efa_aenq_handlers aenq_handlers = {
+	.handlers = {
+		[EFA_ADMIN_KEEP_ALIVE] = efa_keep_alive,
+	},
+	.unimplemented_handler = unimplemented_aenq_handler
+};
+
+static void efa_release_bars(struct efa_dev *dev, int bars_mask)
+{
+	struct pci_dev *pdev = dev->pdev;
+	int release_bars;
+
+	release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & bars_mask;
+	pci_release_selected_regions(pdev, release_bars);
+}
+
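+/*
+ * A single management MSI-X vector serves both admin command completions
+ * and asynchronous event notifications (AENQ).
+ */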
+static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data)
+{
+	struct efa_dev *dev = data;
+
+	efa_com_admin_q_comp_intr_handler(&dev->edev);
+	efa_com_aenq_intr_handler(&dev->edev, data);
+
+	return IRQ_HANDLED;
+}
+
+static int efa_request_mgmnt_irq(struct efa_dev *dev)
+{
+	struct efa_irq *irq;
+	int err;
+
+	irq = &dev->admin_irq;
+	err = request_irq(irq->vector, irq->handler, 0, irq->name,
+			  irq->data);
+	if (err) {
+		dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n",
+			err);
+		return err;
+	}
+
+	dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n",
+		nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector);
+	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+
+	return 0;
+}
+
+static void efa_setup_mgmnt_irq(struct efa_dev *dev)
+{
+	u32 cpu;
+
+	snprintf(dev->admin_irq.name, EFA_IRQNAME_SIZE,
+		 "efa-mgmnt@pci:%s", pci_name(dev->pdev));
+	dev->admin_irq.handler = efa_intr_msix_mgmnt;
+	dev->admin_irq.data = dev;
+	dev->admin_irq.vector =
+		pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx);
+	cpu = cpumask_first(cpu_online_mask);
+	dev->admin_irq.cpu = cpu;
+	cpumask_set_cpu(cpu,
+			&dev->admin_irq.affinity_hint_mask);
+	dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n",
+		 &dev->admin_irq,
+		 dev->admin_irq.vector,
+		 dev->admin_irq.name);
+}
+
+static void efa_free_mgmnt_irq(struct efa_dev *dev)
+{
+	struct efa_irq *irq;
+
+	irq = &dev->admin_irq;
+	irq_set_affinity_hint(irq->vector, NULL);
+	free_irq(irq->vector, irq->data);
+}
+
+static int efa_set_mgmnt_irq(struct efa_dev *dev)
+{
+	efa_setup_mgmnt_irq(dev);
+
+	return efa_request_mgmnt_irq(dev);
+}
+
+static int efa_request_doorbell_bar(struct efa_dev *dev)
+{
+	u8 db_bar_idx = dev->dev_attr.db_bar;
+	struct pci_dev *pdev = dev->pdev;
+	int bars;
+	int err;
+
+	if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) {
+		bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx);
+
+		err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+		if (err) {
+			dev_err(&dev->pdev->dev,
+				"pci_request_selected_regions for bar %d failed %d\n",
+				db_bar_idx, err);
+			return err;
+		}
+	}
+
+	dev->db_bar_addr = pci_resource_start(dev->pdev, db_bar_idx);
+	dev->db_bar_len = pci_resource_len(dev->pdev, db_bar_idx);
+
+	return 0;
+}
+
+static void efa_release_doorbell_bar(struct efa_dev *dev)
+{
+	if (!(BIT(dev->dev_attr.db_bar) & EFA_BASE_BAR_MASK))
+		efa_release_bars(dev, BIT(dev->dev_attr.db_bar));
+}
+
+static void efa_update_hw_hints(struct efa_dev *dev,
+				struct efa_com_get_hw_hints_result *hw_hints)
+{
+	struct efa_com_dev *edev = &dev->edev;
+
+	if (hw_hints->mmio_read_timeout)
+		edev->mmio_read.mmio_read_timeout =
+			hw_hints->mmio_read_timeout * 1000;
+
+	if (hw_hints->poll_interval)
+		edev->aq.poll_interval = hw_hints->poll_interval;
+
+	if (hw_hints->admin_completion_timeout)
+		edev->aq.completion_timeout =
+			hw_hints->admin_completion_timeout;
+}
+
+static void efa_stats_init(struct efa_dev *dev)
+{
+	atomic64_t *s = (atomic64_t *)&dev->stats;
+	int i;
+
+	for (i = 0; i < sizeof(dev->stats) / sizeof(*s); i++, s++)
+		atomic64_set(s, 0);
+}
+
+#ifdef HAVE_IB_DEV_OPS
+static const struct ib_device_ops efa_dev_ops = {
+#ifdef HAVE_IB_DEVICE_OPS_COMMON
+	.owner = THIS_MODULE,
+	.driver_id = RDMA_DRIVER_EFA,
+	.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
+#endif
+
+	.alloc_hw_stats = efa_alloc_hw_stats,
+#ifdef HAVE_PD_CORE_ALLOCATION
+	.alloc_pd = efa_alloc_pd,
+#else
+	.alloc_pd = efa_kzalloc_pd,
+#endif
+#ifdef HAVE_UCONTEXT_CORE_ALLOCATION
+	.alloc_ucontext = efa_alloc_ucontext,
+#else
+	.alloc_ucontext = efa_kzalloc_ucontext,
+#endif
+#ifdef HAVE_AH_CORE_ALLOCATION
+	.create_ah = efa_create_ah,
+#else
+	.create_ah = efa_kzalloc_ah,
+#endif
+#ifdef HAVE_CQ_CORE_ALLOCATION
+	.create_cq = efa_create_cq,
+#else
+	.create_cq = efa_kzalloc_cq,
+#endif
+	.create_qp = efa_create_qp,
+	.dealloc_pd = efa_dealloc_pd,
+	.dealloc_ucontext = efa_dealloc_ucontext,
+	.dereg_mr = efa_dereg_mr,
+	.destroy_ah = efa_destroy_ah,
+	.destroy_cq = efa_destroy_cq,
+	.destroy_qp = efa_destroy_qp,
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	.get_dma_mr = efa_get_dma_mr,
+#endif
+	.get_hw_stats = efa_get_hw_stats,
+	.get_link_layer = efa_port_link_layer,
+	.get_port_immutable = efa_get_port_immutable,
+	.mmap = efa_mmap,
+	.modify_qp = efa_modify_qp,
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	.poll_cq = efa_poll_cq,
+	.post_recv = efa_post_recv,
+	.post_send = efa_post_send,
+#endif
+	.query_device = efa_query_device,
+	.query_gid = efa_query_gid,
+	.query_pkey = efa_query_pkey,
+	.query_port = efa_query_port,
+	.query_qp = efa_query_qp,
+	.reg_user_mr = efa_reg_mr,
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	.req_notify_cq = efa_req_notify_cq,
+#endif
+
+#ifdef HAVE_AH_CORE_ALLOCATION
+	INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah),
+#endif
+#ifdef HAVE_CQ_CORE_ALLOCATION
+	INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq),
+#endif
+#ifdef HAVE_PD_CORE_ALLOCATION
+	INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd),
+#endif
+#ifdef HAVE_UCONTEXT_CORE_ALLOCATION
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext),
+#endif
+};
+#endif
+
+static int efa_ib_device_add(struct efa_dev *dev)
+{
+	struct efa_com_get_network_attr_result network_attr;
+	struct efa_com_get_hw_hints_result hw_hints;
+	struct pci_dev *pdev = dev->pdev;
+	int err;
+
+	efa_stats_init(dev);
+
+	err = efa_com_get_device_attr(&dev->edev, &dev->dev_attr);
+	if (err)
+		return err;
+
+	dev_dbg(&dev->pdev->dev, "Doorbells bar (%d)\n", dev->dev_attr.db_bar);
+	err = efa_request_doorbell_bar(dev);
+	if (err)
+		return err;
+
+	err = efa_com_get_network_attr(&dev->edev, &network_attr);
+	if (err)
+		goto err_release_doorbell_bar;
+
+	efa_update_network_attr(dev, &network_attr);
+
+	err = efa_com_get_hw_hints(&dev->edev, &hw_hints);
+	if (err)
+		goto err_release_doorbell_bar;
+
+	efa_update_hw_hints(dev, &hw_hints);
+
+	/* Try to enable all the available aenq groups */
+	err = efa_com_set_aenq_config(&dev->edev, EFA_AENQ_ENABLED_GROUPS);
+	if (err)
+		goto err_release_doorbell_bar;
+
+#ifdef HAVE_UPSTREAM_EFA
+	dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED;
+#else
+	dev->ibdev.node_type = RDMA_NODE_IB_CA;
+#endif
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.num_comp_vectors = 1;
+	dev->ibdev.dev.parent = &pdev->dev;
+
+	dev->ibdev.uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_REG_MR) |
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+
+	dev->ibdev.uverbs_ex_cmd_mask =
+		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+
+#ifndef HAVE_IB_DEVICE_OPS_COMMON
+	dev->ibdev.driver_id = RDMA_DRIVER_EFA;
+	dev->ibdev.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION;
+	dev->ibdev.owner = THIS_MODULE;
+#endif
+#ifdef HAVE_IB_DEV_OPS
+	ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
+#else
+	dev->ibdev.alloc_hw_stats = efa_alloc_hw_stats;
+	dev->ibdev.alloc_pd = efa_kzalloc_pd;
+	dev->ibdev.alloc_ucontext = efa_kzalloc_ucontext;
+	dev->ibdev.create_ah = efa_kzalloc_ah;
+	dev->ibdev.create_cq = efa_kzalloc_cq;
+	dev->ibdev.create_qp = efa_create_qp;
+	dev->ibdev.dealloc_pd = efa_dealloc_pd;
+	dev->ibdev.dealloc_ucontext = efa_dealloc_ucontext;
+	dev->ibdev.dereg_mr = efa_dereg_mr;
+	dev->ibdev.destroy_ah = efa_destroy_ah;
+	dev->ibdev.destroy_cq = efa_destroy_cq;
+	dev->ibdev.destroy_qp = efa_destroy_qp;
+	dev->ibdev.get_dma_mr = efa_get_dma_mr;
+	dev->ibdev.get_hw_stats = efa_get_hw_stats;
+	dev->ibdev.get_link_layer = efa_port_link_layer;
+	dev->ibdev.get_port_immutable = efa_get_port_immutable;
+	dev->ibdev.mmap = efa_mmap;
+	dev->ibdev.modify_qp = efa_modify_qp;
+	dev->ibdev.poll_cq = efa_poll_cq;
+	dev->ibdev.post_recv = efa_post_recv;
+	dev->ibdev.post_send = efa_post_send;
+	dev->ibdev.query_device = efa_query_device;
+	dev->ibdev.query_gid = efa_query_gid;
+	dev->ibdev.query_pkey = efa_query_pkey;
+	dev->ibdev.query_port = efa_query_port;
+	dev->ibdev.query_qp = efa_query_qp;
+	dev->ibdev.reg_user_mr = efa_reg_mr;
+	dev->ibdev.req_notify_cq = efa_req_notify_cq;
+#endif
+
+#ifdef HAVE_IB_REGISTER_DEVICE_TWO_PARAMS
+	err = ib_register_device(&dev->ibdev, "efa_%d");
+#else
+	err = ib_register_device(&dev->ibdev, "efa_%d", NULL);
+#endif
+	if (err)
+		goto err_release_doorbell_bar;
+
+	ibdev_info(&dev->ibdev, "IB device registered\n");
+
+	return 0;
+
+err_release_doorbell_bar:
+	efa_release_doorbell_bar(dev);
+	return err;
+}
+
+static void efa_ib_device_remove(struct efa_dev *dev)
+{
+	efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL);
+	ibdev_info(&dev->ibdev, "Unregister ib device\n");
+	ib_unregister_device(&dev->ibdev);
+	efa_release_doorbell_bar(dev);
+}
+
+static void efa_disable_msix(struct efa_dev *dev)
+{
+	pci_free_irq_vectors(dev->pdev);
+}
+
+static int efa_enable_msix(struct efa_dev *dev)
+{
+	int msix_vecs, irq_num;
+
+	/* Reserve the max msix vectors we might need */
+	msix_vecs = EFA_NUM_MSIX_VEC;
+	dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
+		msix_vecs);
+
+	dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
+	irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs,
+					msix_vecs, PCI_IRQ_MSIX);
+
+	if (irq_num < 0) {
+		dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n",
+			irq_num);
+		return -ENOSPC;
+	}
+
+	if (irq_num != msix_vecs) {
+		dev_err(&dev->pdev->dev,
+			"Allocated %d MSI-X (out of %d requested)\n",
+			irq_num, msix_vecs);
+		efa_disable_msix(dev);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
+{
+	int dma_width;
+	int err;
+
+	err = efa_com_dev_reset(edev, EFA_REGS_RESET_NORMAL);
+	if (err)
+		return err;
+
+	err = efa_com_validate_version(edev);
+	if (err)
+		return err;
+
+	dma_width = efa_com_get_dma_width(edev);
+	if (dma_width < 0) {
+		err = dma_width;
+		return err;
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+	if (err) {
+		dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err);
+		return err;
+	}
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+	if (err) {
+		dev_err(&pdev->dev,
+			"pci_set_consistent_dma_mask failed %d\n",
+			err);
+		return err;
+	}
+
+	return 0;
+}
+
+static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
+{
+	struct efa_com_dev *edev;
+	struct efa_dev *dev;
+	int bars;
+	int err;
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
+		return ERR_PTR(err);
+	}
+
+	pci_set_master(pdev);
+
+#ifdef HAVE_SAFE_IB_ALLOC_DEVICE
+	dev = ib_alloc_device(efa_dev, ibdev);
+#else
+	dev = (struct efa_dev *)ib_alloc_device(sizeof(*dev));
+#endif
+	if (!dev) {
+		dev_err(&pdev->dev, "Device alloc failed\n");
+		err = -ENOMEM;
+		goto err_disable_device;
+	}
+
+	pci_set_drvdata(pdev, dev);
+	edev = &dev->edev;
+	edev->efa_dev = dev;
+	edev->dmadev = &pdev->dev;
+	dev->pdev = pdev;
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK;
+	err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
+			err);
+		goto err_ibdev_destroy;
+	}
+
+	dev->reg_bar_addr = pci_resource_start(pdev, EFA_REG_BAR);
+	dev->reg_bar_len = pci_resource_len(pdev, EFA_REG_BAR);
+	dev->mem_bar_addr = pci_resource_start(pdev, EFA_MEM_BAR);
+	dev->mem_bar_len = pci_resource_len(pdev, EFA_MEM_BAR);
+
+	edev->reg_bar = devm_ioremap(&pdev->dev,
+				     dev->reg_bar_addr,
+				     dev->reg_bar_len);
+	if (!edev->reg_bar) {
+		dev_err(&pdev->dev, "Failed to remap register bar\n");
+		err = -EFAULT;
+		goto err_release_bars;
+	}
+
+	err = efa_com_mmio_reg_read_init(edev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init readless MMIO\n");
+		goto err_iounmap;
+	}
+
+	err = efa_device_init(edev, pdev);
+	if (err) {
+		dev_err(&pdev->dev, "EFA device init failed\n");
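+		/*
+		 * A timeout here usually means the device is not ready yet;
+		 * -EPROBE_DEFER lets the driver core retry the probe later.
+		 */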
+		if (err == -ETIME)
+			err = -EPROBE_DEFER;
+		goto err_reg_read_destroy;
+	}
+
+	err = efa_enable_msix(dev);
+	if (err)
+		goto err_reg_read_destroy;
+
+	edev->aq.msix_vector_idx = dev->admin_msix_vector_idx;
+	edev->aenq.msix_vector_idx = dev->admin_msix_vector_idx;
+
+	err = efa_set_mgmnt_irq(dev);
+	if (err)
+		goto err_disable_msix;
+
+	err = efa_com_admin_init(edev, &aenq_handlers);
+	if (err)
+		goto err_free_mgmnt_irq;
+
+	return dev;
+
+err_free_mgmnt_irq:
+	efa_free_mgmnt_irq(dev);
+err_disable_msix:
+	efa_disable_msix(dev);
+err_reg_read_destroy:
+	efa_com_mmio_reg_read_destroy(edev);
+err_iounmap:
+	devm_iounmap(&pdev->dev, edev->reg_bar);
+err_release_bars:
+	efa_release_bars(dev, EFA_BASE_BAR_MASK);
+err_ibdev_destroy:
+	ib_dealloc_device(&dev->ibdev);
+err_disable_device:
+	pci_disable_device(pdev);
+	return ERR_PTR(err);
+}
+
+static void efa_remove_device(struct pci_dev *pdev)
+{
+	struct efa_dev *dev = pci_get_drvdata(pdev);
+	struct efa_com_dev *edev;
+
+	edev = &dev->edev;
+	efa_com_admin_destroy(edev);
+	efa_free_mgmnt_irq(dev);
+	efa_disable_msix(dev);
+	efa_com_mmio_reg_read_destroy(edev);
+	devm_iounmap(&pdev->dev, edev->reg_bar);
+	efa_release_bars(dev, EFA_BASE_BAR_MASK);
+	ib_dealloc_device(&dev->ibdev);
+	pci_disable_device(pdev);
+}
+
+static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct efa_dev *dev;
+	int err;
+
+	dev = efa_probe_device(pdev);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	err = efa_ib_device_add(dev);
+	if (err)
+		goto err_remove_device;
+
+	return 0;
+
+err_remove_device:
+	efa_remove_device(pdev);
+	return err;
+}
+
+static void efa_remove(struct pci_dev *pdev)
+{
+	struct efa_dev *dev = pci_get_drvdata(pdev);
+
+	efa_ib_device_remove(dev);
+	efa_remove_device(pdev);
+}
+
+static struct pci_driver efa_pci_driver = {
+	.name           = DRV_MODULE_NAME,
+	.id_table       = efa_pci_tbl,
+	.probe          = efa_probe,
+	.remove         = efa_remove,
+};
+
+module_pci_driver(efa_pci_driver);
diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h
new file mode 100644
index 000000000000..bb9cad3d6a15
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_regs_defs.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_REGS_H_
+#define _EFA_REGS_H_
+
+enum efa_regs_reset_reason_types {
+	EFA_REGS_RESET_NORMAL                       = 0,
+	/* Keep alive timeout */
+	EFA_REGS_RESET_KEEP_ALIVE_TO                = 1,
+	EFA_REGS_RESET_ADMIN_TO                     = 2,
+	EFA_REGS_RESET_INIT_ERR                     = 3,
+	EFA_REGS_RESET_DRIVER_INVALID_STATE         = 4,
+	EFA_REGS_RESET_OS_TRIGGER                   = 5,
+	EFA_REGS_RESET_SHUTDOWN                     = 6,
+	EFA_REGS_RESET_USER_TRIGGER                 = 7,
+	EFA_REGS_RESET_GENERIC                      = 8,
+};
+
+/* efa_registers offsets */
+
+/* 0 base */
+#define EFA_REGS_VERSION_OFF                                0x0
+#define EFA_REGS_CONTROLLER_VERSION_OFF                     0x4
+#define EFA_REGS_CAPS_OFF                                   0x8
+#define EFA_REGS_AQ_BASE_LO_OFF                             0x10
+#define EFA_REGS_AQ_BASE_HI_OFF                             0x14
+#define EFA_REGS_AQ_CAPS_OFF                                0x18
+#define EFA_REGS_ACQ_BASE_LO_OFF                            0x20
+#define EFA_REGS_ACQ_BASE_HI_OFF                            0x24
+#define EFA_REGS_ACQ_CAPS_OFF                               0x28
+#define EFA_REGS_AQ_PROD_DB_OFF                             0x2c
+#define EFA_REGS_AENQ_CAPS_OFF                              0x34
+#define EFA_REGS_AENQ_BASE_LO_OFF                           0x38
+#define EFA_REGS_AENQ_BASE_HI_OFF                           0x3c
+#define EFA_REGS_AENQ_CONS_DB_OFF                           0x40
+#define EFA_REGS_INTR_MASK_OFF                              0x4c
+#define EFA_REGS_DEV_CTL_OFF                                0x54
+#define EFA_REGS_DEV_STS_OFF                                0x58
+#define EFA_REGS_MMIO_REG_READ_OFF                          0x5c
+#define EFA_REGS_MMIO_RESP_LO_OFF                           0x60
+#define EFA_REGS_MMIO_RESP_HI_OFF                           0x64
+
+/* version register */
+#define EFA_REGS_VERSION_MINOR_VERSION_MASK                 0xff
+#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT                8
+#define EFA_REGS_VERSION_MAJOR_VERSION_MASK                 0xff00
+
+/* controller_version register */
+#define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK   0xff
+#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT     8
+#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK      0xff00
+#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT     16
+#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK      0xff0000
+#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT           24
+#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK            0xff000000
+
+/* caps register */
+#define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK        0x1
+#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT                   1
+#define EFA_REGS_CAPS_RESET_TIMEOUT_MASK                    0x3e
+#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT                  8
+#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK                   0xff00
+#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT                    16
+#define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK                     0xf0000
+
+/* aq_caps register */
+#define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK                      0xffff
+#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT                16
+#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK                 0xffff0000
+
+/* acq_caps register */
+#define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK                    0xffff
+#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT              16
+#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK               0xff0000
+#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT             24
+#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK              0xff000000
+
+/* aenq_caps register */
+#define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK                  0xffff
+#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT            16
+#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK             0xff0000
+#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT           24
+#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK            0xff000000
+
+/* dev_ctl register */
+#define EFA_REGS_DEV_CTL_DEV_RESET_MASK                     0x1
+#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT                   1
+#define EFA_REGS_DEV_CTL_AQ_RESTART_MASK                    0x2
+#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT                 28
+#define EFA_REGS_DEV_CTL_RESET_REASON_MASK                  0xf0000000
+
+/* dev_sts register */
+#define EFA_REGS_DEV_STS_READY_MASK                         0x1
+#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT       1
+#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK        0x2
+#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT          2
+#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK           0x4
+#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT            3
+#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK             0x8
+#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT               4
+#define EFA_REGS_DEV_STS_RESET_FINISHED_MASK                0x10
+#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT                  5
+#define EFA_REGS_DEV_STS_FATAL_ERROR_MASK                   0x20
+
+/* mmio_reg_read register */
+#define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK                  0xffff
+#define EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT                16
+#define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK                 0xffff0000
+
+#endif /* _EFA_REGS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
new file mode 100644
index 000000000000..0ce693874f27
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -0,0 +1,2416 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "kcompat.h"
+#include <linux/vmalloc.h>
+
+#include <rdma/ib_addr.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#ifdef HAVE_UDATA_TO_DRV_CONTEXT
+#include <rdma/uverbs_ioctl.h>
+#endif
+
+#include "efa.h"
+
+#define EFA_MMAP_FLAG_SHIFT 56
+#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
+#define EFA_MMAP_INVALID U64_MAX
+
+enum {
+	EFA_MMAP_DMA_PAGE = 0,
+	EFA_MMAP_IO_WC,
+	EFA_MMAP_IO_NC,
+};
+
+#define EFA_AENQ_ENABLED_GROUPS \
+	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
+	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
+
+struct efa_mmap_entry {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
+	struct list_head list;
+#endif
+	void  *obj;
+	u64 address;
+	u64 length;
+	u32 mmap_page;
+	u8 mmap_flag;
+};
+
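+/*
+ * The top byte of an mmap key encodes the entry type (mmap_flag); the low
+ * 56 bits carry the entry's page index shifted into a byte offset.
+ */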
+static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
+{
+	return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
+	       ((u64)efa->mmap_page << PAGE_SHIFT);
+}
+
+#define EFA_DEFINE_STATS(op) \
+	op(EFA_TX_BYTES, "tx_bytes") \
+	op(EFA_TX_PKTS, "tx_pkts") \
+	op(EFA_RX_BYTES, "rx_bytes") \
+	op(EFA_RX_PKTS, "rx_pkts") \
+	op(EFA_RX_DROPS, "rx_drops") \
+	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
+	op(EFA_COMPLETED_CMDS, "completed_cmds") \
+	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
+	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
+	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
+	op(EFA_CREATE_QP_ERR, "create_qp_err") \
+	op(EFA_REG_MR_ERR, "reg_mr_err") \
+	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
+	op(EFA_CREATE_AH_ERR, "create_ah_err")
+
+#define EFA_STATS_ENUM(ename, name) ename,
+#define EFA_STATS_STR(ename, name) [ename] = name,
+
+enum efa_hw_stats {
+	EFA_DEFINE_STATS(EFA_STATS_ENUM)
+};
+
+static const char *const efa_stats_names[] = {
+	EFA_DEFINE_STATS(EFA_STATS_STR)
+};
+
+#define EFA_CHUNK_PAYLOAD_SHIFT       12
+#define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
+#define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
+
+#define EFA_CHUNK_SHIFT               12
+#define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
+#define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)
+
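+/*
+ * Number of page pointers that fit in a single chunk, leaving room for the
+ * control pointer that links to the next chunk.
+ */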
+#define EFA_PTRS_PER_CHUNK \
+	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
+
+#define EFA_CHUNK_USED_SIZE \
+	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
+
+#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
+
+struct pbl_chunk {
+	dma_addr_t dma_addr;
+	u64 *buf;
+	u32 length;
+};
+
+struct pbl_chunk_list {
+	struct pbl_chunk *chunks;
+	unsigned int size;
+};
+
+struct pbl_context {
+	union {
+		struct {
+			dma_addr_t dma_addr;
+		} continuous;
+		struct {
+			u32 pbl_buf_size_in_pages;
+			struct scatterlist *sgl;
+			int sg_dma_cnt;
+			struct pbl_chunk_list chunk_list;
+		} indirect;
+	} phys;
+	u64 *pbl_buf;
+	u32 pbl_buf_size_in_bytes;
+	u8 physically_continuous;
+};
+
+static inline struct efa_dev *to_edev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct efa_dev, ibdev);
+}
+
+static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct efa_ucontext, ibucontext);
+}
+
+static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct efa_pd, ibpd);
+}
+
+static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct efa_mr, ibmr);
+}
+
+static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct efa_qp, ibqp);
+}
+
+static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct efa_cq, ibcq);
+}
+
+static inline struct efa_ah *to_eah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct efa_ah, ibah);
+}
+
+#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
+				 FIELD_SIZEOF(typeof(x), fld) <= (sz))
+
+#define is_reserved_cleared(reserved) \
+	!memchr_inv(reserved, 0, sizeof(reserved))
+
+static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
+			       size_t size, enum dma_data_direction dir)
+{
+	void *addr;
+
+	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+	if (!addr)
+		return NULL;
+
+	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
+	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
+		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
+		free_pages_exact(addr, size);
+		return NULL;
+	}
+
+	return addr;
+}
+
+/*
+ * This is only called when the ucontext is destroyed and there can be no
+ * concurrent query via mmap or allocate on the xarray, thus we can be sure no
+ * other thread is using the entry pointer. We also know that all the BAR
+ * pages have either been zap'd or munmaped at this point.  Normal pages are
+ * refcounted and will be freed at the proper time.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+static void mmap_entries_remove_free(struct efa_dev *dev,
+				     struct efa_ucontext *ucontext)
+{
+	struct efa_mmap_entry *entry;
+	unsigned long mmap_page;
+
+	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
+		xa_erase(&ucontext->mmap_xa, mmap_page);
+
+		ibdev_dbg(
+			&dev->ibdev,
+			"mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+			entry->obj, get_mmap_key(entry), entry->address,
+			entry->length);
+		if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
+			/* DMA mapping is already gone, now free the pages */
+			free_pages_exact(phys_to_virt(entry->address),
+					 entry->length);
+		kfree(entry);
+	}
+}
+#else
+static void mmap_entries_remove_free(struct efa_dev *dev,
+				     struct efa_ucontext *ucontext)
+{
+	struct efa_mmap_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &ucontext->pending_mmaps, list) {
+		list_del(&entry->list);
+		ibdev_dbg(
+			&dev->ibdev,
+			"mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+			entry->obj, get_mmap_key(entry), entry->address,
+			entry->length);
+		if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
+			/* DMA mapping is already gone, now free the pages */
+			free_pages_exact(phys_to_virt(entry->address),
+					 entry->length);
+		kfree(entry);
+	}
+}
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
+					     struct efa_ucontext *ucontext,
+					     u64 key, u64 len)
+{
+	struct efa_mmap_entry *entry;
+	u64 mmap_page;
+
+	mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
+	if (mmap_page > U32_MAX)
+		return NULL;
+
+	entry = xa_load(&ucontext->mmap_xa, mmap_page);
+	if (!entry || get_mmap_key(entry) != key || entry->length != len)
+		return NULL;
+
+	ibdev_dbg(&dev->ibdev,
+		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+		  entry->obj, key, entry->address, entry->length);
+
+	return entry;
+}
+#else
+static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
+					     struct efa_ucontext *ucontext,
+					     u64 key,
+					     u64 len)
+{
+	struct efa_mmap_entry *entry, *tmp;
+
+	mutex_lock(&ucontext->lock);
+	list_for_each_entry_safe(entry, tmp, &ucontext->pending_mmaps, list) {
+		if (get_mmap_key(entry) == key && entry->length == len) {
+			ibdev_dbg(&dev->ibdev,
+				  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+				  entry->obj, key, entry->address,
+				  entry->length);
+			mutex_unlock(&ucontext->lock);
+			return entry;
+		}
+	}
+	mutex_unlock(&ucontext->lock);
+
+	return NULL;
+}
+#endif
+
+/*
+ * Note this locking scheme cannot support removal of entries, except during
+ * ucontext destruction when the core code guarantees no concurrency.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
+			     void *obj, u64 address, u64 length, u8 mmap_flag)
+{
+	struct efa_mmap_entry *entry;
+	u32 next_mmap_page;
+	int err;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return EFA_MMAP_INVALID;
+
+	entry->obj = obj;
+	entry->address = address;
+	entry->length = length;
+	entry->mmap_flag = mmap_flag;
+
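+	/*
+	 * Advance the page counter by the number of pages this entry spans,
+	 * so consecutive entries get non-overlapping key ranges.
+	 */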
+	xa_lock(&ucontext->mmap_xa);
+	if (check_add_overflow(ucontext->mmap_xa_page,
+			       (u32)(length >> PAGE_SHIFT),
+			       &next_mmap_page))
+		goto err_unlock;
+
+	entry->mmap_page = ucontext->mmap_xa_page;
+	ucontext->mmap_xa_page = next_mmap_page;
+	err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
+			  GFP_KERNEL);
+	if (err)
+		goto err_unlock;
+
+	xa_unlock(&ucontext->mmap_xa);
+
+	ibdev_dbg(
+		&dev->ibdev,
+		"mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
+		entry->obj, entry->address, entry->length, get_mmap_key(entry));
+
+	return get_mmap_key(entry);
+
+err_unlock:
+	xa_unlock(&ucontext->mmap_xa);
+	kfree(entry);
+	return EFA_MMAP_INVALID;
+}
+#else
+static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
+			     void *obj, u64 address, u64 length, u8 mmap_flag)
+{
+	struct efa_mmap_entry *entry;
+	u64 next_mmap_page;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return EFA_MMAP_INVALID;
+
+	entry->obj = obj;
+	entry->address = address;
+	entry->length = length;
+	entry->mmap_flag = mmap_flag;
+
+	mutex_lock(&ucontext->lock);
+	next_mmap_page = ucontext->mmap_xa_page + (length >> PAGE_SHIFT);
+	if (next_mmap_page >= U32_MAX) {
+		ibdev_dbg(&dev->ibdev, "Too many mmap pages\n");
+		mutex_unlock(&ucontext->lock);
+		kfree(entry);
+		return EFA_MMAP_INVALID;
+	}
+
+	entry->mmap_page = ucontext->mmap_xa_page;
+	ucontext->mmap_xa_page = next_mmap_page;
+	list_add_tail(&entry->list, &ucontext->pending_mmaps);
+	mutex_unlock(&ucontext->lock);
+
+	ibdev_dbg(
+		&dev->ibdev,
+		"mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
+		entry->obj, entry->address, entry->length, get_mmap_key(entry));
+
+	return get_mmap_key(entry);
+}
+#endif
+
+int efa_query_device(struct ib_device *ibdev,
+		     struct ib_device_attr *props,
+		     struct ib_udata *udata)
+{
+	struct efa_com_get_device_attr_result *dev_attr;
+	struct efa_ibv_ex_query_device_resp resp = {};
+	struct efa_dev *dev = to_edev(ibdev);
+	int err;
+
+	if (udata && udata->inlen &&
+	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+		ibdev_dbg(ibdev,
+			  "Incompatible ABI params, udata not cleared\n");
+		return -EINVAL;
+	}
+
+	dev_attr = &dev->dev_attr;
+
+	memset(props, 0, sizeof(*props));
+	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
+	props->page_size_cap = dev_attr->page_size_cap;
+	props->vendor_id = dev->pdev->vendor;
+	props->vendor_part_id = dev->pdev->device;
+	props->hw_ver = dev->pdev->subsystem_device;
+	props->max_qp = dev_attr->max_qp;
+	props->max_cq = dev_attr->max_cq;
+	props->max_pd = dev_attr->max_pd;
+	props->max_mr = dev_attr->max_mr;
+	props->max_ah = dev_attr->max_ah;
+	props->max_cqe = dev_attr->max_cq_depth;
+	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
+				 dev_attr->max_rq_depth);
+	props->max_send_sge = dev_attr->max_sq_sge;
+	props->max_recv_sge = dev_attr->max_rq_sge;
+
+	if (udata && udata->outlen) {
+		resp.max_sq_sge = dev_attr->max_sq_sge;
+		resp.max_rq_sge = dev_attr->max_rq_sge;
+		resp.max_sq_wr = dev_attr->max_sq_depth;
+		resp.max_rq_wr = dev_attr->max_rq_depth;
+
+		err = ib_copy_to_udata(udata, &resp,
+				       min(sizeof(resp), udata->outlen));
+		if (err) {
+			ibdev_dbg(ibdev,
+				  "Failed to copy udata for query_device\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int efa_query_port(struct ib_device *ibdev, u8 port,
+		   struct ib_port_attr *props)
+{
+	struct efa_dev *dev = to_edev(ibdev);
+
+	props->lmc = 1;
+
+	props->state = IB_PORT_ACTIVE;
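+	/* IBTA PortPhysicalState 5 == LinkUp */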
+	props->phys_state = 5;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->active_speed = IB_SPEED_EDR;
+	props->active_width = IB_WIDTH_4X;
+	props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
+	props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
+	props->max_msg_sz = dev->mtu;
+	props->max_vl_num = 1;
+
+	return 0;
+}
+
+int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+		 int qp_attr_mask,
+		 struct ib_qp_init_attr *qp_init_attr)
+{
+	struct efa_dev *dev = to_edev(ibqp->device);
+	struct efa_com_query_qp_params params = {};
+	struct efa_com_query_qp_result result;
+	struct efa_qp *qp = to_eqp(ibqp);
+	int err;
+
+#define EFA_QUERY_QP_SUPP_MASK \
+	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
+	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
+
+	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
+		ibdev_dbg(&dev->ibdev,
+			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
+			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
+		return -EOPNOTSUPP;
+	}
+
+	memset(qp_attr, 0, sizeof(*qp_attr));
+	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+	params.qp_handle = qp->qp_handle;
+	err = efa_com_query_qp(&dev->edev, &params, &result);
+	if (err)
+		return err;
+
+	qp_attr->qp_state = result.qp_state;
+	qp_attr->qkey = result.qkey;
+	qp_attr->sq_psn = result.sq_psn;
+	qp_attr->sq_draining = result.sq_draining;
+	qp_attr->port_num = 1;
+
+	qp_attr->cap.max_send_wr = qp->max_send_wr;
+	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
+	qp_attr->cap.max_send_sge = qp->max_send_sge;
+	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
+	qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+	qp_init_attr->qp_type = ibqp->qp_type;
+	qp_init_attr->recv_cq = ibqp->recv_cq;
+	qp_init_attr->send_cq = ibqp->send_cq;
+	qp_init_attr->qp_context = ibqp->qp_context;
+	qp_init_attr->cap = qp_attr->cap;
+
+	return 0;
+}
+
+int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
+		  union ib_gid *gid)
+{
+	struct efa_dev *dev = to_edev(ibdev);
+
+	memcpy(gid->raw, dev->addr, sizeof(dev->addr));
+
+	return 0;
+}
+
+int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+		   u16 *pkey)
+{
+	if (index > 0)
+		return -EINVAL;
+
+	*pkey = 0xffff;
+	return 0;
+}
+
+static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
+{
+	struct efa_com_dealloc_pd_params params = {
+		.pdn = pdn,
+	};
+
+	return efa_com_dealloc_pd(&dev->edev, &params);
+}
+
+#ifdef HAVE_ALLOC_PD_NO_UCONTEXT
+int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+#else
+int efa_alloc_pd(struct ib_pd *ibpd,
+		 struct ib_ucontext *ibucontext,
+		 struct ib_udata *udata)
+#endif
+{
+	struct efa_dev *dev = to_edev(ibpd->device);
+	struct efa_ibv_alloc_pd_resp resp = {};
+	struct efa_com_alloc_pd_result result;
+	struct efa_pd *pd = to_epd(ibpd);
+	int err;
+
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	if (!udata) {
+		ibdev_dbg(&dev->ibdev, "udata is NULL\n");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+#endif
+
+	if (udata->inlen &&
+#ifndef WORKAROUND_E093111DDB6C
+	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+#else
+	    /* WA for e093111ddb6c ("IB/core: Fix input len in multiple user verbs") */
+	    !ib_is_udata_cleared(udata, 0, udata->inlen - sizeof(struct ib_uverbs_cmd_hdr))) {
+#endif
+		ibdev_dbg(&dev->ibdev,
+			  "Incompatible ABI params, udata not cleared\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	err = efa_com_alloc_pd(&dev->edev, &result);
+	if (err)
+		goto err_out;
+
+	pd->pdn = result.pdn;
+	resp.pdn = result.pdn;
+
+	if (udata->outlen) {
+		err = ib_copy_to_udata(udata, &resp,
+				       min(sizeof(resp), udata->outlen));
+		if (err) {
+			ibdev_dbg(&dev->ibdev,
+				  "Failed to copy udata for alloc_pd\n");
+			goto err_dealloc_pd;
+		}
+	}
+
+	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
+
+	return 0;
+
+err_dealloc_pd:
+	efa_pd_dealloc(dev, result.pdn);
+err_out:
+	atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
+	return err;
+}
+
+#ifndef HAVE_PD_CORE_ALLOCATION
+struct ib_pd *efa_kzalloc_pd(struct ib_device *ibdev,
+			     struct ib_ucontext *ibucontext,
+			     struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibdev);
+	struct efa_pd *pd;
+	int err;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd) {
+		atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	pd->ibpd.device = ibdev;
+
+#ifdef HAVE_ALLOC_PD_NO_UCONTEXT
+	err = efa_alloc_pd(&pd->ibpd, udata);
+#else
+	err = efa_alloc_pd(&pd->ibpd, ibucontext, udata);
+#endif
+	if (err)
+		goto err_free;
+
+	return &pd->ibpd;
+
+err_free:
+	kfree(pd);
+	return ERR_PTR(err);
+}
+#endif
+
+#ifdef HAVE_DEALLOC_PD_UDATA
+void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+#elif defined(HAVE_PD_CORE_ALLOCATION)
+void efa_dealloc_pd(struct ib_pd *ibpd)
+#else
+int efa_dealloc_pd(struct ib_pd *ibpd)
+#endif
+{
+	struct efa_dev *dev = to_edev(ibpd->device);
+	struct efa_pd *pd = to_epd(ibpd);
+
+	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
+	efa_pd_dealloc(dev, pd->pdn);
+#ifndef HAVE_PD_CORE_ALLOCATION
+	kfree(pd);
+
+	return 0;
+#endif
+}
+
+static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
+{
+	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
+
+	return efa_com_destroy_qp(&dev->edev, &params);
+}
+
+#ifdef HAVE_DESTROY_QP_UDATA
+int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+#else
+int efa_destroy_qp(struct ib_qp *ibqp)
+#endif
+{
+	struct efa_dev *dev = to_edev(ibqp->pd->device);
+	struct efa_qp *qp = to_eqp(ibqp);
+	int err;
+
+	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
+	err = efa_destroy_qp_handle(dev, qp->qp_handle);
+	if (err)
+		return err;
+
+	if (qp->rq_cpu_addr) {
+		ibdev_dbg(&dev->ibdev,
+			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
+			  qp->rq_cpu_addr, qp->rq_size,
+			  &qp->rq_dma_addr);
+		dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
+				 DMA_TO_DEVICE);
+	}
+
+	kfree(qp);
+	return 0;
+}
+
+static int qp_mmap_entries_setup(struct efa_qp *qp,
+				 struct efa_dev *dev,
+				 struct efa_ucontext *ucontext,
+				 struct efa_com_create_qp_params *params,
+				 struct efa_ibv_create_qp_resp *resp)
+{
+	/*
+	 * Once an entry is inserted it might be mmapped, hence cannot be
+	 * cleaned up until dealloc_ucontext.
+	 */
+	resp->sq_db_mmap_key =
+		mmap_entry_insert(dev, ucontext, qp,
+				  dev->db_bar_addr + resp->sq_db_offset,
+				  PAGE_SIZE, EFA_MMAP_IO_NC);
+	if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
+		return -ENOMEM;
+
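+	/*
+	 * The mmap key already identifies the page, so only the offset
+	 * within the page is reported back to userspace.
+	 */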
+	resp->sq_db_offset &= ~PAGE_MASK;
+
+	resp->llq_desc_mmap_key =
+		mmap_entry_insert(dev, ucontext, qp,
+				  dev->mem_bar_addr + resp->llq_desc_offset,
+				  PAGE_ALIGN(params->sq_ring_size_in_bytes +
+					     (resp->llq_desc_offset & ~PAGE_MASK)),
+				  EFA_MMAP_IO_WC);
+	if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
+		return -ENOMEM;
+
+	resp->llq_desc_offset &= ~PAGE_MASK;
+
+	if (qp->rq_size) {
+		resp->rq_db_mmap_key =
+			mmap_entry_insert(dev, ucontext, qp,
+					  dev->db_bar_addr + resp->rq_db_offset,
+					  PAGE_SIZE, EFA_MMAP_IO_NC);
+		if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
+			return -ENOMEM;
+
+		resp->rq_db_offset &= ~PAGE_MASK;
+
+		resp->rq_mmap_key =
+			mmap_entry_insert(dev, ucontext, qp,
+					  virt_to_phys(qp->rq_cpu_addr),
+					  qp->rq_size, EFA_MMAP_DMA_PAGE);
+		if (resp->rq_mmap_key == EFA_MMAP_INVALID)
+			return -ENOMEM;
+
+		resp->rq_mmap_size = qp->rq_size;
+	}
+
+	return 0;
+}
+
+static int efa_qp_validate_cap(struct efa_dev *dev,
+			       struct ib_qp_init_attr *init_attr)
+{
+	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
+		ibdev_dbg(&dev->ibdev,
+			  "qp: requested send wr[%u] exceeds the max[%u]\n",
+			  init_attr->cap.max_send_wr,
+			  dev->dev_attr.max_sq_depth);
+		return -EINVAL;
+	}
+	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
+		ibdev_dbg(&dev->ibdev,
+			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
+			  init_attr->cap.max_recv_wr,
+			  dev->dev_attr.max_rq_depth);
+		return -EINVAL;
+	}
+	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
+		ibdev_dbg(&dev->ibdev,
+			  "qp: requested sge send[%u] exceeds the max[%u]\n",
+			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
+		return -EINVAL;
+	}
+	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
+		ibdev_dbg(&dev->ibdev,
+			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
+			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
+		return -EINVAL;
+	}
+	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
+		ibdev_dbg(&dev->ibdev,
+			  "qp: requested inline data[%u] exceeds the max[%u]\n",
+			  init_attr->cap.max_inline_data,
+			  dev->dev_attr.inline_buf_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int efa_qp_validate_attr(struct efa_dev *dev,
+				struct ib_qp_init_attr *init_attr)
+{
+	if (init_attr->qp_type != IB_QPT_DRIVER &&
+	    init_attr->qp_type != IB_QPT_UD) {
+		ibdev_dbg(&dev->ibdev,
+			  "Unsupported qp type %d\n", init_attr->qp_type);
+		return -EOPNOTSUPP;
+	}
+
+	if (init_attr->srq) {
+		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (init_attr->create_flags) {
+		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata)
+{
+	struct efa_com_create_qp_params create_qp_params = {};
+	struct efa_com_create_qp_result create_qp_resp;
+	struct efa_dev *dev = to_edev(ibpd->device);
+	struct efa_ibv_create_qp_resp resp = {};
+	struct efa_ibv_create_qp cmd = {};
+	bool rq_entry_inserted = false;
+	struct efa_ucontext *ucontext;
+	struct efa_qp *qp;
+	int err;
+
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	if (!udata) {
+		ibdev_dbg(&dev->ibdev, "udata is NULL\n");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+#endif
+
+#ifdef HAVE_UDATA_TO_DRV_CONTEXT
+	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
+					     ibucontext);
+#else
+	ucontext = ibpd->uobject ? to_eucontext(ibpd->uobject->context) :
+				   NULL;
+#endif
+
+	err = efa_qp_validate_cap(dev, init_attr);
+	if (err)
+		goto err_out;
+
+	err = efa_qp_validate_attr(dev, init_attr);
+	if (err)
+		goto err_out;
+
+	if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
+		ibdev_dbg(&dev->ibdev,
+			  "Incompatible ABI params, no input udata\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (udata->inlen > sizeof(cmd) &&
+#ifndef WORKAROUND_E093111DDB6C
+	    !ib_is_udata_cleared(udata, sizeof(cmd),
+				 udata->inlen - sizeof(cmd))) {
+#else
+	    /* WA for e093111ddb6c ("IB/core: Fix input len in multiple user verbs") */
+	    !ib_is_udata_cleared(udata, sizeof(cmd),
+				 udata->inlen - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr))) {
+#endif
+		ibdev_dbg(&dev->ibdev,
+			  "Incompatible ABI params, unknown fields in udata\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	err = ib_copy_from_udata(&cmd, udata,
+				 min(sizeof(cmd), udata->inlen));
+	if (err) {
+		ibdev_dbg(&dev->ibdev,
+			  "Cannot copy udata for create_qp\n");
+		goto err_out;
+	}
+
+	if (cmd.comp_mask) {
+		ibdev_dbg(&dev->ibdev,
+			  "Incompatible ABI params, unknown fields in udata\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	create_qp_params.uarn = ucontext->uarn;
+	create_qp_params.pd = to_epd(ibpd)->pdn;
+
+	if (init_attr->qp_type == IB_QPT_UD) {
+		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
+	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
+		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
+	} else {
+		ibdev_dbg(&dev->ibdev,
+			  "Unsupported qp type %d driver qp type %d\n",
+			  init_attr->qp_type, cmd.driver_qp_type);
+		err = -EOPNOTSUPP;
+		goto err_free_qp;
+	}
+
+	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
+		  init_attr->qp_type, cmd.driver_qp_type);
+	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
+	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
+	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
+	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
+
+	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
+	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
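+	/*
+	 * The RQ buffer is exposed to userspace via mmap, so round its size
+	 * up to whole pages.
+	 */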
+	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
+	if (qp->rq_size) {
+		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
+						    qp->rq_size, DMA_TO_DEVICE);
+		if (!qp->rq_cpu_addr) {
+			err = -ENOMEM;
+			goto err_free_qp;
+		}
+
+		ibdev_dbg(&dev->ibdev,
+			  "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
+			  qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
+		create_qp_params.rq_base_addr = qp->rq_dma_addr;
+	}
+
+	err = efa_com_create_qp(&dev->edev, &create_qp_params,
+				&create_qp_resp);
+	if (err)
+		goto err_free_mapped;
+
+	resp.sq_db_offset = create_qp_resp.sq_db_offset;
+	resp.rq_db_offset = create_qp_resp.rq_db_offset;
+	resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
+	resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
+	resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
+
+	err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
+				    &resp);
+	if (err)
+		goto err_destroy_qp;
+
+	rq_entry_inserted = true;
+	qp->qp_handle = create_qp_resp.qp_handle;
+	qp->ibqp.qp_num = create_qp_resp.qp_num;
+	qp->ibqp.qp_type = init_attr->qp_type;
+	qp->max_send_wr = init_attr->cap.max_send_wr;
+	qp->max_recv_wr = init_attr->cap.max_recv_wr;
+	qp->max_send_sge = init_attr->cap.max_send_sge;
+	qp->max_recv_sge = init_attr->cap.max_recv_sge;
+	qp->max_inline_data = init_attr->cap.max_inline_data;
+
+	if (udata->outlen) {
+		err = ib_copy_to_udata(udata, &resp,
+				       min(sizeof(resp), udata->outlen));
+		if (err) {
+			ibdev_dbg(&dev->ibdev,
+				  "Failed to copy udata for qp[%u]\n",
+				  create_qp_resp.qp_num);
+			goto err_destroy_qp;
+		}
+	}
+
+	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
+
+	return &qp->ibqp;
+
+err_destroy_qp:
+	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
+err_free_mapped:
+	if (qp->rq_size) {
+		dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
+				 DMA_TO_DEVICE);
+		if (!rq_entry_inserted)
+			free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
+	}
+err_free_qp:
+	kfree(qp);
+err_out:
+	atomic64_inc(&dev->stats.sw_stats.create_qp_err);
+	return ERR_PTR(err);
+}
+
+static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
+				  struct ib_qp_attr *qp_attr, int qp_attr_mask,
+				  enum ib_qp_state cur_state,
+				  enum ib_qp_state new_state)
+{
+#define EFA_MODIFY_QP_SUPP_MASK \
+	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
+	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
+
+	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
+		ibdev_dbg(&dev->ibdev,
+			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
+			  qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
+		return -EOPNOTSUPP;
+	}
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
+				qp_attr_mask)) {
+		ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
+		return -EINVAL;
+	}
+
+	if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
+		ibdev_dbg(&dev->ibdev, "Can't change port num\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
+		ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibqp->device);
+	struct efa_com_modify_qp_params params = {};
+	struct efa_qp *qp = to_eqp(ibqp);
+	enum ib_qp_state cur_state;
+	enum ib_qp_state new_state;
+	int err;
+
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	if (!udata) {
+		ibdev_dbg(&dev->ibdev, "udata is NULL\n");
+		return -EOPNOTSUPP;
+	}
+#endif
+
+	if (udata->inlen &&
+#ifndef WORKAROUND_E093111DDB6C
+	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+#else
+	    /* WA for e093111ddb6c ("IB/core: Fix input len in multiple user verbs") */
+	    !ib_is_udata_cleared(udata, 0, udata->inlen - sizeof(struct ib_uverbs_cmd_hdr))) {
+#endif
+		ibdev_dbg(&dev->ibdev,
+			  "Incompatible ABI params, udata not cleared\n");
+		return -EINVAL;
+	}
+
+	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
+						     qp->state;
+	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
+
+	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
+				     new_state);
+	if (err)
+		return err;
+
+	params.qp_handle = qp->qp_handle;
+
+	if (qp_attr_mask & IB_QP_STATE) {
+		params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
+				      BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
+		params.cur_qp_state = qp_attr->cur_qp_state;
+		params.qp_state = qp_attr->qp_state;
+	}
+
+	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
+		params.modify_mask |=
+			BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
+		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
+	}
+
+	if (qp_attr_mask & IB_QP_QKEY) {
+		params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
+		params.qkey = qp_attr->qkey;
+	}
+
+	if (qp_attr_mask & IB_QP_SQ_PSN) {
+		params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
+		params.sq_psn = qp_attr->sq_psn;
+	}
+
+	err = efa_com_modify_qp(&dev->edev, &params);
+	if (err)
+		return err;
+
+	qp->state = new_state;
+
+	return 0;
+}
+
+static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
+{
+	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
+
+	return efa_com_destroy_cq(&dev->edev, &params);
+}
+
+#ifdef HAVE_IB_VOID_DESTROY_CQ
+void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibcq->device);
+	struct efa_cq *cq = to_ecq(ibcq);
+
+	ibdev_dbg(&dev->ibdev,
+		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
+		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
+
+	efa_destroy_cq_idx(dev, cq->cq_idx);
+	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
+			 DMA_FROM_DEVICE);
+#ifndef HAVE_CQ_CORE_ALLOCATION
+	kfree(cq);
+#endif
+}
+#else
+#ifdef HAVE_DESTROY_CQ_UDATA
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+#else
+int efa_destroy_cq(struct ib_cq *ibcq)
+#endif
+{
+	struct efa_dev *dev = to_edev(ibcq->device);
+	struct efa_cq *cq = to_ecq(ibcq);
+	int err;
+
+	ibdev_dbg(&dev->ibdev,
+		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
+		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
+
+	err = efa_destroy_cq_idx(dev, cq->cq_idx);
+	if (err)
+		return err;
+
+	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
+			 DMA_FROM_DEVICE);
+
+	kfree(cq);
+	return 0;
+}
+#endif
+
+static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
+				 struct efa_ibv_create_cq_resp *resp)
+{
+	resp->q_mmap_size = cq->size;
+	resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
+					     virt_to_phys(cq->cpu_addr),
+					     cq->size, EFA_MMAP_DMA_PAGE);
+	if (resp->q_mmap_key == EFA_MMAP_INVALID)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		  struct ib_udata *udata)
+{
+#ifdef HAVE_CREATE_CQ_NO_UCONTEXT
+	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct efa_ucontext, ibucontext);
+#else
+	struct efa_ucontext *ucontext = to_ecq(ibcq)->ucontext;
+#endif
+	struct efa_ibv_create_cq_resp resp = {};
+	struct efa_com_create_cq_params params;
+	struct efa_com_create_cq_result result;
+	struct ib_device *ibdev = ibcq->device;
+	struct efa_dev *dev = to_edev(ibdev);
+	struct efa_ibv_create_cq cmd = {};
+	struct efa_cq *cq = to_ecq(ibcq);
+	bool cq_entry_inserted = false;
+	int entries = attr->cqe;
+	int err;
+
+	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
+
+	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
+		ibdev_dbg(ibdev,
+			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
+			  entries, dev->dev_attr.max_cq_depth);
+		err = -EINVAL;
+		goto err_out;
+	}
+
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	if (!udata) {
+		ibdev_dbg(ibdev, "udata is NULL\n");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+#endif
+
+	if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
+		ibdev_dbg(ibdev,
+			  "Incompatible ABI params, no input udata\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (udata->inlen > sizeof(cmd) &&
+#ifndef WORKAROUND_E093111DDB6C
+	    !ib_is_udata_cleared(udata, sizeof(cmd),
+				 udata->inlen - sizeof(cmd))) {
+#else
+	    /* WA for e093111ddb6c ("IB/core: Fix input len in multiple user verbs") */
+	    !ib_is_udata_cleared(udata, sizeof(cmd),
+				 udata->inlen - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr))) {
+#endif
+		ibdev_dbg(ibdev,
+			  "Incompatible ABI params, unknown fields in udata\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	err = ib_copy_from_udata(&cmd, udata,
+				 min(sizeof(cmd), udata->inlen));
+	if (err) {
+		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
+		goto err_out;
+	}
+
+	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
+		ibdev_dbg(ibdev,
+			  "Incompatible ABI params, unknown fields in udata\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (!cmd.cq_entry_size) {
+		ibdev_dbg(ibdev,
+			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
+		ibdev_dbg(ibdev,
+			  "Invalid number of sub cqs[%u] expected[%u]\n",
+			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	cq->ucontext = ucontext;
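+	/* The CQ buffer is mmapped to userspace, so size it in whole pages */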
+	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
+	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
+					 DMA_FROM_DEVICE);
+	if (!cq->cpu_addr) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	params.uarn = cq->ucontext->uarn;
+	params.cq_depth = entries;
+	params.dma_addr = cq->dma_addr;
+	params.entry_size_in_bytes = cmd.cq_entry_size;
+	params.num_sub_cqs = cmd.num_sub_cqs;
+	err = efa_com_create_cq(&dev->edev, &params, &result);
+	if (err)
+		goto err_free_mapped;
+
+	resp.cq_idx = result.cq_idx;
+	cq->cq_idx = result.cq_idx;
+	cq->ibcq.cqe = result.actual_depth;
+	WARN_ON_ONCE(entries != result.actual_depth);
+
+	err = cq_mmap_entries_setup(dev, cq, &resp);
+	if (err) {
+		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
+			  cq->cq_idx);
+		goto err_destroy_cq;
+	}
+
+	cq_entry_inserted = true;
+
+	if (udata->outlen) {
+		err = ib_copy_to_udata(udata, &resp,
+				       min(sizeof(resp), udata->outlen));
+		if (err) {
+			ibdev_dbg(ibdev,
+				  "Failed to copy udata for create_cq\n");
+			goto err_destroy_cq;
+		}
+	}
+
+	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
+		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
+
+	return 0;
+
+err_destroy_cq:
+	efa_destroy_cq_idx(dev, cq->cq_idx);
+err_free_mapped:
+	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
+			 DMA_FROM_DEVICE);
+	if (!cq_entry_inserted)
+		free_pages_exact(cq->cpu_addr, cq->size);
+err_out:
+	atomic64_inc(&dev->stats.sw_stats.create_cq_err);
+	return err;
+}
+
+#ifndef HAVE_CQ_CORE_ALLOCATION
+#ifdef HAVE_CREATE_CQ_NO_UCONTEXT
+struct ib_cq *efa_kzalloc_cq(struct ib_device *ibdev,
+			     const struct ib_cq_init_attr *attr,
+			     struct ib_udata *udata)
+#else
+struct ib_cq *efa_kzalloc_cq(struct ib_device *ibdev,
+			     const struct ib_cq_init_attr *attr,
+			     struct ib_ucontext *ibucontext,
+			     struct ib_udata *udata)
+#endif
+{
+	struct efa_dev *dev = to_edev(ibdev);
+	struct efa_cq *cq;
+	int err;
+
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq) {
+		atomic64_inc(&dev->stats.sw_stats.create_cq_err);
+		return ERR_PTR(-ENOMEM);
+	}
+
+#ifdef HAVE_CREATE_CQ_NO_UCONTEXT
+	cq->ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
+						 ibucontext);
+#else
+	cq->ucontext = to_eucontext(ibucontext);
+#endif
+
+	cq->ibcq.device = ibdev;
+	err = efa_create_cq(&cq->ibcq, attr, udata);
+	if (err)
+		goto err_free_cq;
+
+	return &cq->ibcq;
+
+err_free_cq:
+	kfree(cq);
+	return ERR_PTR(err);
+}
+#endif
+
+#ifdef HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
+static int umem_to_page_list(struct efa_dev *dev,
+			     struct ib_umem *umem,
+			     u64 *page_list,
+			     u32 hp_cnt,
+			     u8 hp_shift)
+{
+	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
+	struct ib_block_iter biter;
+	unsigned int hp_idx = 0;
+
+	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
+		  hp_cnt, pages_in_hp);
+
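+	/* Record one DMA address per hp_shift-sized block of the umem */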
+	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
+			    BIT(hp_shift))
+		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
+
+	return 0;
+}
+#elif defined(HAVE_SG_DMA_PAGE_ITER)
+static int umem_to_page_list(struct efa_dev *dev,
+			     struct ib_umem *umem,
+			     u64 *page_list,
+			     u32 hp_cnt,
+			     u8 hp_shift)
+{
+	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
+	struct sg_dma_page_iter sg_iter;
+	unsigned int page_idx = 0;
+	unsigned int hp_idx = 0;
+
+	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
+		  hp_cnt, pages_in_hp);
+
+	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+		if (page_idx % pages_in_hp == 0) {
+			page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter);
+			hp_idx++;
+		}
+
+		page_idx++;
+	}
+
+	return 0;
+}
+#else
+static int umem_to_page_list(struct efa_dev *dev,
+			     struct ib_umem *umem,
+			     u64 *page_list,
+			     u32 hp_cnt,
+			     u8 hp_shift)
+{
+	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
+	unsigned int page_idx = 0;
+	unsigned int hp_idx = 0;
+	struct scatterlist *sg;
+	unsigned int entry;
+
+	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
+		  hp_cnt, pages_in_hp);
+
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+		if (sg_dma_len(sg) != PAGE_SIZE) {
+			ibdev_dbg(&dev->ibdev,
+				  "sg_dma_len[%u] != PAGE_SIZE[%lu]\n",
+				  sg_dma_len(sg), PAGE_SIZE);
+			return -EINVAL;
+		}
+
+		if (page_idx % pages_in_hp == 0) {
+			page_list[hp_idx] = sg_dma_address(sg);
+			hp_idx++;
+		}
+		page_idx++;
+	}
+
+	return 0;
+}
+#endif
+
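+/* Build a scatterlist over the pages backing a vmalloc'ed buffer */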
+static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
+{
+	struct scatterlist *sglist;
+	struct page *pg;
+	int i;
+
+	sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
+	if (!sglist)
+		return NULL;
+	sg_init_table(sglist, page_cnt);
+	for (i = 0; i < page_cnt; i++) {
+		pg = vmalloc_to_page(buf);
+		if (!pg)
+			goto err;
+		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
+		buf += PAGE_SIZE / sizeof(*buf);
+	}
+	return sglist;
+
+err:
+	kfree(sglist);
+	return NULL;
+}
+
+/*
+ * Create a chunk list of the DMA addresses of the physical pages in the
+ * supplied scatter-gather list.
+ */
+static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
+{
+	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
+	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
+	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
+	unsigned int chunk_list_size, chunk_idx, payload_idx;
+	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
+	struct efa_com_ctrl_buff_info *ctrl_buf;
+	u64 *cur_chunk_buf, *prev_chunk_buf;
+#ifdef HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
+	struct ib_block_iter biter;
+#else
+	struct scatterlist *sg;
+	unsigned int entry, payloads_in_sg;
+#endif
+	dma_addr_t dma_addr;
+	int i;
+
+	/* allocate a chunk list that consists of 4KB chunks */
+	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
+
+	chunk_list->size = chunk_list_size;
+	chunk_list->chunks = kcalloc(chunk_list_size,
+				     sizeof(*chunk_list->chunks),
+				     GFP_KERNEL);
+	if (!chunk_list->chunks)
+		return -ENOMEM;
+
+	ibdev_dbg(&dev->ibdev,
+		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
+		  page_cnt);
+
+	/* allocate chunk buffers: */
+	for (i = 0; i < chunk_list_size; i++) {
+		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
+		if (!chunk_list->chunks[i].buf)
+			goto chunk_list_dealloc;
+
+		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
+	}
+	chunk_list->chunks[chunk_list_size - 1].length =
+		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
+			EFA_CHUNK_PTR_SIZE;
+
+	/* fill the dma addresses of sg list pages to chunks: */
+	chunk_idx = 0;
+	payload_idx = 0;
+	cur_chunk_buf = chunk_list->chunks[0].buf;
+#ifdef HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
+	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
+			    EFA_CHUNK_PAYLOAD_SIZE) {
+		cur_chunk_buf[payload_idx++] =
+			rdma_block_iter_dma_address(&biter);
+
+		if (payload_idx == EFA_PTRS_PER_CHUNK) {
+			chunk_idx++;
+			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
+			payload_idx = 0;
+		}
+	}
+#else
+	for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) {
+		payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT;
+		for (i = 0; i < payloads_in_sg; i++) {
+			cur_chunk_buf[payload_idx++] =
+				(sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) +
+				(EFA_CHUNK_PAYLOAD_SIZE * i);
+
+			if (payload_idx == EFA_PTRS_PER_CHUNK) {
+				chunk_idx++;
+				cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
+				payload_idx = 0;
+			}
+		}
+	}
+#endif
+
+	/* map chunks to dma and fill chunks next ptrs */
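+	/*
+	 * Walk backwards so each chunk's DMA address is already known when it
+	 * is linked from the previous chunk's control pointer.
+	 */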
+	for (i = chunk_list_size - 1; i >= 0; i--) {
+		dma_addr = dma_map_single(&dev->pdev->dev,
+					  chunk_list->chunks[i].buf,
+					  chunk_list->chunks[i].length,
+					  DMA_TO_DEVICE);
+		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
+			ibdev_err(&dev->ibdev,
+				  "chunk[%u] dma_map_failed\n", i);
+			goto chunk_list_unmap;
+		}
+
+		chunk_list->chunks[i].dma_addr = dma_addr;
+		ibdev_dbg(&dev->ibdev,
+			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
+
+		if (!i)
+			break;
+
+		prev_chunk_buf = chunk_list->chunks[i - 1].buf;
+
+		ctrl_buf = (struct efa_com_ctrl_buff_info *)
+				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
+		ctrl_buf->length = chunk_list->chunks[i].length;
+
+		efa_com_set_dma_addr(dma_addr,
+				     &ctrl_buf->address.mem_addr_high,
+				     &ctrl_buf->address.mem_addr_low);
+	}
+
+	return 0;
+
+chunk_list_unmap:
+	for (; i < chunk_list_size; i++) {
+		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
+				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
+	}
+chunk_list_dealloc:
+	for (i = 0; i < chunk_list_size; i++)
+		kfree(chunk_list->chunks[i].buf);
+
+	kfree(chunk_list->chunks);
+	return -ENOMEM;
+}
+
+static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
+{
+	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
+	int i;
+
+	for (i = 0; i < chunk_list->size; i++) {
+		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
+				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
+		kfree(chunk_list->chunks[i].buf);
+	}
+
+	kfree(chunk_list->chunks);
+}
+
+/* initialize pbl continuous mode: map pbl buffer to a dma address. */
+static int pbl_continuous_initialize(struct efa_dev *dev,
+				     struct pbl_context *pbl)
+{
+	dma_addr_t dma_addr;
+
+	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
+				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
+	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
+		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
+		return -ENOMEM;
+	}
+
+	pbl->phys.continuous.dma_addr = dma_addr;
+	ibdev_dbg(&dev->ibdev,
+		  "pbl continuous - dma_addr = %pad, size[%u]\n",
+		  &dma_addr, pbl->pbl_buf_size_in_bytes);
+
+	return 0;
+}
+
+/*
+ * initialize pbl indirect mode:
+ * create a chunk list out of the dma addresses of the physical pages of
+ * the pbl buffer.
+ */
+static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
+{
+	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
+	struct scatterlist *sgl;
+	int sg_dma_cnt, err;
+
+	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
+	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
+	if (!sgl)
+		return -ENOMEM;
+
+	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
+	if (!sg_dma_cnt) {
+		err = -EINVAL;
+		goto err_map;
+	}
+
+	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
+	pbl->phys.indirect.sgl = sgl;
+	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
+	err = pbl_chunk_list_create(dev, pbl);
+	if (err) {
+		ibdev_dbg(&dev->ibdev,
+			  "chunk_list creation failed[%d]\n", err);
+		goto err_chunk;
+	}
+
+	ibdev_dbg(&dev->ibdev,
+		  "pbl indirect - size[%u], chunks[%u]\n",
+		  pbl->pbl_buf_size_in_bytes,
+		  pbl->phys.indirect.chunk_list.size);
+
+	return 0;
+
+err_chunk:
+	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
+err_map:
+	kfree(sgl);
+	return err;
+}
+
+static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
+{
+	pbl_chunk_list_destroy(dev, pbl);
+	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
+		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
+	kfree(pbl->phys.indirect.sgl);
+}
+
+/* create a page buffer list from a mapped user memory region */
+static int pbl_create(struct efa_dev *dev,
+		      struct pbl_context *pbl,
+		      struct ib_umem *umem,
+		      int hp_cnt,
+		      u8 hp_shift)
+{
+	int err;
+
+	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
+	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
+	if (!pbl->pbl_buf)
+		return -ENOMEM;
+
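+	/*
+	 * kvzalloc() may fall back to vmalloc() for large page lists; a
+	 * vmalloc'ed buffer is not physically contiguous, so it must be
+	 * handed to the device as an indirect (chunked) pbl.
+	 */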
+	if (is_vmalloc_addr(pbl->pbl_buf)) {
+		pbl->physically_continuous = 0;
+		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
+					hp_shift);
+		if (err)
+			goto err_free;
+
+		err = pbl_indirect_initialize(dev, pbl);
+		if (err)
+			goto err_free;
+	} else {
+		pbl->physically_continuous = 1;
+		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
+					hp_shift);
+		if (err)
+			goto err_free;
+
+		err = pbl_continuous_initialize(dev, pbl);
+		if (err)
+			goto err_free;
+	}
+
+	ibdev_dbg(&dev->ibdev,
+		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
+		  hp_cnt, pbl->physically_continuous);
+
+	return 0;
+
+err_free:
+	kvfree(pbl->pbl_buf);
+	return err;
+}
+
+static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
+{
+	if (pbl->physically_continuous)
+		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
+				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
+	else
+		pbl_indirect_terminate(dev, pbl);
+
+	kvfree(pbl->pbl_buf);
+}
+
+static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
+				 struct efa_com_reg_mr_params *params)
+{
+	int err;
+
+	params->inline_pbl = 1;
+	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
+				params->page_num, params->page_shift);
+	if (err)
+		return err;
+
+	ibdev_dbg(&dev->ibdev,
+		  "inline_pbl_array - pages[%u]\n", params->page_num);
+
+	return 0;
+}
+
+static int efa_create_pbl(struct efa_dev *dev,
+			  struct pbl_context *pbl,
+			  struct efa_mr *mr,
+			  struct efa_com_reg_mr_params *params)
+{
+	int err;
+
+	err = pbl_create(dev, pbl, mr->umem, params->page_num,
+			 params->page_shift);
+	if (err) {
+		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
+		return err;
+	}
+
+	params->inline_pbl = 0;
+	params->indirect = !pbl->physically_continuous;
+	if (pbl->physically_continuous) {
+		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
+
+		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
+				     &params->pbl.pbl.address.mem_addr_high,
+				     &params->pbl.pbl.address.mem_addr_low);
+	} else {
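+		/*
+		 * Indirect mode: pass only the first chunk; the device
+		 * reaches the remaining chunks through the per-chunk
+		 * "next chunk" control buffer entries.
+		 */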
+		params->pbl.pbl.length =
+			pbl->phys.indirect.chunk_list.chunks[0].length;
+
+		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
+				     &params->pbl.pbl.address.mem_addr_high,
+				     &params->pbl.pbl.address.mem_addr_low);
+	}
+
+	return 0;
+}
+
+#ifndef HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
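+/*
+ * Fallback for kernels without ib_umem_find_best_pgsz(): pick the largest
+ * supported page size that is compatible with the MR start address and the
+ * physical layout of the umem.
+ */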
+static unsigned long efa_cont_pages(struct ib_umem *umem,
+				    unsigned long page_size_cap,
+				    u64 addr)
+{
+	unsigned long max_page_shift = fls64(page_size_cap);
+	struct scatterlist *sg;
+	u64 base = ~0, p = 0;
+	unsigned long tmp;
+	unsigned long m;
+	u64 len, pfn;
+	int i = 0;
+	int entry;
+
+	addr = addr >> PAGE_SHIFT;
+	tmp = (unsigned long)addr;
+	m = find_first_bit(&tmp, BITS_PER_LONG);
+	m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
+
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+		len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE);
+		pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+		if (base + p != pfn) {
+			/*
+			 * If either the offset or the new
+			 * base is unaligned, update m
+			 */
+			tmp = (unsigned long)(pfn | p);
+			if (!IS_ALIGNED(tmp, 1 << m))
+				m = find_first_bit(&tmp, BITS_PER_LONG);
+
+			base = pfn;
+			p = 0;
+		}
+
+		p += len;
+		i += len;
+	}
+
+	if (i)
+		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
+	else
+		m = 0;
+
+	return BIT(PAGE_SHIFT + m);
+}
+#endif
+
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+			 u64 virt_addr, int access_flags,
+			 struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibpd->device);
+	struct efa_com_reg_mr_params params = {};
+	struct efa_com_reg_mr_result result = {};
+	struct pbl_context pbl;
+	unsigned int pg_sz;
+	struct efa_mr *mr;
+	int inline_size;
+	int err;
+
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	if (!udata) {
+		ibdev_dbg(&dev->ibdev, "udata is NULL\n");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+#endif
+
+	if (udata->inlen &&
+#ifndef WORKAROUND_E093111DDB6C
+	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
+#else
+	    /* WA for e093111ddb6c ("IB/core: Fix input len in multiple user verbs") */
+	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen) - sizeof(struct ib_uverbs_cmd_hdr))) {
+#endif
+		ibdev_dbg(&dev->ibdev,
+			  "Incompatible ABI params, udata not cleared\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
+		ibdev_dbg(&dev->ibdev,
+			  "Unsupported access flags[%#x], supported[%#x]\n",
+			  access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+#ifdef HAVE_IB_UMEM_GET_UDATA
+	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+#else
+	mr->umem = ib_umem_get(ibpd->uobject->context, start, length,
+			       access_flags, 0);
+#endif
+	if (IS_ERR(mr->umem)) {
+		err = PTR_ERR(mr->umem);
+		ibdev_dbg(&dev->ibdev,
+			  "Failed to pin and map user space memory[%d]\n", err);
+		goto err_free;
+	}
+
+	params.pd = to_epd(ibpd)->pdn;
+	params.iova = virt_addr;
+	params.mr_length_in_bytes = length;
+	params.permissions = access_flags & 0x1;
+
+#ifdef HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
+	pg_sz = ib_umem_find_best_pgsz(mr->umem,
+				       dev->dev_attr.page_size_cap,
+				       virt_addr);
+	if (!pg_sz) {
+		err = -EOPNOTSUPP;
+		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
+			  dev->dev_attr.page_size_cap);
+		goto err_unmap;
+	}
+#else
+	pg_sz = efa_cont_pages(mr->umem, dev->dev_attr.page_size_cap,
+			       virt_addr);
+#endif
+
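+	/*
+	 * page_num covers the whole MR length plus the offset of start
+	 * within the first page of the selected page size.
+	 */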
+	params.page_shift = __ffs(pg_sz);
+	params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
+				       pg_sz);
+
+	ibdev_dbg(&dev->ibdev,
+		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
+		  start, length, params.page_shift, params.page_num);
+
+	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
+	if (params.page_num <= inline_size) {
+		err = efa_create_inline_pbl(dev, mr, &params);
+		if (err)
+			goto err_unmap;
+
+		err = efa_com_register_mr(&dev->edev, &params, &result);
+		if (err)
+			goto err_unmap;
+	} else {
+		err = efa_create_pbl(dev, &pbl, mr, &params);
+		if (err)
+			goto err_unmap;
+
+		err = efa_com_register_mr(&dev->edev, &params, &result);
+		pbl_destroy(dev, &pbl);
+
+		if (err)
+			goto err_unmap;
+	}
+
+	mr->ibmr.lkey = result.l_key;
+	mr->ibmr.rkey = result.r_key;
+	mr->ibmr.length = length;
+	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
+
+	return &mr->ibmr;
+
+err_unmap:
+	ib_umem_release(mr->umem);
+err_free:
+	kfree(mr);
+err_out:
+	atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
+	return ERR_PTR(err);
+}
+
+#ifdef HAVE_DEREG_MR_UDATA
+int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+#else
+int efa_dereg_mr(struct ib_mr *ibmr)
+#endif
+{
+	struct efa_dev *dev = to_edev(ibmr->device);
+	struct efa_com_dereg_mr_params params;
+	struct efa_mr *mr = to_emr(ibmr);
+	int err;
+
+	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
+
+	params.l_key = mr->ibmr.lkey;
+	err = efa_com_dereg_mr(&dev->edev, &params);
+	if (err)
+		return err;
+
+	ib_umem_release(mr->umem);
+	kfree(mr);
+
+	return 0;
+}
+
+int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+			   struct ib_port_immutable *immutable)
+{
+	struct ib_port_attr attr;
+	int err;
+
+	err = ib_query_port(ibdev, port_num, &attr);
+	if (err) {
+		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
+		return err;
+	}
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
+
+	return 0;
+}
+
+static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
+{
+	struct efa_com_dealloc_uar_params params = {
+		.uarn = uarn,
+	};
+
+	return efa_com_dealloc_uar(&dev->edev, &params);
+}
+
+int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
+{
+	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+	struct efa_dev *dev = to_edev(ibucontext->device);
+	struct efa_ibv_alloc_ucontext_resp resp = {};
+	struct efa_com_alloc_uar_result result;
+	int err;
+
+	/*
+	 * it's fine if the driver does not know all request fields;
+	 * we will ack the input fields in our response.
+	 */
+
+	err = efa_com_alloc_uar(&dev->edev, &result);
+	if (err)
+		goto err_out;
+
+	ucontext->uarn = result.uarn;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+	xa_init(&ucontext->mmap_xa);
+#else
+	mutex_init(&ucontext->lock);
+	INIT_LIST_HEAD(&ucontext->pending_mmaps);
+#endif
+
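+	/* report which commands accept driver-specific udata */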
+	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
+	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
+	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
+	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
+	resp.max_llq_size = dev->dev_attr.max_llq_size;
+
+	if (udata && udata->outlen) {
+		err = ib_copy_to_udata(udata, &resp,
+				       min(sizeof(resp), udata->outlen));
+		if (err)
+			goto err_dealloc_uar;
+	}
+
+	return 0;
+
+err_dealloc_uar:
+	efa_dealloc_uar(dev, result.uarn);
+err_out:
+	atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
+	return err;
+}
+
+#ifndef HAVE_UCONTEXT_CORE_ALLOCATION
+struct ib_ucontext *efa_kzalloc_ucontext(struct ib_device *ibdev,
+					 struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibdev);
+	struct efa_ucontext *ucontext;
+	int err;
+
+	/*
+	 * it's fine if the driver does not know all request fields;
+	 * we will ack the input fields in our response.
+	 */
+
+	ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
+	if (!ucontext) {
+		atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ucontext->ibucontext.device = ibdev;
+	err = efa_alloc_ucontext(&ucontext->ibucontext, udata);
+	if (err)
+		goto err_free_ucontext;
+
+	return &ucontext->ibucontext;
+
+err_free_ucontext:
+	kfree(ucontext);
+	return ERR_PTR(err);
+}
+#endif
+
+#ifdef HAVE_UCONTEXT_CORE_ALLOCATION
+void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
+#else
+int efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
+#endif
+{
+	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+	struct efa_dev *dev = to_edev(ibucontext->device);
+
+	mmap_entries_remove_free(dev, ucontext);
+	efa_dealloc_uar(dev, ucontext->uarn);
+#ifndef HAVE_UCONTEXT_CORE_ALLOCATION
+	kfree(ucontext);
+
+	return 0;
+#endif
+}
+
+static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
+		      struct vm_area_struct *vma, u64 key, u64 length)
+{
+	struct efa_mmap_entry *entry;
+	unsigned long va;
+	int err = 0;
+	u64 pfn;
+
+	entry = mmap_entry_get(dev, ucontext, key, length);
+	if (!entry) {
+		ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
+			  key);
+		return -EINVAL;
+	}
+
+	ibdev_dbg(&dev->ibdev,
+		  "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
+		  entry->address, length, entry->mmap_flag);
+
+	pfn = entry->address >> PAGE_SHIFT;
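+	/*
+	 * I/O regions are remapped with the requested caching attribute via
+	 * rdma_user_mmap_io(); DMA_PAGE entries are regular kernel pages and
+	 * are inserted into the VMA one page at a time.
+	 */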
+	switch (entry->mmap_flag) {
+	case EFA_MMAP_IO_NC:
+		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
+					pgprot_noncached(vma->vm_page_prot));
+		break;
+	case EFA_MMAP_IO_WC:
+		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
+					pgprot_writecombine(vma->vm_page_prot));
+		break;
+	case EFA_MMAP_DMA_PAGE:
+		for (va = vma->vm_start; va < vma->vm_end;
+		     va += PAGE_SIZE, pfn++) {
+			err = vm_insert_page(vma, va, pfn_to_page(pfn));
+			if (err)
+				break;
+		}
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	if (err) {
+		ibdev_dbg(
+			&dev->ibdev,
+			"Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
+			entry->address, length, entry->mmap_flag, err);
+		return err;
+	}
+
+	return 0;
+}
+
+int efa_mmap(struct ib_ucontext *ibucontext,
+	     struct vm_area_struct *vma)
+{
+	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+	struct efa_dev *dev = to_edev(ibucontext->device);
+	u64 length = vma->vm_end - vma->vm_start;
+	u64 key = vma->vm_pgoff << PAGE_SHIFT;
+
+	ibdev_dbg(&dev->ibdev,
+		  "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
+		  vma->vm_start, vma->vm_end, length, key);
+
+	if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
+		ibdev_dbg(&dev->ibdev,
+			  "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
+			  length, PAGE_SIZE, vma->vm_flags);
+		return -EINVAL;
+	}
+
+	if (vma->vm_flags & VM_EXEC) {
+		ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
+		return -EPERM;
+	}
+
+	return __efa_mmap(dev, ucontext, vma, key, length);
+}
+
+static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
+{
+	struct efa_com_destroy_ah_params params = {
+		.ah = ah->ah,
+		.pdn = to_epd(ah->ibah.pd)->pdn,
+	};
+
+	return efa_com_destroy_ah(&dev->edev, &params);
+}
+
+int efa_create_ah(struct ib_ah *ibah,
+		  struct rdma_ah_attr *ah_attr,
+		  u32 flags,
+		  struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibah->device);
+	struct efa_com_create_ah_params params = {};
+	struct efa_ibv_create_ah_resp resp = {};
+	struct efa_com_create_ah_result result;
+	struct efa_ah *ah = to_eah(ibah);
+	int err;
+
+#ifdef HAVE_CREATE_DESTROY_AH_FLAGS
+	if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
+		ibdev_dbg(&dev->ibdev,
+			  "Create address handle is not supported in atomic context\n");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+#endif
+
+#ifndef HAVE_NO_KVERBS_DRIVERS
+	if (!udata) {
+		ibdev_dbg(&dev->ibdev, "udata is NULL\n");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+#endif
+
+	if (udata->inlen &&
+#ifndef WORKAROUND_E093111DDB6C
+	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+#else
+	    /* WA for e093111ddb6c ("IB/core: Fix input len in multiple user verbs") */
+	    !ib_is_udata_cleared(udata, 0, udata->inlen - sizeof(struct ib_uverbs_cmd_hdr))) {
+#endif
+		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
+	       sizeof(params.dest_addr));
+	params.pdn = to_epd(ibah->pd)->pdn;
+	err = efa_com_create_ah(&dev->edev, &params, &result);
+	if (err)
+		goto err_out;
+
+	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
+	ah->ah = result.ah;
+
+	resp.efa_address_handle = result.ah;
+
+	if (udata->outlen) {
+		err = ib_copy_to_udata(udata, &resp,
+				       min(sizeof(resp), udata->outlen));
+		if (err) {
+			ibdev_dbg(&dev->ibdev,
+				  "Failed to copy udata for create_ah response\n");
+			goto err_destroy_ah;
+		}
+	}
+	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
+
+	return 0;
+
+err_destroy_ah:
+	efa_ah_destroy(dev, ah);
+err_out:
+	atomic64_inc(&dev->stats.sw_stats.create_ah_err);
+	return err;
+}
+
+#ifndef HAVE_AH_CORE_ALLOCATION
+#ifdef HAVE_CREATE_DESTROY_AH_FLAGS
+struct ib_ah *efa_kzalloc_ah(struct ib_pd *ibpd,
+			     struct rdma_ah_attr *ah_attr,
+			     u32 flags,
+			     struct ib_udata *udata)
+#else
+struct ib_ah *efa_kzalloc_ah(struct ib_pd *ibpd,
+			     struct rdma_ah_attr *ah_attr,
+			     struct ib_udata *udata)
+#endif
+{
+	struct efa_ah *ah;
+	int err;
+#ifndef HAVE_CREATE_DESTROY_AH_FLAGS
+	u32 flags = 0;
+#endif
+
+	ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+
+	ah->ibah.device = ibpd->device;
+	ah->ibah.pd = ibpd;
+	err = efa_create_ah(&ah->ibah, ah_attr, flags, udata);
+	if (err)
+		goto err_free;
+
+	return &ah->ibah;
+
+err_free:
+	kfree(ah);
+	return ERR_PTR(err);
+}
+#endif
+
+#ifdef HAVE_AH_CORE_ALLOCATION
+void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+#elif defined(HAVE_CREATE_DESTROY_AH_FLAGS)
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+#else
+int efa_destroy_ah(struct ib_ah *ibah)
+#endif
+{
+	struct efa_dev *dev = to_edev(ibah->pd->device);
+	struct efa_ah *ah = to_eah(ibah);
+#ifndef HAVE_AH_CORE_ALLOCATION
+	int err;
+#endif
+
+	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
+
+#ifdef HAVE_CREATE_DESTROY_AH_FLAGS
+	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
+		ibdev_dbg(&dev->ibdev,
+			  "Destroy address handle is not supported in atomic context\n");
+#ifdef HAVE_AH_CORE_ALLOCATION
+		return;
+#else
+		return -EOPNOTSUPP;
+#endif
+	}
+#endif
+
+#ifdef HAVE_AH_CORE_ALLOCATION
+	efa_ah_destroy(dev, ah);
+#else
+	err = efa_ah_destroy(dev, ah);
+	if (err)
+		return err;
+
+	kfree(ah);
+	return 0;
+#endif
+}
+
+struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
+{
+	return rdma_alloc_hw_stats_struct(efa_stats_names,
+					  ARRAY_SIZE(efa_stats_names),
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+		     u8 port_num, int index)
+{
+	struct efa_com_get_stats_params params = {};
+	union efa_com_get_stats_result result;
+	struct efa_dev *dev = to_edev(ibdev);
+	struct efa_com_basic_stats *bs;
+	struct efa_com_stats_admin *as;
+	struct efa_stats *s;
+	int err;
+
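+	/*
+	 * Stats are gathered from three sources: basic counters queried from
+	 * the device, admin queue command counters, and software error
+	 * counters maintained by the driver.
+	 */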
+	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
+	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+
+	err = efa_com_get_stats(&dev->edev, &params, &result);
+	if (err)
+		return err;
+
+	bs = &result.basic_stats;
+	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
+	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
+	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
+	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
+	stats->value[EFA_RX_DROPS] = bs->rx_drops;
+
+	as = &dev->edev.aq.stats;
+	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
+	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
+
+	s = &dev->stats;
+	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
+	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
+	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
+	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
+	stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
+	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
+
+	return ARRAY_SIZE(efa_stats_names);
+}
+
+#ifndef HAVE_NO_KVERBS_DRIVERS
+int efa_post_send(struct ib_qp *ibqp,
+		  const struct ib_send_wr *wr,
+		  const struct ib_send_wr **bad_wr)
+{
+	struct efa_dev *dev = to_edev(ibqp->device);
+
+	ibdev_warn(&dev->ibdev, "Function not supported\n");
+	return -EOPNOTSUPP;
+}
+
+int efa_post_recv(struct ib_qp *ibqp,
+		  const struct ib_recv_wr *wr,
+		  const struct ib_recv_wr **bad_wr)
+{
+	struct efa_dev *dev = to_edev(ibqp->device);
+
+	ibdev_warn(&dev->ibdev, "Function not supported\n");
+	return -EOPNOTSUPP;
+}
+
+int efa_poll_cq(struct ib_cq *ibcq, int num_entries,
+		struct ib_wc *wc)
+{
+	struct efa_dev *dev = to_edev(ibcq->device);
+
+	ibdev_warn(&dev->ibdev, "Function not supported\n");
+	return -EOPNOTSUPP;
+}
+
+int efa_req_notify_cq(struct ib_cq *ibcq,
+		      enum ib_cq_notify_flags flags)
+{
+	struct efa_dev *dev = to_edev(ibcq->device);
+
+	ibdev_warn(&dev->ibdev, "Function not supported\n");
+	return -EOPNOTSUPP;
+}
+
+struct ib_mr *efa_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+	struct efa_dev *dev = to_edev(ibpd->device);
+
+	ibdev_warn(&dev->ibdev, "Function not supported\n");
+	return ERR_PTR(-EOPNOTSUPP);
+}
+#endif
+
+enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
+					 u8 port_num)
+{
+	return IB_LINK_LAYER_UNSPECIFIED;
+}
+
diff --git a/drivers/infiniband/hw/efa/kcompat.h b/drivers/infiniband/hw/efa/kcompat.h
new file mode 100644
index 000000000000..700b9268d9f1
--- /dev/null
+++ b/drivers/infiniband/hw/efa/kcompat.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _KCOMPAT_H_
+#define _KCOMPAT_H_
+
+#ifndef LINUX_VERSION_CODE
+#include <linux/version.h>
+#else
+#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33))
+#include <linux/utsrelease.h>
+#else
+#include <generated/utsrelease.h>
+#endif
+
+/******************************************************************************/
+/**************************** RHEL macros *************************************/
+/******************************************************************************/
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#ifndef RHEL_RELEASE_CODE
+#define RHEL_RELEASE_CODE 0
+#endif
+
+/*****************************************************************************/
+/* Start of upstream defines */
+#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,15,0)) || \
+	(RHEL_RELEASE_CODE && \
+	(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,4))))
+#define HAVE_UMEM_SCATTERLIST_IF
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) || \
+	(RHEL_RELEASE_CODE && \
+	(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,0)))
+#define HAVE_GET_PORT_IMMUTABLE
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) || \
+	(RHEL_RELEASE_CODE && \
+	(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0)))
+#define HAVE_CREATE_AH_UDATA
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) || \
+	(RHEL_RELEASE_CODE && \
+	(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0)))
+#define HAVE_IB_QUERY_DEVICE_UDATA
+#define HAVE_CREATE_CQ_ATTR
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+#define HAVE_HW_STATS
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) || \
+	(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,6))
+#define HAVE_CREATE_AH_RDMA_ATTR
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) || \
+	(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,6))
+#define HAVE_DEV_PARENT
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) || \
+	(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,7)) || \
+	(CONFIG_SUSE_VERSION >= 15)
+#define HAVE_POST_CONST_WR
+#define HAVE_MAX_SEND_RCV_SGE
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) || \
+	(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,7)) || \
+	(CONFIG_SUSE_VERSION >= 15)
+#define HAVE_IB_REGISTER_DEVICE_NAME_PARAM
+#define HAVE_IB_MODIFY_QP_IS_OK_FOUR_PARAMS
+#define HAVE_RDMA_USER_MMAP_IO
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
+#define HAVE_IB_DEV_OPS
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
+#define HAVE_CREATE_DESTROY_AH_FLAGS
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+#define HAVE_SG_DMA_PAGE_ITER
+#define HAVE_PD_CORE_ALLOCATION
+#define HAVE_UCONTEXT_CORE_ALLOCATION
+#define HAVE_NO_KVERBS_DRIVERS
+#define HAVE_IB_UMEM_GET_UDATA
+#define HAVE_UDATA_TO_DRV_CONTEXT
+#define HAVE_IB_REGISTER_DEVICE_TWO_PARAMS
+#define HAVE_SAFE_IB_ALLOC_DEVICE
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+#define HAVE_AH_CORE_ALLOCATION
+#define HAVE_ALLOC_PD_NO_UCONTEXT
+#define HAVE_CREATE_CQ_NO_UCONTEXT
+#define HAVE_DEALLOC_PD_UDATA
+#define HAVE_DEREG_MR_UDATA
+#define HAVE_DESTROY_CQ_UDATA
+#define HAVE_DESTROY_QP_UDATA
+#define HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
+#define HAVE_UPSTREAM_EFA
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
+#define HAVE_IB_DEVICE_OPS_COMMON
+#define HAVE_IB_VOID_DESTROY_CQ
+#define HAVE_CQ_CORE_ALLOCATION
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0) && \
+	(CONFIG_SUSE_VERSION < 15)
+#define WORKAROUND_E093111DDB6C
+#endif
+/* End of upstream defines */
+
+#if !defined(HAVE_CREATE_AH_UDATA) || !defined(HAVE_IB_QUERY_DEVICE_UDATA)
+#define HAVE_CUSTOM_COMMANDS
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0) && \
+	(RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+
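+/*
+ * Backport of ib_is_udata_cleared(): copy the user buffer into the kernel
+ * and verify that the requested range contains only zero bytes.
+ */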
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+				       size_t offset,
+				       size_t len)
+{
+	const void __user *p = udata->inbuf + offset;
+	bool ret = false;
+	u8 *buf;
+
+	if (len > USHRT_MAX)
+		return false;
+
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return false;
+
+	if (copy_from_user(buf, p, len))
+		goto free;
+
+	ret = !memchr_inv(buf, 0, len);
+
+free:
+	kfree(buf);
+	return ret;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0)
+#define ibdev_err(_ibdev, format, arg...) \
+	dev_err(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#define ibdev_dbg(_ibdev, format, arg...) \
+	dev_dbg(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#define ibdev_warn(_ibdev, format, arg...) \
+	dev_warn(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#define ibdev_info(_ibdev, format, arg...) \
+	dev_info(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)
+#define ibdev_err_ratelimited(_ibdev, format, arg...) \
+	dev_err_ratelimited(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#define ibdev_dbg_ratelimited(_ibdev, format, arg...) \
+	dev_dbg_ratelimited(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#define ibdev_warn_ratelimited(_ibdev, format, arg...) \
+	dev_warn_ratelimited(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#define ibdev_info_ratelimited(_ibdev, format, arg...) \
+	dev_info_ratelimited(&((struct ib_device *)(_ibdev))->dev, format, ##arg)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) && \
+	RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7, 6)
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
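+/* minimal kvzalloc() backport: try kzalloc() first, fall back to vzalloc() */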
+static inline void *kvzalloc(size_t size, gfp_t flags)
+{
+	void *addr;
+
+	addr = kzalloc(size, flags | __GFP_NOWARN);
+	if (addr)
+		return addr;
+
+	return vzalloc(size);
+}
+#endif
+
+#endif /* _KCOMPAT_H_ */
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 06c34d99be85..26213f49f5c8 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -102,6 +102,7 @@ enum rdma_driver_id {
 	RDMA_DRIVER_RXE,
 	RDMA_DRIVER_HFI1,
 	RDMA_DRIVER_QIB,
+	RDMA_DRIVER_EFA,
 };
 
 #endif
-- 
2.21.0