summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-15 20:38:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-15 20:38:15 -0700
commit2a3c389a0fde49b241430df806a34276568cfb29 (patch)
tree9cf35829317e8cc2aaffc4341fb824dad63fce02 /include
parent8de262531f5fbb7458463224a7587429800c24bf (diff)
parent0b043644c0ca601cb19943a81aa1f1455dbe9461 (diff)
downloadlinux-2a3c389a0fde49b241430df806a34276568cfb29.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "A smaller cycle this time. Notably we see another new driver, 'Soft iWarp', and the deletion of an ancient unused driver for nes. - Revise and simplify the signature offload RDMA MR APIs - More progress on hoisting object allocation boiler plate code out of the drivers - Driver bug fixes and revisions for hns, hfi1, efa, cxgb4, qib, i40iw - Tree wide cleanups: struct_size, put_user_page, xarray, rst doc conversion - Removal of obsolete ib_ucm chardev and nes driver - netlink based discovery of chardevs and autoloading of the modules providing them - Move more of the rdamvt/hfi1 uapi to include/uapi/rdma - New driver 'siw' for software based iWarp running on top of netdev, much like rxe's software RoCE. - mlx5 feature to report events in their raw devx format to userspace - Expose per-object counters through rdma tool - Adaptive interrupt moderation for RDMA (DIM), sharing the DIM core from netdev" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (194 commits) RMDA/siw: Require a 64 bit arch RDMA/siw: Mark expected switch fall-throughs RDMA/core: Fix -Wunused-const-variable warnings rdma/siw: Remove set but not used variable 's' rdma/siw: Add missing dependencies on LIBCRC32C and DMA_VIRT_OPS RDMA/siw: Add missing rtnl_lock around access to ifa rdma/siw: Use proper enumerated type in map_cqe_status RDMA/siw: Remove unnecessary kthread create/destroy printouts IB/rdmavt: Fix variable shadowing issue in rvt_create_cq RDMA/core: Fix race when resolving IP address RDMA/core: Make rdma_counter.h compile stand alone IB/core: Work on the caller socket net namespace in nldev_newlink() RDMA/rxe: Fill in wc byte_len with IB_WC_RECV_RDMA_WITH_IMM RDMA/mlx5: Set RDMA DIM to be enabled by default RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink RDMA/core: Provide RDMA DIM support for ULPs linux/dim: Implement RDMA adaptive moderation (DIM) IB/mlx5: Report correctly tag matching rendezvous capability docs: infiniband: add it to the driver-api bookset IB/mlx5: Implement VHCA tunnel mechanism in DEVX ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/dim.h23
-rw-r--r--include/linux/mlx5/mlx5_ifc.h6
-rw-r--r--include/linux/mlx5/qp.h4
-rw-r--r--include/rdma/ib_umem.h19
-rw-r--r--include/rdma/ib_umem_odp.h20
-rw-r--r--include/rdma/ib_verbs.h247
-rw-r--r--include/rdma/mr_pool.h2
-rw-r--r--include/rdma/rdma_counter.h65
-rw-r--r--include/rdma/rdma_netlink.h8
-rw-r--r--include/rdma/rdma_vt.h5
-rw-r--r--include/rdma/rdmavt_cq.h25
-rw-r--r--include/rdma/rdmavt_qp.h312
-rw-r--r--include/rdma/restrack.h9
-rw-r--r--include/rdma/rw.h9
-rw-r--r--include/rdma/signature.h122
-rw-r--r--include/uapi/rdma/ib_user_cm.h326
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h19
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_verbs.h9
-rw-r--r--include/uapi/rdma/rdma_netlink.h86
-rw-r--r--include/uapi/rdma/rdma_user_ioctl_cmds.h1
-rw-r--r--include/uapi/rdma/rvt-abi.h66
-rw-r--r--include/uapi/rdma/siw-abi.h185
22 files changed, 980 insertions, 588 deletions
diff --git a/include/linux/dim.h b/include/linux/dim.h
index aa9bdd47a648..d3a0fbfff2bb 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -82,6 +82,7 @@ struct dim_stats {
* @prev_stats: Measured rates from previous iteration (for comparison)
* @start_sample: Sampled data at start of current iteration
* @work: Work to perform on action required
+ * @priv: A pointer to the struct that points to dim
* @profile_ix: Current moderation profile
* @mode: CQ period count mode
* @tune_state: Algorithm tuning state (see below)
@@ -95,6 +96,7 @@ struct dim {
struct dim_sample start_sample;
struct dim_sample measuring_sample;
struct work_struct work;
+ void *priv;
u8 profile_ix;
u8 mode;
u8 tune_state;
@@ -363,4 +365,25 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
*/
void net_dim(struct dim *dim, struct dim_sample end_sample);
+/* RDMA DIM */
+
+/*
+ * RDMA DIM profile:
+ * profile size must be of RDMA_DIM_PARAMS_NUM_PROFILES.
+ */
+#define RDMA_DIM_PARAMS_NUM_PROFILES 9
+#define RDMA_DIM_START_PROFILE 0
+
+/**
+ * rdma_dim - Runs the adaptive moderation.
+ * @dim: The moderation struct.
+ * @completions: The number of completions collected in this round.
+ *
+ * Each call to rdma_dim takes the latest amount of completions that
+ * have been collected and counts them as a new event.
+ * Once enough events have been collected the algorithm decides a new
+ * moderation level.
+ */
+void rdma_dim(struct dim *dim, u64 completions);
+
#endif /* DIM_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 515624c66ce1..b3d5752657d9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1391,7 +1391,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_6c8[0x28];
u8 sf_base_id[0x10];
- u8 reserved_at_700[0x100];
+ u8 reserved_at_700[0x80];
+ u8 vhca_tunnel_commands[0x40];
+ u8 reserved_at_7c0[0x40];
};
enum mlx5_flow_destination_type {
@@ -9695,7 +9697,7 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
u8 opcode[0x10];
u8 uid[0x10];
- u8 reserved_at_20[0x10];
+ u8 vhca_tunnel_id[0x10];
u8 obj_type[0x10];
u8 obj_id[0x20];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 127d224443e3..ae63b1ae9004 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -37,7 +37,8 @@
#include <linux/mlx5/driver.h>
#define MLX5_INVALID_LKEY 0x100
-#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5)
+/* UMR (3 WQE_BB's) + SIG (3 WQE_BB's) + PSV (mem) + PSV (wire) */
+#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 8)
#define MLX5_DIF_SIZE 8
#define MLX5_STRIDE_BLOCK_OP 0x400
#define MLX5_CPY_GRD_MASK 0xc0
@@ -70,6 +71,7 @@ enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_CQN_RCV = 1 << 19,
MLX5_QP_OPTPAR_DC_HS = 1 << 20,
MLX5_QP_OPTPAR_DC_KEY = 1 << 21,
+ MLX5_QP_OPTPAR_COUNTER_SET_ID = 1 << 25,
};
enum mlx5_qp_state {
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 040d853077c6..1052d0d62be7 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -46,7 +46,6 @@ struct ib_umem {
struct mm_struct *owning_mm;
size_t length;
unsigned long address;
- int page_shift;
u32 writable : 1;
u32 is_odp : 1;
struct work_struct work;
@@ -58,24 +57,14 @@ struct ib_umem {
/* Returns the offset of the umem start relative to the first page. */
static inline int ib_umem_offset(struct ib_umem *umem)
{
- return umem->address & (BIT(umem->page_shift) - 1);
-}
-
-/* Returns the first page of an ODP umem. */
-static inline unsigned long ib_umem_start(struct ib_umem *umem)
-{
- return umem->address - ib_umem_offset(umem);
-}
-
-/* Returns the address of the page after the last one of an ODP umem. */
-static inline unsigned long ib_umem_end(struct ib_umem *umem)
-{
- return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
+ return umem->address & ~PAGE_MASK;
}
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
- return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
+ return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
+ ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
+ PAGE_SHIFT;
}
#ifdef CONFIG_INFINIBAND_USER_MEM
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index eeec4e53c448..479db5c98ff6 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -76,6 +76,7 @@ struct ib_umem_odp {
struct completion notifier_completion;
int dying;
+ unsigned int page_shift;
struct work_struct work;
};
@@ -84,6 +85,25 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
return container_of(umem, struct ib_umem_odp, umem);
}
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
+{
+ return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
+{
+ return ALIGN(umem_odp->umem.address + umem_odp->umem.length,
+ 1UL << umem_odp->page_shift);
+}
+
+static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
+{
+ return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >>
+ umem_odp->page_shift;
+}
+
/*
* The lower 2 bits of the DMA address signal the R/W permissions for
* the entry. To upgrade the permissions, provide the appropriate
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 54873085f2da..c5f8a9f17063 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -61,8 +61,11 @@
#include <linux/cgroup_rdma.h>
#include <linux/irqflags.h>
#include <linux/preempt.h>
+#include <linux/dim.h>
#include <uapi/rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
#include <rdma/restrack.h>
+#include <rdma/signature.h>
#include <uapi/rdma/rdma_user_ioctl.h>
#include <uapi/rdma/ib_user_ioctl_verbs.h>
@@ -132,17 +135,6 @@ struct ib_gid_attr {
u8 port_num;
};
-enum rdma_node_type {
- /* IB values map to NodeInfo:NodeType. */
- RDMA_NODE_IB_CA = 1,
- RDMA_NODE_IB_SWITCH,
- RDMA_NODE_IB_ROUTER,
- RDMA_NODE_RNIC,
- RDMA_NODE_USNIC,
- RDMA_NODE_USNIC_UDP,
- RDMA_NODE_UNSPECIFIED,
-};
-
enum {
/* set the local administered indication */
IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2,
@@ -164,7 +156,7 @@ enum rdma_protocol_type {
};
__attribute_const__ enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type);
+rdma_node_get_transport(unsigned int node_type);
enum rdma_network_type {
RDMA_NETWORK_IB,
@@ -263,7 +255,7 @@ enum ib_device_cap_flags {
*/
IB_DEVICE_CROSS_CHANNEL = (1 << 27),
IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
+ IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30),
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
@@ -275,17 +267,6 @@ enum ib_device_cap_flags {
IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
};
-enum ib_signature_prot_cap {
- IB_PROT_T10DIF_TYPE_1 = 1,
- IB_PROT_T10DIF_TYPE_2 = 1 << 1,
- IB_PROT_T10DIF_TYPE_3 = 1 << 2,
-};
-
-enum ib_signature_guard_cap {
- IB_GUARD_T10DIF_CRC = 1,
- IB_GUARD_T10DIF_CSUM = 1 << 1,
-};
-
enum ib_atomic_cap {
IB_ATOMIC_NONE,
IB_ATOMIC_HCA,
@@ -327,8 +308,8 @@ struct ib_rss_caps {
};
enum ib_tm_cap_flags {
- /* Support tag matching on RC transport */
- IB_TM_CAP_RC = 1 << 0,
+ /* Support tag matching with rendezvous offload for RC transport */
+ IB_TM_CAP_RNDV_RC = 1 << 0,
};
struct ib_tm_caps {
@@ -411,6 +392,7 @@ struct ib_device_attr {
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
+ unsigned int max_pi_fast_reg_page_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
int sig_prot_cap;
@@ -796,118 +778,26 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
* enum ib_mr_type - memory region type
* @IB_MR_TYPE_MEM_REG: memory region that is used for
* normal registration
- * @IB_MR_TYPE_SIGNATURE: memory region that is used for
- * signature operations (data-integrity
- * capable regions)
* @IB_MR_TYPE_SG_GAPS: memory region that is capable to
* register any arbitrary sg lists (without
* the normal mr constraints - see
* ib_map_mr_sg)
+ * @IB_MR_TYPE_DM: memory region that is used for device
+ * memory registration
+ * @IB_MR_TYPE_USER: memory region that is used for the user-space
+ * application
+ * @IB_MR_TYPE_DMA: memory region that is used for DMA operations
+ * without address translations (VA=PA)
+ * @IB_MR_TYPE_INTEGRITY: memory region that is used for
+ * data integrity operations
*/
enum ib_mr_type {
IB_MR_TYPE_MEM_REG,
- IB_MR_TYPE_SIGNATURE,
IB_MR_TYPE_SG_GAPS,
-};
-
-/**
- * Signature types
- * IB_SIG_TYPE_NONE: Unprotected.
- * IB_SIG_TYPE_T10_DIF: Type T10-DIF
- */
-enum ib_signature_type {
- IB_SIG_TYPE_NONE,
- IB_SIG_TYPE_T10_DIF,
-};
-
-/**
- * Signature T10-DIF block-guard types
- * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
- * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
- */
-enum ib_t10_dif_bg_type {
- IB_T10DIF_CRC,
- IB_T10DIF_CSUM
-};
-
-/**
- * struct ib_t10_dif_domain - Parameters specific for T10-DIF
- * domain.
- * @bg_type: T10-DIF block guard type (CRC|CSUM)
- * @pi_interval: protection information interval.
- * @bg: seed of guard computation.
- * @app_tag: application tag of guard block
- * @ref_tag: initial guard block reference tag.
- * @ref_remap: Indicate wethear the reftag increments each block
- * @app_escape: Indicate to skip block check if apptag=0xffff
- * @ref_escape: Indicate to skip block check if reftag=0xffffffff
- * @apptag_check_mask: check bitmask of application tag.
- */
-struct ib_t10_dif_domain {
- enum ib_t10_dif_bg_type bg_type;
- u16 pi_interval;
- u16 bg;
- u16 app_tag;
- u32 ref_tag;
- bool ref_remap;
- bool app_escape;
- bool ref_escape;
- u16 apptag_check_mask;
-};
-
-/**
- * struct ib_sig_domain - Parameters for signature domain
- * @sig_type: specific signauture type
- * @sig: union of all signature domain attributes that may
- * be used to set domain layout.
- */
-struct ib_sig_domain {
- enum ib_signature_type sig_type;
- union {
- struct ib_t10_dif_domain dif;
- } sig;
-};
-
-/**
- * struct ib_sig_attrs - Parameters for signature handover operation
- * @check_mask: bitmask for signature byte check (8 bytes)
- * @mem: memory domain layout desciptor.
- * @wire: wire domain layout desciptor.
- */
-struct ib_sig_attrs {
- u8 check_mask;
- struct ib_sig_domain mem;
- struct ib_sig_domain wire;
-};
-
-enum ib_sig_err_type {
- IB_SIG_BAD_GUARD,
- IB_SIG_BAD_REFTAG,
- IB_SIG_BAD_APPTAG,
-};
-
-/**
- * Signature check masks (8 bytes in total) according to the T10-PI standard:
- * -------- -------- ------------
- * | GUARD | APPTAG | REFTAG |
- * | 2B | 2B | 4B |
- * -------- -------- ------------
- */
-enum {
- IB_SIG_CHECK_GUARD = 0xc0,
- IB_SIG_CHECK_APPTAG = 0x30,
- IB_SIG_CHECK_REFTAG = 0x0f,
-};
-
-/**
- * struct ib_sig_err - signature error descriptor
- */
-struct ib_sig_err {
- enum ib_sig_err_type err_type;
- u32 expected;
- u32 actual;
- u64 sig_err_offset;
- u32 key;
+ IB_MR_TYPE_DM,
+ IB_MR_TYPE_USER,
+ IB_MR_TYPE_DMA,
+ IB_MR_TYPE_INTEGRITY,
};
enum ib_mr_status_check {
@@ -1164,7 +1054,7 @@ enum ib_qp_create_flags {
IB_QP_CREATE_MANAGED_SEND = 1 << 3,
IB_QP_CREATE_MANAGED_RECV = 1 << 4,
IB_QP_CREATE_NETIF_QP = 1 << 5,
- IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
+ IB_QP_CREATE_INTEGRITY_EN = 1 << 6,
/* FREE = 1 << 7, */
IB_QP_CREATE_SCATTER_FCS = 1 << 8,
IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
@@ -1343,7 +1233,7 @@ enum ib_wr_opcode {
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
- IB_WR_REG_SIG_MR,
+ IB_WR_REG_MR_INTEGRITY,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
@@ -1453,20 +1343,6 @@ static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
return container_of(wr, struct ib_reg_wr, wr);
}
-struct ib_sig_handover_wr {
- struct ib_send_wr wr;
- struct ib_sig_attrs *sig_attrs;
- struct ib_mr *sig_mr;
- int access_flags;
- struct ib_sge *prot;
-};
-
-static inline const struct ib_sig_handover_wr *
-sig_handover_wr(const struct ib_send_wr *wr)
-{
- return container_of(wr, struct ib_sig_handover_wr, wr);
-}
-
struct ib_recv_wr {
struct ib_recv_wr *next;
union {
@@ -1634,6 +1510,7 @@ struct ib_cq {
struct work_struct work;
};
struct workqueue_struct *comp_wq;
+ struct dim *dim;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -1818,10 +1695,14 @@ struct ib_qp {
struct ib_qp_security *qp_sec;
u8 port;
+ bool integrity_en;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
+
+ /* The counter the qp is bind to */
+ struct rdma_counter *counter;
};
struct ib_dm {
@@ -1840,6 +1721,7 @@ struct ib_mr {
u64 iova;
u64 length;
unsigned int page_size;
+ enum ib_mr_type type;
bool need_inval;
union {
struct ib_uobject *uobject; /* user */
@@ -1847,7 +1729,7 @@ struct ib_mr {
};
struct ib_dm *dm;
-
+ struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -2243,6 +2125,8 @@ struct ib_port_data {
spinlock_t netdev_lock;
struct net_device __rcu *netdev;
struct hlist_node ndev_hash_link;
+ struct rdma_port_counter port_counter;
+ struct rdma_hw_stats *hw_stats;
};
/* rdma netdev type - specifies protocol type */
@@ -2329,6 +2213,11 @@ struct iw_cm_conn_param;
* need to define the supported operations, otherwise they will be set to null.
*/
struct ib_device_ops {
+ struct module *owner;
+ enum rdma_driver_id driver_id;
+ u32 uverbs_abi_ver;
+ unsigned int uverbs_no_driver_id_binding:1;
+
int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
const struct ib_send_wr **bad_send_wr);
int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
@@ -2454,11 +2343,10 @@ struct ib_device_ops {
int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
- struct ib_cq *(*create_cq)(struct ib_device *device,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
- int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+ void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
@@ -2470,6 +2358,9 @@ struct ib_device_ops {
int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg, struct ib_udata *udata);
+ struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg);
int (*advise_mr)(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice, u32 flags,
struct ib_sge *sg_list, u32 num_sge,
@@ -2516,7 +2407,7 @@ struct ib_device_ops {
struct ib_wq *(*create_wq)(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
- int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+ void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table *(*create_rwq_ind_table)(
@@ -2538,6 +2429,11 @@ struct ib_device_ops {
int (*read_counters)(struct ib_counters *counters,
struct ib_counters_read_attr *counters_read_attr,
struct uverbs_attr_bundle *attrs);
+ int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
+
/**
* alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
* driver initialized data. The struct is kfree()'ed by the sysfs
@@ -2595,8 +2491,34 @@ struct ib_device_ops {
u8 pdata_len);
int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+ /**
+ * counter_bind_qp - Bind a QP to a counter.
+ * @counter - The counter to be bound. If counter->id is zero then
+ * the driver needs to allocate a new counter and set counter->id
+ */
+ int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
+ /**
+ * counter_unbind_qp - Unbind the qp from the dynamically-allocated
+ * counter and bind it onto the default one
+ */
+ int (*counter_unbind_qp)(struct ib_qp *qp);
+ /**
+ * counter_dealloc -De-allocate the hw counter
+ */
+ int (*counter_dealloc)(struct rdma_counter *counter);
+ /**
+ * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
+ * the driver initialized data.
+ */
+ struct rdma_hw_stats *(*counter_alloc_stats)(
+ struct rdma_counter *counter);
+ /**
+ * counter_update_stats - Query the stats value of this counter
+ */
+ int (*counter_update_stats)(struct rdma_counter *counter);
DECLARE_RDMA_OBJ_SIZE(ib_ah);
+ DECLARE_RDMA_OBJ_SIZE(ib_cq);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
DECLARE_RDMA_OBJ_SIZE(ib_srq);
DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
@@ -2636,7 +2558,6 @@ struct ib_device {
int num_comp_vectors;
- struct module *owner;
union {
struct device dev;
struct ib_core_device coredev;
@@ -2648,7 +2569,6 @@ struct ib_device {
*/
const struct attribute_group *groups[3];
- int uverbs_abi_ver;
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
@@ -2658,6 +2578,8 @@ struct ib_device {
u16 is_switch:1;
/* Indicates kernel verbs support, should not be used in drivers */
u16 kverbs_provider:1;
+ /* CQ adaptive moderation (RDMA DIM) */
+ u16 use_cq_dim:1;
u8 node_type;
u8 phys_port_cnt;
struct ib_device_attr attrs;
@@ -2672,7 +2594,6 @@ struct ib_device {
struct rdma_restrack_root *res;
const struct uapi_definition *driver_def;
- enum rdma_driver_id driver_id;
/*
* Positive refcount indicates that the device is currently
@@ -2694,11 +2615,15 @@ struct ib_device {
u32 iw_driver_flags;
};
+struct ib_client_nl_info;
struct ib_client {
const char *name;
void (*add) (struct ib_device *);
void (*remove)(struct ib_device *, void *client_data);
void (*rename)(struct ib_device *dev, void *client_data);
+ int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res);
+ int (*get_global_nl_info)(struct ib_client_nl_info *res);
/* Returns the net_dev belonging to this ib_client and matching the
* given parameters.
@@ -3859,9 +3784,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
*
* NOTE: for user cq use ib_destroy_cq_user with valid udata!
*/
-static inline int ib_destroy_cq(struct ib_cq *cq)
+static inline void ib_destroy_cq(struct ib_cq *cq)
{
- return ib_destroy_cq_user(cq, NULL);
+ ib_destroy_cq_user(cq, NULL);
}
/**
@@ -4148,6 +4073,10 @@ static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL);
}
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg);
+
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
* R_Key and L_Key.
@@ -4332,6 +4261,10 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset, unsigned int page_size);
static inline int
ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
diff --git a/include/rdma/mr_pool.h b/include/rdma/mr_pool.h
index 83763ef82354..e77123bcb43b 100644
--- a/include/rdma/mr_pool.h
+++ b/include/rdma/mr_pool.h
@@ -11,7 +11,7 @@ struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list);
void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr);
int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
- enum ib_mr_type type, u32 max_num_sg);
+ enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg);
void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list);
#endif /* _RDMA_MR_POOL_H */
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
new file mode 100644
index 000000000000..eb99856e8b30
--- /dev/null
+++ b/include/rdma/rdma_counter.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_COUNTER_H_
+#define _RDMA_COUNTER_H_
+
+#include <linux/mutex.h>
+#include <linux/pid_namespace.h>
+
+#include <rdma/restrack.h>
+#include <rdma/rdma_netlink.h>
+
+struct ib_device;
+struct ib_qp;
+
+struct auto_mode_param {
+ int qp_type;
+};
+
+struct rdma_counter_mode {
+ enum rdma_nl_counter_mode mode;
+ enum rdma_nl_counter_mask mask;
+ struct auto_mode_param param;
+};
+
+struct rdma_port_counter {
+ struct rdma_counter_mode mode;
+ struct rdma_hw_stats *hstats;
+ unsigned int num_counters;
+ struct mutex lock;
+};
+
+struct rdma_counter {
+ struct rdma_restrack_entry res;
+ struct ib_device *device;
+ uint32_t id;
+ struct kref kref;
+ struct rdma_counter_mode mode;
+ struct mutex lock;
+ struct rdma_hw_stats *stats;
+ u8 port;
+};
+
+void rdma_counter_init(struct ib_device *dev);
+void rdma_counter_release(struct ib_device *dev);
+int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
+ bool on, enum rdma_nl_counter_mask mask);
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
+
+int rdma_counter_query_stats(struct rdma_counter *counter);
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index);
+int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 counter_id);
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 *counter_id);
+int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 counter_id);
+int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+ enum rdma_nl_counter_mode *mode,
+ enum rdma_nl_counter_mask *mask);
+
+#endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 10732ab31ba2..6631624e4d7c 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -6,6 +6,12 @@
#include <linux/netlink.h>
#include <uapi/rdma/rdma_netlink.h>
+enum {
+ RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
+ RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
+ RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32,
+};
+
struct rdma_nl_cbs {
int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
@@ -110,4 +116,6 @@ void rdma_link_register(struct rdma_link_ops *ops);
void rdma_link_unregister(struct rdma_link_ops *ops);
#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
+#define MODULE_ALIAS_RDMA_CLIENT(type) MODULE_ALIAS("rdma-client-" type)
+
#endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index b9cd06db1a71..525848e227dc 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -2,7 +2,7 @@
#define DEF_RDMA_VT_H
/*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -202,7 +202,6 @@ struct rvt_pd {
struct rvt_ah {
struct ib_ah ibah;
struct rdma_ah_attr attr;
- atomic_t refcount;
u8 vl;
u8 log_pmtu;
};
@@ -555,7 +554,7 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
-int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id);
+int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd);
int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
index 75dc65c0bfb8..04c519ef6d71 100644
--- a/include/rdma/rdmavt_cq.h
+++ b/include/rdma/rdmavt_cq.h
@@ -61,18 +61,27 @@
#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1)
/*
+ * Define read macro that apply smp_load_acquire memory barrier
+ * when reading indice of circular buffer that mmaped to user space.
+ */
+#define RDMA_READ_UAPI_ATOMIC(member) smp_load_acquire(&(member).val)
+
+/*
+ * Define write macro that uses smp_store_release memory barrier
+ * when writing indice of circular buffer that mmaped to user space.
+ */
+#define RDMA_WRITE_UAPI_ATOMIC(member, x) smp_store_release(&(member).val, x)
+#include <rdma/rvt-abi.h>
+
+/*
* This structure is used to contain the head pointer, tail pointer,
* and completion queue entries as a single memory allocation so
* it can be mmap'ed into user space.
*/
-struct rvt_cq_wc {
+struct rvt_k_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
- union {
- /* these are actually size ibcq.cqe + 1 */
- struct ib_uverbs_wc uqueue[0];
- struct ib_wc kqueue[0];
- };
+ struct ib_wc kqueue[];
};
/*
@@ -84,10 +93,12 @@ struct rvt_cq {
spinlock_t lock; /* protect changes in this struct */
u8 notify;
u8 triggered;
+ u8 cq_full;
int comp_vector_cpu;
struct rvt_dev_info *rdi;
struct rvt_cq_wc *queue;
struct rvt_mmap_info *ip;
+ struct rvt_k_cq_wc *kqueue;
};
static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
@@ -95,6 +106,6 @@ static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
return container_of(ibcq, struct rvt_cq, ibcq);
}
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
+bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
#endif /* DEF_RDMAVT_INCCQH */
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 68e38c20afc0..0eeea520a853 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
#define DEF_RDMAVT_INCQP_H
/*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -52,6 +52,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdmavt_cq.h>
+#include <rdma/rvt-abi.h>
/*
* Atomic bit definitions for r_aflags.
*/
@@ -156,6 +157,22 @@
#define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START
#define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1)
+/**
+ * rvt_ud_wr - IB UD work plus AH cache
+ * @wr: valid IB work request
+ * @attr: pointer to an allocated AH attribute
+ *
+ * Special case the UD WR so we can keep track of the AH attributes.
+ *
+ * NOTE: This data structure is stricly ordered wr then attr. I.e the attr
+ * MUST come after wr. The ib_ud_wr is sized and copied in rvt_post_one_wr.
+ * The copy assumes that wr is first.
+ */
+struct rvt_ud_wr {
+ struct ib_ud_wr wr;
+ struct rdma_ah_attr *attr;
+};
+
/*
* Send work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
@@ -164,7 +181,7 @@
struct rvt_swqe {
union {
struct ib_send_wr wr; /* don't use wr.sg_list */
- struct ib_ud_wr ud_wr;
+ struct rvt_ud_wr ud_wr;
struct ib_reg_wr reg_wr;
struct ib_rdma_wr rdma_wr;
struct ib_atomic_wr atomic_wr;
@@ -177,33 +194,84 @@ struct rvt_swqe {
struct rvt_sge sg_list[0];
};
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+/**
+ * struct rvt_krwq - kernel struct receive work request
+ * @p_lock: lock to protect producer of the kernel buffer
+ * @head: index of next entry to fill
+ * @c_lock:lock to protect consumer of the kernel buffer
+ * @tail: index of next entry to pull
+ * @count: count is aproximate of total receive enteries posted
+ * @rvt_rwqe: struct of receive work request queue entry
+ *
+ * This structure is used to contain the head pointer,
+ * tail pointer and receive work queue entries for kernel
+ * mode user.
*/
-struct rvt_rwqe {
- u64 wr_id;
- u8 num_sge;
- struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct rvt_rwq {
+struct rvt_krwq {
+ spinlock_t p_lock; /* protect producer */
u32 head; /* new work requests posted to the head */
+
+ /* protect consumer */
+ spinlock_t c_lock ____cacheline_aligned_in_smp;
u32 tail; /* receives pull requests from here. */
- struct rvt_rwqe wq[0];
+ u32 count; /* approx count of receive entries posted */
+ struct rvt_rwqe *curr_wq;
+ struct rvt_rwqe wq[];
};
+/*
+ * rvt_get_swqe_ah - Return the pointer to the struct rvt_ah
+ * @swqe: valid Send WQE
+ *
+ */
+static inline struct rvt_ah *rvt_get_swqe_ah(struct rvt_swqe *swqe)
+{
+ return ibah_to_rvtah(swqe->ud_wr.wr.ah);
+}
+
+/**
+ * rvt_get_swqe_ah_attr - Return the cached ah attribute information
+ * @swqe: valid Send WQE
+ *
+ */
+static inline struct rdma_ah_attr *rvt_get_swqe_ah_attr(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.attr;
+}
+
+/**
+ * rvt_get_swqe_remote_qpn - Access the remote QPN value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qpn(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.wr.remote_qpn;
+}
+
+/**
+ * rvt_get_swqe_remote_qkey - Acces the remote qkey value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qkey(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.wr.remote_qkey;
+}
+
+/**
+ * rvt_get_swqe_pkey_index - Access the pkey index
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u16 rvt_get_swqe_pkey_index(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.wr.pkey_index;
+}
+
struct rvt_rq {
struct rvt_rwq *wq;
+ struct rvt_krwq *kwq;
u32 size; /* size of RWQE array */
u8 max_sge;
/* protect changes in this struct */
@@ -472,7 +540,7 @@ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp,
static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
{
return (struct rvt_rwqe *)
- ((char *)rq->wq->wq +
+ ((char *)rq->kwq->curr_wq +
(sizeof(struct rvt_rwqe) +
rq->max_sge * sizeof(struct ib_sge)) * n);
}
@@ -565,42 +633,6 @@ static inline void rvt_qp_wqe_unreserve(
extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
-/**
- * rvt_qp_swqe_complete() - insert send completion
- * @qp - the qp
- * @wqe - the send wqe
- * @status - completion status
- *
- * Insert a send completion into the completion
- * queue if the qp indicates it should be done.
- *
- * See IBTA 10.7.3.1 for info on completion
- * control.
- */
-static inline void rvt_qp_swqe_complete(
- struct rvt_qp *qp,
- struct rvt_swqe *wqe,
- enum ib_wc_opcode opcode,
- enum ib_wc_status status)
-{
- if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
- return;
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = opcode;
- wc.qp = &qp->ibqp;
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
-}
-
/*
* Compare the lower 24 bits of the msn values.
* Returns an integer <, ==, or > than zero.
@@ -734,7 +766,119 @@ static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
rvt_put_swqe(wqe);
if (qp->allowed_ops == IB_OPCODE_UD)
- atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+ rdma_destroy_ah_attr(wqe->ud_wr.attr);
+}
+
+/**
+ * rvt_qp_sqwe_incr - increment ring index
+ * @qp: the qp
+ * @val: the starting value
+ *
+ * Return: the new value wrapping as appropriate
+ */
+static inline u32
+rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val)
+{
+ if (++val >= qp->s_size)
+ val = 0;
+ return val;
+}
+
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
+
+/**
+ * rvt_recv_cq - add a new entry to completion queue
+ * by receive queue
+ * @qp: receive queue
+ * @wc: work completion entry to add
+ * @solicited: true if @entry is solicited
+ *
+ * This is wrapper function for rvt_enter_cq function call by
+ * receive queue. If rvt_cq_enter return false, it means cq is
+ * full and the qp is put into error state.
+ */
+static inline void rvt_recv_cq(struct rvt_qp *qp, struct ib_wc *wc,
+ bool solicited)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.recv_cq);
+
+ if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+ rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_send_cq - add a new entry to completion queue
+ * by send queue
+ * @qp: send queue
+ * @wc: work completion entry to add
+ * @solicited: true if @entry is solicited
+ *
+ * This is wrapper function for rvt_enter_cq function call by
+ * send queue. If rvt_cq_enter return false, it means cq is
+ * full and the qp is put into error state.
+ */
+static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc,
+ bool solicited)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.send_cq);
+
+ if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+ rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_qp_complete_swqe - insert send completion
+ * @qp - the qp
+ * @wqe - the send wqe
+ * @opcode - wc operation (driver dependent)
+ * @status - completion status
+ *
+ * Update the s_last information, and then insert a send
+ * completion into the completion
+ * queue if the qp indicates it should be done.
+ *
+ * See IBTA 10.7.3.1 for info on completion
+ * control.
+ *
+ * Return: new last
+ */
+static inline u32
+rvt_qp_complete_swqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
+ enum ib_wc_opcode opcode,
+ enum ib_wc_status status)
+{
+ bool need_completion;
+ u64 wr_id;
+ u32 byte_len, last;
+ int flags = wqe->wr.send_flags;
+
+ rvt_put_qp_swqe(qp, wqe);
+
+ need_completion =
+ !(flags & RVT_SEND_RESERVE_USED) &&
+ (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
+ (flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS);
+ if (need_completion) {
+ wr_id = wqe->wr.wr_id;
+ byte_len = wqe->length;
+ /* above fields required before writing s_last */
+ }
+ last = rvt_qp_swqe_incr(qp, qp->s_last);
+ /* see rvt_qp_is_avail() */
+ smp_store_release(&qp->s_last, last);
+ if (need_completion) {
+ struct ib_wc w = {
+ .wr_id = wr_id,
+ .status = status,
+ .opcode = opcode,
+ .qp = &qp->ibqp,
+ .byte_len = byte_len,
+ };
+ rvt_send_cq(qp, &w, status != IB_WC_SUCCESS);
+ }
+ return last;
}
extern const int ib_rvt_state_ops[];
@@ -742,7 +886,6 @@ extern const int ib_rvt_state_ops[];
struct rvt_dev_info;
int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only);
void rvt_comm_est(struct rvt_qp *qp);
-int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
unsigned long rvt_rnr_tbl_to_usec(u32 index);
enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
@@ -784,6 +927,53 @@ struct rvt_qp_iter {
int n;
};
+/**
+ * ib_cq_tail - Return tail index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get tail
+ * of cq buffer.
+ */
+static inline u32 ib_cq_tail(struct ib_cq *send_cq)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+ return ibcq_to_rvtcq(send_cq)->ip ?
+ RDMA_READ_UAPI_ATOMIC(cq->queue->tail) :
+ ibcq_to_rvtcq(send_cq)->kqueue->tail;
+}
+
+/**
+ * ib_cq_head - Return head index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get head
+ * of cq buffer.
+ */
+static inline u32 ib_cq_head(struct ib_cq *send_cq)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+ return ibcq_to_rvtcq(send_cq)->ip ?
+ RDMA_READ_UAPI_ATOMIC(cq->queue->head) :
+ ibcq_to_rvtcq(send_cq)->kqueue->head;
+}
+
+/**
+ * rvt_free_rq - free memory allocated for rvt_rq struct
+ * @rvt_rq: request queue data structure
+ *
+ * This function should only be called if the rvt_mmap_info()
+ * has not succeeded.
+ */
+static inline void rvt_free_rq(struct rvt_rq *rq)
+{
+ kvfree(rq->kwq);
+ rq->kwq = NULL;
+ vfree(rq->wq);
+ rq->wq = NULL;
+}
+
struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
u64 v,
void (*cb)(struct rvt_qp *qp, u64 v));
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index ecf3c7702a4f..b0fc6b26bdf5 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -14,6 +14,9 @@
#include <uapi/rdma/rdma_netlink.h>
#include <linux/xarray.h>
+struct ib_device;
+struct sk_buff;
+
/**
* enum rdma_restrack_type - HW objects to track
*/
@@ -43,13 +46,15 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_CTX,
/**
+ * @RDMA_RESTRACK_COUNTER: Statistic Counter
+ */
+ RDMA_RESTRACK_COUNTER,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
};
-struct ib_device;
-
/**
* struct rdma_restrack_entry - metadata per-entry
*/
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
index 494f79ca3e62..6ad9dc836c10 100644
--- a/include/rdma/rw.h
+++ b/include/rdma/rw.h
@@ -39,15 +39,6 @@ struct rdma_rw_ctx {
struct ib_send_wr inv_wr;
struct ib_mr *mr;
} *reg;
-
- struct {
- struct rdma_rw_reg_ctx data;
- struct rdma_rw_reg_ctx prot;
- struct ib_send_wr sig_inv_wr;
- struct ib_mr *sig_mr;
- struct ib_sge sig_sge;
- struct ib_sig_handover_wr sig_wr;
- } *sig;
};
};
diff --git a/include/rdma/signature.h b/include/rdma/signature.h
new file mode 100644
index 000000000000..f24cc2a1d3c5
--- /dev/null
+++ b/include/rdma/signature.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_SIGNATURE_H_
+#define _RDMA_SIGNATURE_H_
+
+enum ib_signature_prot_cap {
+ IB_PROT_T10DIF_TYPE_1 = 1,
+ IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+ IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+ IB_GUARD_T10DIF_CRC = 1,
+ IB_GUARD_T10DIF_CSUM = 1 << 1,
+};
+
+/**
+ * enum ib_signature_type - Signature types
+ * @IB_SIG_TYPE_NONE: Unprotected.
+ * @IB_SIG_TYPE_T10_DIF: Type T10-DIF
+ */
+enum ib_signature_type {
+ IB_SIG_TYPE_NONE,
+ IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
+ * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+ IB_T10DIF_CRC,
+ IB_T10DIF_CSUM,
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ * domain.
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @ref_remap: Indicate wethear the reftag increments each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
+ */
+struct ib_t10_dif_domain {
+ enum ib_t10_dif_bg_type bg_type;
+ u16 pi_interval;
+ u16 bg;
+ u16 app_tag;
+ u32 ref_tag;
+ bool ref_remap;
+ bool app_escape;
+ bool ref_escape;
+ u16 apptag_check_mask;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signauture type
+ * @sig: union of all signature domain attributes that may
+ * be used to set domain layout.
+ */
+struct ib_sig_domain {
+ enum ib_signature_type sig_type;
+ union {
+ struct ib_t10_dif_domain dif;
+ } sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ * @meta_length: metadata length
+ */
+struct ib_sig_attrs {
+ u8 check_mask;
+ struct ib_sig_domain mem;
+ struct ib_sig_domain wire;
+ int meta_length;
+};
+
+enum ib_sig_err_type {
+ IB_SIG_BAD_GUARD,
+ IB_SIG_BAD_REFTAG,
+ IB_SIG_BAD_APPTAG,
+};
+
+/*
+ * Signature check masks (8 bytes in total) according to the T10-PI standard:
+ * -------- -------- ------------
+ * | GUARD | APPTAG | REFTAG |
+ * | 2B | 2B | 4B |
+ * -------- -------- ------------
+ */
+enum {
+ IB_SIG_CHECK_GUARD = 0xc0,
+ IB_SIG_CHECK_APPTAG = 0x30,
+ IB_SIG_CHECK_REFTAG = 0x0f,
+};
+
+/*
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+ enum ib_sig_err_type err_type;
+ u32 expected;
+ u32 actual;
+ u64 sig_err_offset;
+ u32 key;
+};
+
+#endif /* _RDMA_SIGNATURE_H_ */
diff --git a/include/uapi/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h
deleted file mode 100644
index e2709bb8cb18..000000000000
--- a/include/uapi/rdma/ib_user_cm.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef IB_USER_CM_H
-#define IB_USER_CM_H
-
-#include <linux/types.h>
-#include <rdma/ib_user_sa.h>
-
-#define IB_USER_CM_ABI_VERSION 5
-
-enum {
- IB_USER_CM_CMD_CREATE_ID,
- IB_USER_CM_CMD_DESTROY_ID,
- IB_USER_CM_CMD_ATTR_ID,
-
- IB_USER_CM_CMD_LISTEN,
- IB_USER_CM_CMD_NOTIFY,
-
- IB_USER_CM_CMD_SEND_REQ,
- IB_USER_CM_CMD_SEND_REP,
- IB_USER_CM_CMD_SEND_RTU,
- IB_USER_CM_CMD_SEND_DREQ,
- IB_USER_CM_CMD_SEND_DREP,
- IB_USER_CM_CMD_SEND_REJ,
- IB_USER_CM_CMD_SEND_MRA,
- IB_USER_CM_CMD_SEND_LAP,
- IB_USER_CM_CMD_SEND_APR,
- IB_USER_CM_CMD_SEND_SIDR_REQ,
- IB_USER_CM_CMD_SEND_SIDR_REP,
-
- IB_USER_CM_CMD_EVENT,
- IB_USER_CM_CMD_INIT_QP_ATTR,
-};
-/*
- * command ABI structures.
- */
-struct ib_ucm_cmd_hdr {
- __u32 cmd;
- __u16 in;
- __u16 out;
-};
-
-struct ib_ucm_create_id {
- __aligned_u64 uid;
- __aligned_u64 response;
-};
-
-struct ib_ucm_create_id_resp {
- __u32 id;
-};
-
-struct ib_ucm_destroy_id {
- __aligned_u64 response;
- __u32 id;
- __u32 reserved;
-};
-
-struct ib_ucm_destroy_id_resp {
- __u32 events_reported;
-};
-
-struct ib_ucm_attr_id {
- __aligned_u64 response;
- __u32 id;
- __u32 reserved;
-};
-
-struct ib_ucm_attr_id_resp {
- __be64 service_id;
- __be64 service_mask;
- __be32 local_id;
- __be32 remote_id;
-};
-
-struct ib_ucm_init_qp_attr {
- __aligned_u64 response;
- __u32 id;
- __u32 qp_state;
-};
-
-struct ib_ucm_listen {
- __be64 service_id;
- __be64 service_mask;
- __u32 id;
- __u32 reserved;
-};
-
-struct ib_ucm_notify {
- __u32 id;
- __u32 event;
-};
-
-struct ib_ucm_private_data {
- __aligned_u64 data;
- __u32 id;
- __u8 len;
- __u8 reserved[3];
-};
-
-struct ib_ucm_req {
- __u32 id;
- __u32 qpn;
- __u32 qp_type;
- __u32 psn;
- __be64 sid;
- __aligned_u64 data;
- __aligned_u64 primary_path;
- __aligned_u64 alternate_path;
- __u8 len;
- __u8 peer_to_peer;
- __u8 responder_resources;
- __u8 initiator_depth;
- __u8 remote_cm_response_timeout;
- __u8 flow_control;
- __u8 local_cm_response_timeout;
- __u8 retry_count;
- __u8 rnr_retry_count;
- __u8 max_cm_retries;
- __u8 srq;
- __u8 reserved[5];
-};
-
-struct ib_ucm_rep {
- __aligned_u64 uid;
- __aligned_u64 data;
- __u32 id;
- __u32 qpn;
- __u32 psn;
- __u8 len;
- __u8 responder_resources;
- __u8 initiator_depth;
- __u8 target_ack_delay;
- __u8 failover_accepted;
- __u8 flow_control;
- __u8 rnr_retry_count;
- __u8 srq;
- __u8 reserved[4];
-};
-
-struct ib_ucm_info {
- __u32 id;
- __u32 status;
- __aligned_u64 info;
- __aligned_u64 data;
- __u8 info_len;
- __u8 data_len;
- __u8 reserved[6];
-};
-
-struct ib_ucm_mra {
- __aligned_u64 data;
- __u32 id;
- __u8 len;
- __u8 timeout;
- __u8 reserved[2];
-};
-
-struct ib_ucm_lap {
- __aligned_u64 path;
- __aligned_u64 data;
- __u32 id;
- __u8 len;
- __u8 reserved[3];
-};
-
-struct ib_ucm_sidr_req {
- __u32 id;
- __u32 timeout;
- __be64 sid;
- __aligned_u64 data;
- __aligned_u64 path;
- __u16 reserved_pkey;
- __u8 len;
- __u8 max_cm_retries;
- __u8 reserved[4];
-};
-
-struct ib_ucm_sidr_rep {
- __u32 id;
- __u32 qpn;
- __u32 qkey;
- __u32 status;
- __aligned_u64 info;
- __aligned_u64 data;
- __u8 info_len;
- __u8 data_len;
- __u8 reserved[6];
-};
-/*
- * event notification ABI structures.
- */
-struct ib_ucm_event_get {
- __aligned_u64 response;
- __aligned_u64 data;
- __aligned_u64 info;
- __u8 data_len;
- __u8 info_len;
- __u8 reserved[6];
-};
-
-struct ib_ucm_req_event_resp {
- struct ib_user_path_rec primary_path;
- struct ib_user_path_rec alternate_path;
- __be64 remote_ca_guid;
- __u32 remote_qkey;
- __u32 remote_qpn;
- __u32 qp_type;
- __u32 starting_psn;
- __u8 responder_resources;
- __u8 initiator_depth;
- __u8 local_cm_response_timeout;
- __u8 flow_control;
- __u8 remote_cm_response_timeout;
- __u8 retry_count;
- __u8 rnr_retry_count;
- __u8 srq;
- __u8 port;
- __u8 reserved[7];
-};
-
-struct ib_ucm_rep_event_resp {
- __be64 remote_ca_guid;
- __u32 remote_qkey;
- __u32 remote_qpn;
- __u32 starting_psn;
- __u8 responder_resources;
- __u8 initiator_depth;
- __u8 target_ack_delay;
- __u8 failover_accepted;
- __u8 flow_control;
- __u8 rnr_retry_count;
- __u8 srq;
- __u8 reserved[5];
-};
-
-struct ib_ucm_rej_event_resp {
- __u32 reason;
- /* ari in ib_ucm_event_get info field. */
-};
-
-struct ib_ucm_mra_event_resp {
- __u8 timeout;
- __u8 reserved[3];
-};
-
-struct ib_ucm_lap_event_resp {
- struct ib_user_path_rec path;
-};
-
-struct ib_ucm_apr_event_resp {
- __u32 status;
- /* apr info in ib_ucm_event_get info field. */
-};
-
-struct ib_ucm_sidr_req_event_resp {
- __u16 pkey;
- __u8 port;
- __u8 reserved;
-};
-
-struct ib_ucm_sidr_rep_event_resp {
- __u32 status;
- __u32 qkey;
- __u32 qpn;
- /* info in ib_ucm_event_get info field. */
-};
-
-#define IB_UCM_PRES_DATA 0x01
-#define IB_UCM_PRES_INFO 0x02
-#define IB_UCM_PRES_PRIMARY 0x04
-#define IB_UCM_PRES_ALTERNATE 0x08
-
-struct ib_ucm_event_resp {
- __aligned_u64 uid;
- __u32 id;
- __u32 event;
- __u32 present;
- __u32 reserved;
- union {
- struct ib_ucm_req_event_resp req_resp;
- struct ib_ucm_rep_event_resp rep_resp;
- struct ib_ucm_rej_event_resp rej_resp;
- struct ib_ucm_mra_event_resp mra_resp;
- struct ib_ucm_lap_event_resp lap_resp;
- struct ib_ucm_apr_event_resp apr_resp;
-
- struct ib_ucm_sidr_req_event_resp sidr_req_resp;
- struct ib_ucm_sidr_rep_event_resp sidr_rep_resp;
-
- __u32 send_status;
- } u;
-};
-
-#endif /* IB_USER_CM_H */
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index d404c951954c..d0da070cf0ab 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -51,6 +51,7 @@ enum mlx5_ib_devx_methods {
MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT),
MLX5_IB_METHOD_DEVX_QUERY_UAR,
MLX5_IB_METHOD_DEVX_QUERY_EQN,
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
};
enum mlx5_ib_devx_other_attrs {
@@ -93,6 +94,14 @@ enum mlx5_ib_devx_obj_query_async_attrs {
MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
};
+enum mlx5_ib_devx_subscribe_event_attrs {
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+};
+
enum mlx5_ib_devx_query_eqn_attrs {
MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
@@ -127,16 +136,26 @@ enum mlx5_ib_devx_async_cmd_fd_alloc_attrs {
MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
};
+enum mlx5_ib_devx_async_event_fd_alloc_attrs {
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+};
+
enum mlx5_ib_devx_async_cmd_fd_methods {
MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
};
+enum mlx5_ib_devx_async_event_fd_methods {
+ MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+};
+
enum mlx5_ib_objects {
MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
MLX5_IB_OBJECT_DEVX_OBJ,
MLX5_IB_OBJECT_DEVX_UMEM,
MLX5_IB_OBJECT_FLOW_MATCHER,
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
};
enum mlx5_ib_flow_matcher_create_attrs {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index a8f34c237458..7e9900b0e746 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -63,5 +63,14 @@ enum mlx5_ib_uapi_dm_type {
MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
};
+enum mlx5_ib_uapi_devx_create_event_channel_flags {
+ MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA = 1 << 0,
+};
+
+struct mlx5_ib_uapi_devx_async_event_hdr {
+ __aligned_u64 cookie;
+ __u8 out_data[];
+};
+
#endif
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 41db51367efa..8e277783fa96 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -147,6 +147,18 @@ enum {
IWPM_NLA_HELLO_MAX
};
+/* For RDMA_NLDEV_ATTR_DEV_NODE_TYPE */
+enum {
+ /* IB values map to NodeInfo:NodeType. */
+ RDMA_NODE_IB_CA = 1,
+ RDMA_NODE_IB_SWITCH,
+ RDMA_NODE_IB_ROUTER,
+ RDMA_NODE_RNIC,
+ RDMA_NODE_USNIC,
+ RDMA_NODE_USNIC_UDP,
+ RDMA_NODE_UNSPECIFIED,
+};
+
/*
* Local service operations:
* RESOLVE - The client requests the local service to resolve a path.
@@ -267,11 +279,15 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */
- RDMA_NLDEV_NUM_OPS
-};
+ RDMA_NLDEV_CMD_GET_CHARDEV,
-enum {
- RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
+ RDMA_NLDEV_CMD_STAT_SET,
+
+ RDMA_NLDEV_CMD_STAT_GET, /* can dump */
+
+ RDMA_NLDEV_CMD_STAT_DEL,
+
+ RDMA_NLDEV_NUM_OPS
};
enum rdma_nldev_print_type {
@@ -478,10 +494,72 @@ enum rdma_nldev_attr {
* File descriptor handle of the net namespace object
*/
RDMA_NLDEV_NET_NS_FD, /* u32 */
+ /*
+ * Information about a chardev.
+ * CHARDEV_TYPE is the name of the chardev ABI (ie uverbs, umad, etc)
+ * CHARDEV_ABI signals the ABI revision (historical)
+ * CHARDEV_NAME is the kernel name for the /dev/ file (no directory)
+ * CHARDEV is the 64 bit dev_t for the inode
+ */
+ RDMA_NLDEV_ATTR_CHARDEV_TYPE, /* string */
+ RDMA_NLDEV_ATTR_CHARDEV_NAME, /* string */
+ RDMA_NLDEV_ATTR_CHARDEV_ABI, /* u64 */
+ RDMA_NLDEV_ATTR_CHARDEV, /* u64 */
+ RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, /* u64 */
+ /*
+ * Counter-specific attributes.
+ */
+ RDMA_NLDEV_ATTR_STAT_MODE, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_RES, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_COUNTER, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_COUNTER_ID, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTERS, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, /* string */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, /* u64 */
+
+ /*
+ * CQ adaptive moderatio (DIM)
+ */
+ RDMA_NLDEV_ATTR_DEV_DIM, /* u8 */
/*
* Always the end
*/
RDMA_NLDEV_ATTR_MAX
};
+
+/*
+ * Supported counter bind modes. All modes are mutual-exclusive.
+ */
+enum rdma_nl_counter_mode {
+ RDMA_COUNTER_MODE_NONE,
+
+ /*
+ * A qp is bound with a counter automatically during initialization
+ * based on the auto mode (e.g., qp type, ...)
+ */
+ RDMA_COUNTER_MODE_AUTO,
+
+ /*
+ * Which qp are bound with which counter is explicitly specified
+ * by the user
+ */
+ RDMA_COUNTER_MODE_MANUAL,
+
+ /*
+ * Always the end
+ */
+ RDMA_COUNTER_MODE_MAX,
+};
+
+/*
+ * Supported criteria in counter auto mode.
+ * Currently only "qp type" is supported
+ */
+enum rdma_nl_counter_mask {
+ RDMA_COUNTER_MASK_QP_TYPE = 1,
+};
#endif /* _UAPI_RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 26213f49f5c8..64c14cb0022f 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -103,6 +103,7 @@ enum rdma_driver_id {
RDMA_DRIVER_HFI1,
RDMA_DRIVER_QIB,
RDMA_DRIVER_EFA,
+ RDMA_DRIVER_SIW,
};
#endif
diff --git a/include/uapi/rdma/rvt-abi.h b/include/uapi/rdma/rvt-abi.h
new file mode 100644
index 000000000000..7328293c715c
--- /dev/null
+++ b/include/uapi/rdma/rvt-abi.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+
+/*
+ * This file contains defines, structures, etc. that are used
+ * to communicate between kernel and user code.
+ */
+
+#ifndef RVT_ABI_USER_H
+#define RVT_ABI_USER_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_verbs.h>
+#ifndef RDMA_ATOMIC_UAPI
+#define RDMA_ATOMIC_UAPI(_type, _name) struct{ _type val; } _name
+#endif
+
+struct rvt_wqe_sge {
+ __aligned_u64 addr;
+ __u32 length;
+ __u32 lkey;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct rvt_cq_wc {
+ /* index of next entry to fill */
+ RDMA_ATOMIC_UAPI(__u32, head);
+ /* index of next ib_poll_cq() entry */
+ RDMA_ATOMIC_UAPI(__u32, tail);
+
+ /* these are actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct rvt_rwqe {
+ __u64 wr_id;
+ __u8 num_sge;
+ __u8 padding[7];
+ struct rvt_wqe_sge sg_list[];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() for user space and rvt_get_rwqe_ptr()
+ * for kernel space.
+ */
+struct rvt_rwq {
+ /* new work requests posted to the head */
+ RDMA_ATOMIC_UAPI(__u32, head);
+ /* receives pull requests from here. */
+ RDMA_ATOMIC_UAPI(__u32, tail);
+ struct rvt_rwqe wq[];
+};
+#endif /* RVT_ABI_USER_H */
diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h
new file mode 100644
index 000000000000..3dd8071ace7b
--- /dev/null
+++ b/include/uapi/rdma/siw-abi.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#ifndef _SIW_USER_H
+#define _SIW_USER_H
+
+#include <linux/types.h>
+
+#define SIW_NODE_DESC_COMMON "Software iWARP stack"
+#define SIW_ABI_VERSION 1
+#define SIW_MAX_SGE 6
+#define SIW_UOBJ_MAX_KEY 0x08FFFF
+#define SIW_INVAL_UOBJ_KEY (SIW_UOBJ_MAX_KEY + 1)
+
+struct siw_uresp_create_cq {
+ __u32 cq_id;
+ __u32 num_cqe;
+ __aligned_u64 cq_key;
+};
+
+struct siw_uresp_create_qp {
+ __u32 qp_id;
+ __u32 num_sqe;
+ __u32 num_rqe;
+ __u32 pad;
+ __aligned_u64 sq_key;
+ __aligned_u64 rq_key;
+};
+
+struct siw_ureq_reg_mr {
+ __u8 stag_key;
+ __u8 reserved[3];
+ __u32 pad;
+};
+
+struct siw_uresp_reg_mr {
+ __u32 stag;
+ __u32 pad;
+};
+
+struct siw_uresp_create_srq {
+ __u32 num_rqe;
+ __u32 pad;
+ __aligned_u64 srq_key;
+};
+
+struct siw_uresp_alloc_ctx {
+ __u32 dev_id;
+ __u32 pad;
+};
+
+enum siw_opcode {
+ SIW_OP_WRITE,
+ SIW_OP_READ,
+ SIW_OP_READ_LOCAL_INV,
+ SIW_OP_SEND,
+ SIW_OP_SEND_WITH_IMM,
+ SIW_OP_SEND_REMOTE_INV,
+
+ /* Unsupported */
+ SIW_OP_FETCH_AND_ADD,
+ SIW_OP_COMP_AND_SWAP,
+
+ SIW_OP_RECEIVE,
+ /* provider internal SQE */
+ SIW_OP_READ_RESPONSE,
+ /*
+ * below opcodes valid for
+ * in-kernel clients only
+ */
+ SIW_OP_INVAL_STAG,
+ SIW_OP_REG_MR,
+ SIW_NUM_OPCODES
+};
+
+/* Keep it same as ibv_sge to allow for memcpy */
+struct siw_sge {
+ __aligned_u64 laddr;
+ __u32 length;
+ __u32 lkey;
+};
+
+/*
+ * Inline data are kept within the work request itself occupying
+ * the space of sge[1] .. sge[n]. Therefore, inline data cannot be
+ * supported if SIW_MAX_SGE is below 2 elements.
+ */
+#define SIW_MAX_INLINE (sizeof(struct siw_sge) * (SIW_MAX_SGE - 1))
+
+#if SIW_MAX_SGE < 2
+#error "SIW_MAX_SGE must be at least 2"
+#endif
+
+enum siw_wqe_flags {
+ SIW_WQE_VALID = 1,
+ SIW_WQE_INLINE = (1 << 1),
+ SIW_WQE_SIGNALLED = (1 << 2),
+ SIW_WQE_SOLICITED = (1 << 3),
+ SIW_WQE_READ_FENCE = (1 << 4),
+ SIW_WQE_REM_INVAL = (1 << 5),
+ SIW_WQE_COMPLETED = (1 << 6)
+};
+
+/* Send Queue Element */
+struct siw_sqe {
+ __aligned_u64 id;
+ __u16 flags;
+ __u8 num_sge;
+ /* Contains enum siw_opcode values */
+ __u8 opcode;
+ __u32 rkey;
+ union {
+ __aligned_u64 raddr;
+ __aligned_u64 base_mr;
+ };
+ union {
+ struct siw_sge sge[SIW_MAX_SGE];
+ __aligned_u64 access;
+ };
+};
+
+/* Receive Queue Element */
+struct siw_rqe {
+ __aligned_u64 id;
+ __u16 flags;
+ __u8 num_sge;
+ /*
+ * only used by kernel driver,
+ * ignored if set by user
+ */
+ __u8 opcode;
+ __u32 unused;
+ struct siw_sge sge[SIW_MAX_SGE];
+};
+
+enum siw_notify_flags {
+ SIW_NOTIFY_NOT = (0),
+ SIW_NOTIFY_SOLICITED = (1 << 0),
+ SIW_NOTIFY_NEXT_COMPLETION = (1 << 1),
+ SIW_NOTIFY_MISSED_EVENTS = (1 << 2),
+ SIW_NOTIFY_ALL = SIW_NOTIFY_SOLICITED | SIW_NOTIFY_NEXT_COMPLETION |
+ SIW_NOTIFY_MISSED_EVENTS
+};
+
+enum siw_wc_status {
+ SIW_WC_SUCCESS,
+ SIW_WC_LOC_LEN_ERR,
+ SIW_WC_LOC_PROT_ERR,
+ SIW_WC_LOC_QP_OP_ERR,
+ SIW_WC_WR_FLUSH_ERR,
+ SIW_WC_BAD_RESP_ERR,
+ SIW_WC_LOC_ACCESS_ERR,
+ SIW_WC_REM_ACCESS_ERR,
+ SIW_WC_REM_INV_REQ_ERR,
+ SIW_WC_GENERAL_ERR,
+ SIW_NUM_WC_STATUS
+};
+
+struct siw_cqe {
+ __aligned_u64 id;
+ __u8 flags;
+ __u8 opcode;
+ __u16 status;
+ __u32 bytes;
+ union {
+ __aligned_u64 imm_data;
+ __u32 inval_stag;
+ };
+ /* QP number or QP pointer */
+ union {
+ struct ib_qp *base_qp;
+ __aligned_u64 qp_id;
+ };
+};
+
+/*
+ * Shared structure between user and kernel
+ * to control CQ arming.
+ */
+struct siw_cq_ctrl {
+ __aligned_u64 notify;
+};
+#endif