From eeb8461e36c99fdf2d058751be924a2aab215005 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 28 Jan 2014 13:40:15 +0200 Subject: IB: Refactor umem to use linear SG table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch refactors the IB core umem code and vendor drivers to use a linear (chained) SG table instead of chunk list. With this change the relevant code becomes clearer—no need for nested loops to build and use umem. Signed-off-by: Shachar Raindel Signed-off-by: Yishai Hadas Signed-off-by: Roland Dreier --- include/rdma/ib_umem.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 9ee0d2e51b16..1ea0b65c4cfb 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -46,17 +46,12 @@ struct ib_umem { int page_size; int writable; int hugetlb; - struct list_head chunk_list; struct work_struct work; struct mm_struct *mm; unsigned long diff; -}; - -struct ib_umem_chunk { - struct list_head list; - int nents; - int nmap; - struct scatterlist page_list[0]; + struct sg_table sg_head; + int nmap; + int npages; }; #ifdef CONFIG_INFINIBAND_USER_MEM -- cgit v1.2.3 From 17cd3a2db825506c3e3bb9548ad20f67e2f8d0e7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:04 +0200 Subject: IB/core: Introduce protected memory regions This commit introduces verbs for creating/destoying memory regions which will allow new types of memory key operations such as protected memory registration. Indirect memory registration is registering several (one of more) pre-registered memory regions in a specific layout. The Indirect region may potentialy describe several regions and some repitition format between them. Protected Memory registration is registering a memory region with various data integrity attributes which will describe protection schemes that will be handled by the HCA in an offloaded manner. These memory regions will be applicable for a new REG_SIG_MR work request introduced later in this patchset. In the future these routines may replace or implement current memory regions creation routines existing today: - ib_reg_user_mr - ib_alloc_fast_reg_mr - ib_get_dma_mr - ib_dereg_mr Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 39 +++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3ac795115438..ca8ce7d097e3 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1169,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr) } EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct ib_mr *mr; + + if (!pd->device->create_mr) + return ERR_PTR(-ENOSYS); + + mr = pd->device->create_mr(pd, mr_init_attr); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_create_mr); + +int ib_destroy_mr(struct ib_mr *mr) +{ + struct ib_pd *pd; + int ret; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + pd = mr->pd; + ret = mr->device->destroy_mr(mr); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_mr); + struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct ib_mr *mr; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6793f32ccb58..cb12e6a4e553 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -461,6 +461,22 @@ int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; */ int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; +enum ib_mr_create_flags { + IB_MR_SIGNATURE_EN = 1, +}; + +/** + * ib_mr_init_attr - Memory region init attributes passed to routine + * ib_create_mr. + * @max_reg_descriptors: max number of registration descriptors that + * may be used with registration work requests. + * @flags: MR creation flags bit mask. + */ +struct ib_mr_init_attr { + int max_reg_descriptors; + u32 flags; +}; + /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. @@ -1407,6 +1423,9 @@ struct ib_device { int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); + int (*destroy_mr)(struct ib_mr *mr); + struct ib_mr * (*create_mr)(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, @@ -2250,6 +2269,25 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); */ int ib_dereg_mr(struct ib_mr *mr); + +/** + * ib_create_mr - Allocates a memory region that may be used for + * signature handover operations. + * @pd: The protection domain associated with the region. + * @mr_init_attr: memory region init attributes. + */ +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); + +/** + * ib_destroy_mr - Destroys a memory region that was created using + * ib_create_mr and removes it from HW translation tables. + * @mr: The memory region to destroy. + * + * This function can fail, if the memory region has memory windows bound to it. + */ +int ib_destroy_mr(struct ib_mr *mr); + /** * ib_alloc_fast_reg_mr - Allocates memory region usable with the * IB_WR_FAST_REG_MR send work request. -- cgit v1.2.3 From 1b01d33560e78417334c2dc673bbfac6c644424c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:05 +0200 Subject: IB/core: Introduce signature verbs API Introduce a verbs interface for signature-related operations. A signature handover operation configures the layouts of data and protection attributes both in memory and wire domains. Signature operations are: - INSERT: Generate and insert protection information when handing over data from input space to output space. - validate and STRIP: Validate protection information and remove it when handing over data from input space to output space. - validate and PASS: Validate protection information and pass it when handing over data from input space to output space. Once the signature handover opration is done, the HCA will offload data integrity generation/validation while performing the actual data transfer. Additions: 1. HCA signature capabilities in device attributes Verbs provider supporting signature handover operations fills relevant fields in device attributes structure returned by ib_query_device. 2. QP creation flag IB_QP_CREATE_SIGNATURE_EN Creating a QP that will carry signature handover operations may require some special preparations from the verbs provider. So we add QP creation flag IB_QP_CREATE_SIGNATURE_EN to declare that the created QP may carry out signature handover operations. Expose signature support to verbs layer (no support for now). 3. New send work request IB_WR_REG_SIG_MR Signature handover work request. This WR will define the signature handover properties of the memory/wire domains as well as the domains layout. The purpose of this work request is to bind all the needed information for the signature operation: - data to be transferred: wr->sg_list (ib_sge). * The raw data, pre-registered to a single MR (normally, before signature, this MR would have been used directly for the data transfer) - data protection guards: sig_handover.prot (ib_sge). * The data protection buffer, pre-registered to a single MR, which contains the data integrity guards of the raw data blocks. Note that it may not always exist, only in cases where the user is interested in storing protection guards in memory. - signature operation attributes: sig_handover.sig_attrs. * Tells the HCA how to validate/generate the protection information. Once the work request is executed, the memory region that will describe the signature transaction will be the sig_mr. The application can now go ahead and send the sig_mr.rkey or use the sig_mr.lkey for data transfer. 4. New Verb ib_check_mr_status check_mr_status verb checks the status of the memory region post transaction. The first check that may be used is IB_MR_CHECK_SIG_STATUS, which will indicate if any signature errors are pending for a specific signature-enabled ib_mr. This verb is a lightwight check and is allowed to be taken from interrupt context. An application must call this verb after it is known that the actual data transfer has finished. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 8 +++ include/rdma/ib_verbs.h | 149 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ca8ce7d097e3..92525f855d82 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1437,3 +1437,11 @@ int ib_destroy_flow(struct ib_flow *flow_id) return err; } EXPORT_SYMBOL(ib_destroy_flow); + +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + return mr->device->check_mr_status ? + mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; +} +EXPORT_SYMBOL(ib_check_mr_status); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cb12e6a4e553..82ab5c1e7605 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -122,7 +122,19 @@ enum ib_device_cap_flags { IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), - IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29) + IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), + IB_DEVICE_SIGNATURE_HANDOVER = (1<<30) +}; + +enum ib_signature_prot_cap { + IB_PROT_T10DIF_TYPE_1 = 1, + IB_PROT_T10DIF_TYPE_2 = 1 << 1, + IB_PROT_T10DIF_TYPE_3 = 1 << 2, +}; + +enum ib_signature_guard_cap { + IB_GUARD_T10DIF_CRC = 1, + IB_GUARD_T10DIF_CSUM = 1 << 1, }; enum ib_atomic_cap { @@ -172,6 +184,8 @@ struct ib_device_attr { unsigned int max_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; + int sig_prot_cap; + int sig_guard_cap; }; enum ib_mtu { @@ -477,6 +491,114 @@ struct ib_mr_init_attr { u32 flags; }; +enum ib_signature_type { + IB_SIG_TYPE_T10_DIF, +}; + +/** + * T10-DIF Signature types + * T10-DIF types are defined by SCSI + * specifications. + */ +enum ib_t10_dif_type { + IB_T10DIF_NONE, + IB_T10DIF_TYPE1, + IB_T10DIF_TYPE2, + IB_T10DIF_TYPE3 +}; + +/** + * Signature T10-DIF block-guard types + * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. + * IB_T10DIF_CSUM: Corresponds to IP checksum rules. + */ +enum ib_t10_dif_bg_type { + IB_T10DIF_CRC, + IB_T10DIF_CSUM +}; + +/** + * struct ib_t10_dif_domain - Parameters specific for T10-DIF + * domain. + * @type: T10-DIF type (0|1|2|3) + * @bg_type: T10-DIF block guard type (CRC|CSUM) + * @pi_interval: protection information interval. + * @bg: seed of guard computation. + * @app_tag: application tag of guard block + * @ref_tag: initial guard block reference tag. + * @type3_inc_reftag: T10-DIF type 3 does not state + * about the reference tag, it is the user + * choice to increment it or not. + */ +struct ib_t10_dif_domain { + enum ib_t10_dif_type type; + enum ib_t10_dif_bg_type bg_type; + u16 pi_interval; + u16 bg; + u16 app_tag; + u32 ref_tag; + bool type3_inc_reftag; +}; + +/** + * struct ib_sig_domain - Parameters for signature domain + * @sig_type: specific signauture type + * @sig: union of all signature domain attributes that may + * be used to set domain layout. + */ +struct ib_sig_domain { + enum ib_signature_type sig_type; + union { + struct ib_t10_dif_domain dif; + } sig; +}; + +/** + * struct ib_sig_attrs - Parameters for signature handover operation + * @check_mask: bitmask for signature byte check (8 bytes) + * @mem: memory domain layout desciptor. + * @wire: wire domain layout desciptor. + */ +struct ib_sig_attrs { + u8 check_mask; + struct ib_sig_domain mem; + struct ib_sig_domain wire; +}; + +enum ib_sig_err_type { + IB_SIG_BAD_GUARD, + IB_SIG_BAD_REFTAG, + IB_SIG_BAD_APPTAG, +}; + +/** + * struct ib_sig_err - signature error descriptor + */ +struct ib_sig_err { + enum ib_sig_err_type err_type; + u32 expected; + u32 actual; + u64 sig_err_offset; + u32 key; +}; + +enum ib_mr_status_check { + IB_MR_CHECK_SIG_STATUS = 1, +}; + +/** + * struct ib_mr_status - Memory region status container + * + * @fail_status: Bitmask of MR checks status. For each + * failed check a corresponding status bit is set. + * @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS + * failure. + */ +struct ib_mr_status { + u32 fail_status; + struct ib_sig_err sig_err; +}; + /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. @@ -660,6 +782,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, IB_QP_CREATE_NETIF_QP = 1 << 5, + IB_QP_CREATE_SIGNATURE_EN = 1 << 6, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -824,6 +947,7 @@ enum ib_wr_opcode { IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, IB_WR_BIND_MW, + IB_WR_REG_SIG_MR, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. */ @@ -929,6 +1053,12 @@ struct ib_send_wr { u32 rkey; struct ib_mw_bind_info bind_info; } bind_mw; + struct { + struct ib_sig_attrs *sig_attrs; + struct ib_mr *sig_mr; + int access_flags; + struct ib_sge *prot; + } sig_handover; } wr; u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; @@ -1474,6 +1604,8 @@ struct ib_device { *flow_attr, int domain); int (*destroy_flow)(struct ib_flow *flow_id); + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); struct ib_dma_mapping_ops *dma_ops; @@ -2473,4 +2605,19 @@ static inline int ib_check_mr_access(int flags) return 0; } +/** + * ib_check_mr_status: lightweight check of MR status. + * This routine may provide status checks on a selected + * ib_mr. first use is for signature status check. + * + * @mr: A memory region. + * @check_mask: Bitmask of which checks to perform from + * ib_mr_status_check enumeration. + * @mr_status: The container of relevant status checks. + * failed checks will be indicated in the status bitmask + * and the relevant info shall be in the error item. + */ +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); + #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 3121e3c441b5eccdd15e6c320ec32215b334b9ec Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:06 +0200 Subject: mlx5: Implement create_mr and destroy_mr Support create_mr and destroy_mr verbs. Creating ib_mr may be done for either ib_mr that will register regular page lists like alloc_fast_reg_mr routine, or indirect ib_mrs that can register other (pre-registered) ib_mrs in an indirect manner. In addition user may request signature enable, that will mean that the created ib_mr may be attached with signature attributes (BSF, PSVs). Currently we only allow direct/indirect registration modes. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 + drivers/infiniband/hw/mlx5/mr.c | 111 +++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 61 +++++++++++++++ include/linux/mlx5/device.h | 25 ++++++ include/linux/mlx5/driver.h | 19 +++++ 6 files changed, 222 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aa03e732b6a8..7260a299c6db 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1423,9 +1423,11 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.create_mr = mlx5_ib_create_mr; dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 389e31965773..79c4f14c4d3e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -265,6 +265,7 @@ struct mlx5_ib_mr { enum ib_wc_status status; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; + struct mlx5_core_sig_ctx *sig; }; struct mlx5_ib_fast_reg_page_list { @@ -495,6 +496,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +int mlx5_ib_destroy_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 7c95ca1f0c25..032445c47608 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -992,6 +992,117 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int access_mode, err; + int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + access_mode = MLX5_ACCESS_MODE_MTT; + + if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { + u32 psv_index[2]; + + in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | + MLX5_MKEY_BSF_EN); + in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) { + err = -ENOMEM; + goto err_free_in; + } + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, + 2, psv_index); + if (err) + goto err_free_sig; + + access_mode = MLX5_ACCESS_MODE_KLM; + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + } + + in->seg.flags = MLX5_PERM_UMR_EN | access_mode; + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto err_destroy_psv; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + kfree(in); + + return &mr->ibmr; + +err_destroy_psv: + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + } +err_free_sig: + kfree(mr->sig); +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int err; + + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + kfree(mr->sig); + } + + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + + kfree(mr); + + return err; +} + struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 35e514dc7b7d..bb746bbe73c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -144,3 +144,64 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return err; } EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); + +int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, + int npsvs, u32 *sig_index) +{ + struct mlx5_allocate_psv_in in; + struct mlx5_allocate_psv_out out; + int i, err; + + if (npsvs > MLX5_MAX_PSVS) + return -EINVAL; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV); + in.npsv_pd = cpu_to_be32((npsvs << 28) | pdn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) { + mlx5_core_err(dev, "cmd exec failed %d\n", err); + return err; + } + + if (out.hdr.status) { + mlx5_core_err(dev, "create_psv bad status %d\n", out.hdr.status); + return mlx5_cmd_status_to_err(&out.hdr); + } + + for (i = 0; i < npsvs; i++) + sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff; + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_psv); + +int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) +{ + struct mlx5_destroy_psv_in in; + struct mlx5_destroy_psv_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.psv_number = cpu_to_be32(psv_num); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) { + mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err); + goto out; + } + + if (out.hdr.status) { + mlx5_core_err(dev, "destroy_psv bad status %d\n", out.hdr.status); + err = mlx5_cmd_status_to_err(&out.hdr); + goto out; + } + +out: + return err; +} +EXPORT_SYMBOL(mlx5_core_destroy_psv); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 817a6fae6d2c..f714fc427765 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -48,6 +48,8 @@ enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, MLX5_PCI_CMD_XPORT = 7, + MLX5_MKEY_BSF_OCTO_SIZE = 4, + MLX5_MAX_PSVS = 4, }; enum { @@ -936,4 +938,27 @@ enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; +struct mlx5_allocate_psv_in { + struct mlx5_inbox_hdr hdr; + __be32 npsv_pd; + __be32 rsvd_psv0; +}; + +struct mlx5_allocate_psv_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + __be32 psv_idx[4]; +}; + +struct mlx5_destroy_psv_in { + struct mlx5_inbox_hdr hdr; + __be32 psv_number; + u8 rsvd[4]; +}; + +struct mlx5_destroy_psv_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 130bc8d77fa5..e1cb657ccade 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -401,6 +401,22 @@ struct mlx5_eq { struct mlx5_rsc_debug *dbg; }; +struct mlx5_core_psv { + u32 psv_idx; + struct psv_layout { + u32 pd; + u16 syndrome; + u16 reserved; + u16 bg; + u16 app_tag; + u32 ref_tag; + } psv; +}; + +struct mlx5_core_sig_ctx { + struct mlx5_core_psv psv_memory; + struct mlx5_core_psv psv_wire; +}; struct mlx5_core_mr { u64 iova; @@ -746,6 +762,9 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); const char *mlx5_command_str(int command); int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, + int npsvs, u32 *sig_index); +int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); static inline u32 mlx5_mkey_to_idx(u32 mkey) { -- cgit v1.2.3 From e1e66cc26457c2e9412f67618646ec2a441fc409 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:07 +0200 Subject: IB/mlx5: Initialize mlx5_ib_qp signature-related members If user requested signature enable we initialize relevant mlx5_ib_qp members. We mark the qp as sig_enable and we increase the effective SQ size, but still limit the user max_send_wr to original size computed. We also allow the create_qp routine to accept sig_enable create flag. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 12 +++++++++--- include/linux/mlx5/qp.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 79c4f14c4d3e..e438f08899ae 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -189,6 +189,9 @@ struct mlx5_ib_qp { int create_type; u32 pa_lkey; + + /* Store signature errors */ + bool signature_en; }; struct mlx5_ib_cq_buf { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7dfe8a1c84cf..01999f3744fb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -256,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) + return MLX5_SIG_WQE_SIZE; + else + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, @@ -284,6 +287,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) + qp->signature_en = true; + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { @@ -665,7 +671,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags) + if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index d51eff713549..152756eaa8a3 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -37,6 +37,7 @@ #include #define MLX5_INVALID_LKEY 0x100 +#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) enum mlx5_qp_optpar { MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, -- cgit v1.2.3 From 3bcdb17a5e88288ead90be3c107e754a6075a5b0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:10 +0200 Subject: IB/mlx5: Keep mlx5 MRs in a radix tree under device This will be useful when processing signature errors on a specific key. The mlx5 driver will lookup the matching mlx5 memory region structure and mark it as dirty (contains signature errors). Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/mr.c | 24 ++++++++++++++++++++++++ include/linux/mlx5/driver.h | 18 ++++++++++++++++++ 3 files changed, 43 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a064f06e0cb8..96a0617f7609 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -446,6 +446,7 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) mlx5_init_cq_table(dev); mlx5_init_qp_table(dev); mlx5_init_srq_table(dev); + mlx5_init_mr_table(dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index bb746bbe73c6..4cc927649404 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,11 +36,24 @@ #include #include "mlx5_core.h" +void mlx5_init_mr_table(struct mlx5_core_dev *dev) +{ + struct mlx5_mr_table *table = &dev->priv.mr_table; + + rwlock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); +} + +void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) +{ +} + int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out) { + struct mlx5_mr_table *table = &dev->priv.mr_table; struct mlx5_create_mkey_mbox_out lout; int err; u8 key; @@ -73,14 +86,21 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(lout.mkey), key, mr->key); + /* connect to MR tree */ + write_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); + write_unlock_irq(&table->lock); + return err; } EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) { + struct mlx5_mr_table *table = &dev->priv.mr_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; + unsigned long flags; int err; memset(&in, 0, sizeof(in)); @@ -95,6 +115,10 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) if (out.hdr.status) return mlx5_cmd_status_to_err(&out.hdr); + write_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + write_unlock_irqrestore(&table->lock, flags); + return err; } EXPORT_SYMBOL(mlx5_core_destroy_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e1cb657ccade..e562e01e59c7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -491,6 +491,13 @@ struct mlx5_srq_table { struct radix_tree_root tree; }; +struct mlx5_mr_table { + /* protect radix tree + */ + rwlock_t lock; + struct radix_tree_root tree; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -520,6 +527,10 @@ struct mlx5_priv { struct mlx5_cq_table cq_table; /* end: cq staff */ + /* start: mr staff */ + struct mlx5_mr_table mr_table; + /* end: mr staff */ + /* start: alloc staff */ struct mutex pgdir_mutex; struct list_head pgdir_list; @@ -667,6 +678,11 @@ static inline void mlx5_vfree(const void *addr) kfree(addr); } +static inline u32 mlx5_base_mkey(const u32 key) +{ + return key & 0xffffff00u; +} + int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev); void mlx5_dev_cleanup(struct mlx5_core_dev *dev); int mlx5_cmd_init(struct mlx5_core_dev *dev); @@ -701,6 +717,8 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); +void mlx5_init_mr_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, -- cgit v1.2.3 From e6631814fb3ac454fbbf47ea343c2b9508e4e1ba Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:11 +0200 Subject: IB/mlx5: Support IB_WR_REG_SIG_MR This patch implements IB_WR_REG_SIG_MR posted by the user. Baisically this WR involves 3 WQEs in order to prepare and properly register the signature layout: 1. post UMR WR to register the sig_mr in one of two possible ways: * In case the user registered a single MR for data so the UMR data segment consists of: - single klm (data MR) passed by the user - BSF with signature attributes requested by the user. * In case the user registered 2 MRs, one for data and one for protection, the UMR consists of: - strided block format which includes data and protection MRs and their repetitive block format. - BSF with signature attributes requested by the user. 2. post SET_PSV in order to set the memory domain initial signature parameters passed by the user. SET_PSV is not signaled and solicited CQE. 3. post SET_PSV in order to set the wire domain initial signature parameters passed by the user. SET_PSV is not signaled and solicited CQE. * After this compound WR we place a small fence for next WR to come. This patch also introduces some helper functions to set the BSF correctly and determining the signature format selectors. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 422 ++++++++++++++++++++++++++++++++++++++++ include/linux/mlx5/qp.h | 61 ++++++ 2 files changed, 483 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1dbadbfc4474..67e79989b181 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1777,6 +1777,26 @@ static __be64 frwr_mkey_mask(void) return cpu_to_be64(result); } +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr, int li) { @@ -1961,6 +1981,339 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, return 0; } +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static int format_selector(struct ib_sig_attrs *attr, + struct ib_sig_domain *domain, + int *selector) +{ + +#define FORMAT_DIF_NONE 0 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC 13 +#define FORMAT_DIF_CSUM_NO_INC 14 + + switch (domain->sig.dif.type) { + case IB_T10DIF_NONE: + /* No DIF */ + *selector = FORMAT_DIF_NONE; + break; + case IB_T10DIF_TYPE1: /* Fall through */ + case IB_T10DIF_TYPE2: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = FORMAT_DIF_CRC_INC; + break; + case IB_T10DIF_CSUM: + *selector = FORMAT_DIF_CSUM_INC; + break; + default: + return 1; + } + break; + case IB_T10DIF_TYPE3: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CRC_INC : + FORMAT_DIF_CRC_NO_INC; + break; + case IB_T10DIF_CSUM: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CSUM_INC : + FORMAT_DIF_CSUM_NO_INC; + break; + default: + return 1; + } + break; + default: + return 1; + } + + return 0; +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + int ret, selector; + + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_T10_DIF: + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) + return -EINVAL; + + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig.dif.type == wire->sig.dif.type) { + /* Same block structure */ + basic->bsf_size_sbs = 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask = 0xff; + else + basic->wire.copy_byte_mask = 0x3f; + } else + basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); + + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->raw_data_size = cpu_to_be32(data_size); + + ret = format_selector(sig_attrs, mem, &selector); + if (ret) + return -EINVAL; + basic->m_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_memory.psv_idx); + + ret = format_selector(sig_attrs, wire, &selector); + if (ret) + return -EINVAL; + basic->w_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_wire.psv_idx); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct mlx5_bsf *bsf; + u32 data_len = wr->sg_list->length; + u32 data_key = wr->sg_list->lkey; + u64 data_va = wr->sg_list->addr; + int ret; + int wqe_size; + + if (!wr->wr.sig_handover.prot) { + /** + * Source domain doesn't contain signature information + * So need construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information + * So need construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u32 prot_key = wr->wr.sig_handover.prot->lkey; + u64 prot_va = wr->wr.sig_handover.prot->addr; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + + if (prot_key == data_key && prot_va == data_va) { + /** + * The data and protection are interleaved + * in a single memory region + **/ + prot_sentry->va = cpu_to_be64(data_va + block_size); + prot_sentry->stride = cpu_to_be16(block_size + prot_size); + data_sentry->stride = prot_sentry->stride; + } else { + /* The data and protection are two different buffers */ + prot_sentry->va = cpu_to_be64(prot_va); + data_sentry->stride = cpu_to_be16(block_size); + prot_sentry->stride = cpu_to_be16(prot_size); + } + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_send_wr *wr, u32 nelements, + u32 length, u32 pdn) +{ + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + u32 sig_key = sig_mr->rkey; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + MLX5_ACCESS_MODE_KLM; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, u32 nelements) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->klm_octowords = get_klm_octo(nelements); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + + +static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + u32 pdn = get_pd(qp)->pdn; + u32 klm_oct_size; + int region_len, ret; + + if (unlikely(wr->num_sge != 1) || + unlikely(wr->wr.sig_handover.access_flags & + IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = wr->sg_list->length; + if (wr->wr.sig_handover.prot) + region_len += wr->wr.sig_handover.prot->length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + + set_sig_umr_segment(*seg, wr, klm_oct_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + ret = set_sig_data_segment(wr, qp, seg, size); + if (ret) + return ret; + + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + break; + + default: + pr_err("Bad signature type given.\n"); + return 1; + } + + return 0; +} + static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) { @@ -2108,6 +2461,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = &dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf = qp->bf; @@ -2203,6 +2557,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_SIG_MR: + qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; + mr = to_mmr(wr->wr.sig_handover.sig_mr); + + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + err = set_sig_umr_wr(wr, qp, &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_UMR); + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + wr->send_flags &= ~IB_SEND_SIGNALED; + wr->send_flags |= IB_SEND_SOLICITED; + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + mr->sig->psv_memory.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + mr->sig->psv_wire.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + num_sge = 0; + goto skip_psv; + default: break; } @@ -2286,6 +2707,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, mlx5_ib_opcode[wr->opcode]); +skip_psv: if (0) dump_wqe(qp, idx, size); } diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 152756eaa8a3..49af74f90ef9 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -38,6 +38,8 @@ #define MLX5_INVALID_LKEY 0x100 #define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) +#define MLX5_DIF_SIZE 8 +#define MLX5_STRIDE_BLOCK_OP 0x400 enum mlx5_qp_optpar { MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, @@ -152,6 +154,11 @@ enum { MLX5_SND_DBR = 1, }; +enum { + MLX5_FLAGS_INLINE = 1<<7, + MLX5_FLAGS_CHECK_FREE = 1<<5, +}; + struct mlx5_wqe_fmr_seg { __be32 flags; __be32 mem_key; @@ -279,6 +286,60 @@ struct mlx5_wqe_inline_seg { __be32 byte_count; }; +struct mlx5_bsf { + struct mlx5_bsf_basic { + u8 bsf_size_sbs; + u8 check_byte_mask; + union { + u8 copy_byte_mask; + u8 bs_selector; + u8 rsvd_wflags; + } wire; + union { + u8 bs_selector; + u8 rsvd_mflags; + } mem; + __be32 raw_data_size; + __be32 w_bfs_psv; + __be32 m_bfs_psv; + } basic; + struct mlx5_bsf_ext { + __be32 t_init_gen_pro_size; + __be32 rsvd_epi_size; + __be32 w_tfs_psv; + __be32 m_tfs_psv; + } ext; + struct mlx5_bsf_inl { + __be32 w_inl_vld; + __be32 w_rsvd; + __be64 w_block_format; + __be32 m_inl_vld; + __be32 m_rsvd; + __be64 m_block_format; + } inl; +}; + +struct mlx5_klm { + __be32 bcount; + __be32 key; + __be64 va; +}; + +struct mlx5_stride_block_entry { + __be16 stride; + __be16 bcount; + __be32 key; + __be64 va; +}; + +struct mlx5_stride_block_ctrl_seg { + __be32 bcount_per_cycle; + __be32 op; + __be32 repeat_count; + u16 rsvd; + __be16 num_entries; +}; + struct mlx5_core_qp { void (*event) (struct mlx5_core_qp *, int); int qpn; -- cgit v1.2.3 From d5436ba01075ef4629015f7a00914d64ffd795d6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:12 +0200 Subject: IB/mlx5: Collect signature error completion This commit takes care of the generated signature error CQE generated by the HW (if happened). The underlying mlx5 driver will handle signature error completions and will mark the relevant memory region as dirty. Once the consumer gets the completion for the transaction, it must check for signature errors on signature memory region using a new lightweight verb ib_check_mr_status(). In case the user doesn't check for signature error (i.e. doesn't call ib_check_mr_status() with status check IB_MR_CHECK_SIG_STATUS), the memory region cannot be used for another signature operation (REG_SIG_MR work request will fail). Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/cq.c | 62 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 ++++ drivers/infiniband/hw/mlx5/mr.c | 46 ++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 8 +++-- include/linux/mlx5/cq.h | 1 + include/linux/mlx5/device.h | 18 +++++++++++ include/linux/mlx5/driver.h | 4 +++ include/linux/mlx5/qp.h | 5 +++ 9 files changed, 150 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b1705ce6eb88..62bb6b49dc1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) mlx5_buf_free(&dev->mdev, &buf->buf); } +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, + struct ib_sig_err *item) +{ + u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR (1 << 13) +#define APPTAG_ERR (1 << 12) +#define REFTAG_ERR (1 << 11) + + if (syndrome & GUARD_ERR) { + item->err_type = IB_SIG_BAD_GUARD; + item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; + item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; + } else + if (syndrome & REFTAG_ERR) { + item->err_type = IB_SIG_BAD_REFTAG; + item->expected = be32_to_cpu(cqe->expected_reftag); + item->actual = be32_to_cpu(cqe->actual_reftag); + } else + if (syndrome & APPTAG_ERR) { + item->err_type = IB_SIG_BAD_APPTAG; + item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; + item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; + } else { + pr_err("Got signature completion error with bad syndrome %04x\n", + syndrome); + } + + item->sig_err_offset = be64_to_cpu(cqe->err_offset); + item->key = be32_to_cpu(cqe->mkey); +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; + struct mlx5_sig_err_cqe *sig_err_cqe; + struct mlx5_core_mr *mmr; + struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -475,6 +510,33 @@ repoll: } } break; + case MLX5_CQE_SIG_ERR: + sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + + read_lock(&dev->mdev.priv.mr_table.lock); + mmr = __mlx5_mr_lookup(&dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmr)) { + read_unlock(&dev->mdev.priv.mr_table.lock); + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", + cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); + return -EINVAL; + } + + mr = to_mibmr(mmr); + get_sig_err_item(sig_err_cqe, &mr->sig->err_item); + mr->sig->sig_err_exists = true; + mr->sig->sigerr_count++; + + mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", + cq->mcq.cqn, mr->sig->err_item.key, + mr->sig->err_item.err_type, + mr->sig->err_item.sig_err_offset, + mr->sig->err_item.expected, + mr->sig->err_item.actual); + + read_unlock(&dev->mdev.priv.mr_table.lock); + goto repoll; } return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7260a299c6db..ba3ecec7fa65 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1431,6 +1431,7 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e438f08899ae..50541586e0a6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -400,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp, mqp); } +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +{ + return container_of(mmr, struct mlx5_ib_mr, mmr); +} + static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -537,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status); static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 032445c47608..81392b26d078 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1038,6 +1038,11 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, access_mode = MLX5_ACCESS_MODE_KLM; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; } in->seg.flags = MLX5_PERM_UMR_EN | access_mode; @@ -1188,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); } + +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + int ret = 0; + + if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { + pr_err("Invalid status check mask\n"); + ret = -EINVAL; + goto done; + } + + mr_status->fail_status = 0; + if (check_mask & IB_MR_CHECK_SIG_STATUS) { + if (!mmr->sig) { + ret = -EINVAL; + pr_err("signature status check requested on a non-signature enabled MR\n"); + goto done; + } + + mmr->sig->sig_status_checked = true; + if (!mmr->sig->sig_err_exists) + goto done; + + if (ibmr->lkey == mmr->sig->err_item.key) + memcpy(&mr_status->sig_err, &mmr->sig->err_item, + sizeof(mr_status->sig_err)); + else { + mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; + mr_status->sig_err.sig_err_offset = 0; + mr_status->sig_err.key = mmr->sig->err_item.key; + } + + mmr->sig->sig_err_exists = false; + mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; + } + +done: + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 67e79989b181..ae788d27b93f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1784,6 +1784,7 @@ static __be64 sig_mkey_mask(void) result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | MLX5_MKEY_MASK_EN_RINVAL | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_LR | @@ -2219,13 +2220,14 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, { struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | MLX5_ACCESS_MODE_KLM; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); seg->len = cpu_to_be64(length); seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); @@ -2255,7 +2257,8 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, if (unlikely(wr->num_sge != 1) || unlikely(wr->wr.sig_handover.access_flags & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->signature_en)) + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ @@ -2286,6 +2289,7 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, if (ret) return ret; + sig_mr->sig->sig_status_checked = false; return 0; } diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2202c7f72b75..f6b17ac601bd 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -80,6 +80,7 @@ enum { MLX5_CQE_RESP_SEND_IMM = 3, MLX5_CQE_RESP_SEND_INV = 4, MLX5_CQE_RESIZE_CQ = 5, + MLX5_CQE_SIG_ERR = 12, MLX5_CQE_REQ_ERR = 13, MLX5_CQE_RESP_ERR = 14, MLX5_CQE_INVALID = 15, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f714fc427765..407bdb67fd4f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -118,6 +118,7 @@ enum { MLX5_MKEY_MASK_START_ADDR = 1ull << 6, MLX5_MKEY_MASK_PD = 1ull << 7, MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, + MLX5_MKEY_MASK_EN_SIGERR = 1ull << 9, MLX5_MKEY_MASK_BSF_EN = 1ull << 12, MLX5_MKEY_MASK_KEY = 1ull << 13, MLX5_MKEY_MASK_QPN = 1ull << 14, @@ -557,6 +558,23 @@ struct mlx5_cqe64 { u8 op_own; }; +struct mlx5_sig_err_cqe { + u8 rsvd0[16]; + __be32 expected_trans_sig; + __be32 actual_trans_sig; + __be32 expected_reftag; + __be32 actual_reftag; + __be16 syndrome; + u8 rsvd22[2]; + __be32 mkey; + __be64 err_offset; + u8 rsvd30[8]; + __be32 qpn; + u8 rsvd38[2]; + u8 signature; + u8 op_own; +}; + struct mlx5_wqe_srq_next_seg { u8 rsvd0[2]; __be16 next_wqe_index; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e562e01e59c7..93cef6313e72 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -416,6 +416,10 @@ struct mlx5_core_psv { struct mlx5_core_sig_ctx { struct mlx5_core_psv psv_memory; struct mlx5_core_psv psv_wire; + struct ib_sig_err err_item; + bool sig_status_checked; + bool sig_err_exists; + u32 sigerr_count; }; struct mlx5_core_mr { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 49af74f90ef9..f829ad80ff28 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -506,6 +506,11 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } +static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) +{ + return radix_tree_lookup(&dev->priv.mr_table.tree, key); +} + int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_create_qp_mbox_in *in, -- cgit v1.2.3 From 55e51eda4820ec5a1c1fc8693a51029f74eac2b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:49 +0200 Subject: SCSI/libiscsi: Add check_protection callback for transports iSCSI needs to be at least aware that a task involves protection information. In case it does, after the transaction completed libiscsi will ask the transport to check the protection status of the transaction. Unlike transport errors, DIF errors should not prevent successful completion of the transaction from the transport point of view, but should be escelated to scsi mid-layer when constructing the scsi result and sense data. check_protection routine will return the ascq corresponding to the DIF error that occured (or 0 if no error happened). return ascq: - 0x1: GUARD_CHECK_FAILED - 0x2: APPTAG_CHECK_FAILED - 0x3: REFTAG_CHECK_FAILED Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/scsi/libiscsi.c | 32 ++++++++++++++++++++++++++++++++ include/scsi/libiscsi.h | 4 ++++ include/scsi/scsi_transport_iscsi.h | 1 + 3 files changed, 37 insertions(+) (limited to 'include') diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 40462415291e..3c11acf67849 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -395,6 +395,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (rc) return rc; } + + if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) + task->protected = true; + if (sc->sc_data_direction == DMA_TO_DEVICE) { unsigned out_len = scsi_out(sc)->length; struct iscsi_r2t_info *r2t = &task->unsol_r2t; @@ -823,6 +827,33 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, sc->result = (DID_OK << 16) | rhdr->cmd_status; + if (task->protected) { + sector_t sector; + u8 ascq; + + /** + * Transports that didn't implement check_protection + * callback but still published T10-PI support to scsi-mid + * deserve this BUG_ON. + **/ + BUG_ON(!session->tt->check_protection); + + ascq = session->tt->check_protection(task, §or); + if (ascq) { + sc->result = DRIVER_SENSE << 24 | + SAM_STAT_CHECK_CONDITION; + scsi_build_sense_buffer(1, sc->sense_buffer, + ILLEGAL_REQUEST, 0x10, ascq); + sc->sense_buffer[7] = 0xc; /* Additional sense length */ + sc->sense_buffer[8] = 0; /* Information desc type */ + sc->sense_buffer[9] = 0xa; /* Additional desc length */ + sc->sense_buffer[10] = 0x80; /* Validity bit */ + + put_unaligned_be64(sector, &sc->sense_buffer[12]); + goto out; + } + } + if (rhdr->response != ISCSI_STATUS_CMD_COMPLETED) { sc->result = DID_ERROR << 16; goto out; @@ -1567,6 +1598,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, task->have_checked_conn = false; task->last_timeout = jiffies; task->last_xfer = jiffies; + task->protected = false; INIT_LIST_HEAD(&task->running); return task; } diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 309f51336fb9..1457c26dfc58 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -133,6 +133,10 @@ struct iscsi_task { unsigned long last_xfer; unsigned long last_timeout; bool have_checked_conn; + + /* T10 protection information */ + bool protected; + /* state set/tested under session->lock */ int state; atomic_t refcount; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 88640a47216c..2555ee5343fd 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -167,6 +167,7 @@ struct iscsi_transport { struct iscsi_bus_flash_conn *fnode_conn); int (*logout_flashnode_sid) (struct iscsi_cls_session *cls_sess); int (*get_host_stats) (struct Scsi_Host *shost, char *buf, int len); + u8 (*check_protection)(struct iscsi_task *task, sector_t *sector); }; /* -- cgit v1.2.3 From 6751360514b604f2d69d00e64b5de0007e01e36e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2014 13:51:19 +0100 Subject: scsi_transport_srp: Fix two kernel-doc warnings This patch fixes the following two kernel-doc warnings: Warning(drivers/scsi/scsi_transport_srp.c:819): No description found for parameter 'rport' Warning(include/scsi/scsi_transport_srp.h:75): Excess struct/union/enum/typedef member 'deleted' description in 'srp_rport' Signed-off-by: Bart Van Assche Reported-by: Masanari Iida Acked-by: Sebastian Riemer Signed-off-by: Roland Dreier --- drivers/scsi/scsi_transport_srp.c | 1 + include/scsi/scsi_transport_srp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index d47ffc8d3e43..13e898332e45 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -810,6 +810,7 @@ EXPORT_SYMBOL_GPL(srp_remove_host); /** * srp_stop_rport_timers - stop the transport layer recovery timers + * @rport: SRP remote port for which to stop the timers. * * Must be called after srp_remove_host() and scsi_remove_host(). The caller * must hold a reference on the rport (rport->dev) and on the SCSI host diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index b11da5c1331e..cdb05dd1d440 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -41,7 +41,6 @@ enum srp_rport_state { * @mutex: Protects against concurrent rport reconnect / * fast_io_fail / dev_loss_tmo activity. * @state: rport state. - * @deleted: Whether or not srp_rport_del() has already been invoked. * @reconnect_delay: Reconnect delay in seconds. * @failed_reconnects: Number of failed reconnect attempts. * @reconnect_work: Work structure used for scheduling reconnect attempts. -- cgit v1.2.3 From ea58a595657db88f55b5159442fdf0e34e1b4d95 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 13:26:59 -0400 Subject: IB/core: Remove overload in ib_sg_dma* The code is replaced by driver specific changes and avoids the pointer NULL test for drivers that don't overload these operations. Suggested-by: Reviewed-by: Dennis Dalessandro Tested-by: Vinod Kumar Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6793f32ccb58..57777167dea7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1266,10 +1266,6 @@ struct ib_dma_mapping_ops { void (*unmap_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); - u64 (*dma_address)(struct ib_device *dev, - struct scatterlist *sg); - unsigned int (*dma_len)(struct ib_device *dev, - struct scatterlist *sg); void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, @@ -2089,12 +2085,13 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, * ib_sg_dma_address - Return the DMA address from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry + * + * Note: this function is obsolete. To do: change all occurrences of + * ib_sg_dma_address() into sg_dma_address(). */ static inline u64 ib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) { - if (dev->dma_ops) - return dev->dma_ops->dma_address(dev, sg); return sg_dma_address(sg); } @@ -2102,12 +2099,13 @@ static inline u64 ib_sg_dma_address(struct ib_device *dev, * ib_sg_dma_len - Return the DMA length from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry + * + * Note: this function is obsolete. To do: change all occurrences of + * ib_sg_dma_len() into sg_dma_len(). */ static inline unsigned int ib_sg_dma_len(struct ib_device *dev, struct scatterlist *sg) { - if (dev->dma_ops) - return dev->dma_ops->dma_len(dev, sg); return sg_dma_len(sg); } -- cgit v1.2.3 From b2853fd6c2d0f383dbdf7427e263eb576a633867 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 27 Mar 2014 10:52:58 +0200 Subject: IB/core: Don't resolve passive side RoCE L2 address in CMA REQ handler The code that resolves the passive side source MAC within the rdma_cm connection request handler was both redundant and buggy, so remove it. It was redundant since later, when an RC QP is modified to RTR state, the resolution will take place in the ib_core module. It was buggy because this callback also deals with UD SIDR exchange, for which we incorrectly looked at the REQ member of the CM event and dereferenced a random value. Fixes: dd5f03beb4f7 ("IB/core: Ethernet L2 attributes in verbs/cm structures") Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 17 ----------------- drivers/infiniband/core/cma.c | 26 -------------------------- include/rdma/ib_cm.h | 1 - 3 files changed, 44 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0601b9daf840..c3239170d8b7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac) -{ - struct cm_id_private *cm_id_priv; - - cm_id_priv = container_of(id, struct cm_id_private, id); - - if (smac != NULL) - memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac)); - - if (alt_smac != NULL) - memcpy(cm_id_priv->alt_av.smac, alt_smac, - sizeof(cm_id_priv->alt_av.smac)); - - return 0; -} -EXPORT_SYMBOL(ib_update_cm_av); - static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) { struct cm_device *cm_dev; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 199958d9ddc8..42c3058e6e9c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1284,15 +1284,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int offset, ret; - u8 smac[ETH_ALEN]; - u8 alt_smac[ETH_ALEN]; - u8 *psmac = smac; - u8 *palt_smac = alt_smac; - int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) == - RDMA_TRANSPORT_IB) && - (rdma_port_get_link_layer(cm_id->device, - ib_event->param.req_rcvd.port) == - IB_LINK_LAYER_ETHERNET)); listen_id = cm_id->context; if (!cma_check_req_qp_type(&listen_id->id, ib_event)) @@ -1336,28 +1327,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) goto err3; - - if (is_iboe) { - if (ib_event->param.req_rcvd.primary_path != NULL) - rdma_addr_find_smac_by_sgid( - &ib_event->param.req_rcvd.primary_path->sgid, - psmac, NULL); - else - psmac = NULL; - if (ib_event->param.req_rcvd.alternate_path != NULL) - rdma_addr_find_smac_by_sgid( - &ib_event->param.req_rcvd.alternate_path->sgid, - palt_smac, NULL); - else - palt_smac = NULL; - } /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(&lock); - if (is_iboe) - ib_update_cm_av(cm_id, psmac, palt_smac); if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index f29e3a27c2cc..0e3ff30647d5 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param { int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param); -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac); #endif /* IB_CM_H */ -- cgit v1.2.3