From 33b9b3ee9709b19c4f02ab91571d53540d05c3d1 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 30 Jan 2006 14:29:21 -0800 Subject: IB: Add userspace support for resizing CQs Add support to uverbs to handle resizing userspace CQs (completion queues), including adding an ABI for marshalling requests and responses. The kernel midlayer already has ib_resize_cq(). Signed-off-by: Roland Dreier --- include/rdma/ib_user_verbs.h | 13 ++++++++++++- include/rdma/ib_verbs.h | 5 +++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 5ff1490c08db..6ad1207e4235 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -265,6 +265,17 @@ struct ib_uverbs_create_cq_resp { __u32 cqe; }; +struct ib_uverbs_resize_cq { + __u64 response; + __u32 cq_handle; + __u32 cqe; + __u64 driver_data[0]; +}; + +struct ib_uverbs_resize_cq_resp { + __u32 cqe; +}; + struct ib_uverbs_poll_cq { __u64 response; __u32 cq_handle; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 22fc886b9695..38fa6c082eae 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -5,7 +5,7 @@ * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -880,7 +880,8 @@ struct ib_device { struct ib_ucontext *context, struct ib_udata *udata); int (*destroy_cq)(struct ib_cq *cq); - int (*resize_cq)(struct ib_cq *cq, int cqe); + int (*resize_cq)(struct ib_cq *cq, int cqe, + struct ib_udata *udata); int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); -- cgit v1.2.3 From c5bcbbb9fe00128d500c2f473d5ddc8d8c2c53a7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 2 Feb 2006 09:47:14 -0800 Subject: IB: Allow userspace to set node description Expose a writable "node_desc" sysfs attribute for InfiniBand devices. This allows userspace to update the node description with information such as the node's hostname, so that IB network management software can tie its view to the real world. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 30 +++++++++++++++++++++++++++++- include/rdma/ib_verbs.h | 5 ++++- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 5982d687a000..49601bb8475b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -628,14 +628,42 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) be16_to_cpu(((__be16 *) &dev->node_guid)[3])); } +static ssize_t show_node_desc(struct class_device *cdev, char *buf) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + + return sprintf(buf, "%.64s\n", dev->node_desc); +} + +static ssize_t set_node_desc(struct class_device *cdev, const char *buf, + size_t count) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + struct ib_device_modify desc = {}; + int ret; + + if (!dev->modify_device) + return -EIO; + + memcpy(desc.node_desc, buf, min_t(int, count, 64)); + ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); + if (ret) + return ret; + + return count; +} + static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); +static CLASS_DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, + set_node_desc); static struct class_device_attribute *ib_class_attributes[] = { &class_device_attr_node_type, &class_device_attr_sys_image_guid, - &class_device_attr_node_guid + &class_device_attr_node_guid, + &class_device_attr_node_desc }; static struct class ib_class = { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 38fa6c082eae..1d31c8cd5ce0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -222,11 +222,13 @@ struct ib_port_attr { }; enum ib_device_modify_flags { - IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0, + IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 }; struct ib_device_modify { u64 sys_image_guid; + char node_desc[64]; }; enum ib_port_modify_flags { @@ -951,6 +953,7 @@ struct ib_device { u64 uverbs_cmd_mask; int uverbs_abi_ver; + char node_desc[64]; __be64 node_guid; u8 node_type; u8 phys_port_cnt; -- cgit v1.2.3 From d36f34aadf184d8cc4c240de2b6319ccea8334bb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 2 Feb 2006 10:43:45 -0800 Subject: IB: Enable FMR pool user to set page size This patch allows the consumer to set the page size of "pages" mapped by the pool FMRs, which is a feature already existing in the base verbs API. On the cosmetic side it changes ib_fmr_attr.page_size field to be named page_shift. 
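A minimal consumer sketch of the new knob (not part of this patch; the pd and all sizing values below are illustrative assumptions): a pool user that wants plain 4 KB "pages" now fills in page_shift when creating the pool.

	struct ib_fmr_pool_param params = {
		.max_pages_per_fmr = 64,
		.page_shift        = 12,	/* log2 of the 4096-byte "page" size */
		.access            = IB_ACCESS_LOCAL_WRITE |
				     IB_ACCESS_REMOTE_WRITE,
		.pool_size         = 32,
		.dirty_watermark   = 8,
	};
	struct ib_fmr_pool *pool = ib_create_fmr_pool(pd, &params);

	if (IS_ERR(pool))
		return PTR_ERR(pool);

Note that with this change the pool no longer hard-codes PAGE_SHIFT, so callers are expected to set the field themselves.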
Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/fmr_pool.c | 6 +++--- drivers/infiniband/hw/mthca/mthca_mr.c | 10 +++++----- include/rdma/ib_fmr_pool.h | 2 ++ include/rdma/ib_verbs.h | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index d34a6f1c4f4c..838bf54458d2 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -278,9 +278,9 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, { struct ib_pool_fmr *fmr; struct ib_fmr_attr attr = { - .max_pages = params->max_pages_per_fmr, - .max_maps = IB_FMR_MAX_REMAPS, - .page_size = PAGE_SHIFT + .max_pages = params->max_pages_per_fmr, + .max_maps = IB_FMR_MAX_REMAPS, + .page_shift = params->page_shift }; for (i = 0; i < params->pool_size; ++i) { diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 653f1321a13f..0b48048ad0f8 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -491,7 +491,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, int err = -ENOMEM; int i; - if (mr->attr.page_size < 12 || mr->attr.page_size >= 32) + if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32) return -EINVAL; /* For Arbel, all MTTs must fit in the same page. */ @@ -543,7 +543,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, MTHCA_MPT_FLAG_REGION | access); - mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12); + mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12); mpt_entry->key = cpu_to_be32(key); mpt_entry->pd = cpu_to_be32(pd); memset(&mpt_entry->start, 0, @@ -611,7 +611,7 @@ static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list, if (list_len > fmr->attr.max_pages) return -EINVAL; - page_mask = (1 << fmr->attr.page_size) - 1; + page_mask = (1 << fmr->attr.page_shift) - 1; /* We are getting page lists, so va must be page aligned. */ if (iova & page_mask) @@ -659,7 +659,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, } mpt_entry.lkey = cpu_to_be32(key); - mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); + mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); mpt_entry.start = cpu_to_be64(iova); __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); @@ -700,7 +700,7 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, fmr->mem.arbel.mpt->key = cpu_to_be32(key); fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); - fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); + fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); fmr->mem.arbel.mpt->start = cpu_to_be64(iova); wmb(); diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h index 86b7e93f198b..4ace54cd0cce 100644 --- a/include/rdma/ib_fmr_pool.h +++ b/include/rdma/ib_fmr_pool.h @@ -43,6 +43,7 @@ struct ib_fmr_pool; /** * struct ib_fmr_pool_param - Parameters for creating FMR pool * @max_pages_per_fmr:Maximum number of pages per map request. + * @page_shift: Log2 of sizeof "pages" mapped by this fmr * @access:Access flags for FMRs in pool. * @pool_size:Number of FMRs to allocate for pool. 
* @dirty_watermark:Flush is triggered when @dirty_watermark dirty @@ -55,6 +56,7 @@ struct ib_fmr_pool; */ struct ib_fmr_pool_param { int max_pages_per_fmr; + int page_shift; enum ib_access_flags access; int pool_size; int dirty_watermark; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1d31c8cd5ce0..61a4390ae9d8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -651,7 +651,7 @@ struct ib_mw_bind { struct ib_fmr_attr { int max_pages; int max_maps; - u8 page_size; + u8 page_shift; }; struct ib_ucontext { -- cgit v1.2.3 From 8a51866f08103ba04894ce0f65eef567ddc3ed40 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 13 Feb 2006 12:48:12 -0800 Subject: IB: Add ib_modify_qp_is_ok() library function The in-kernel mthca driver contains a table of which attributes are valid for each queue pair state transition. It turns out that both other IB drivers -- ipath and ehca -- which are being prepared for merging have copied this table, errors and all. To forestall this code duplication, move this table and the code to check parameters against it into a midlayer library function, ib_modify_qp_is_ok(). Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 252 ++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 18 +++ 2 files changed, 270 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 8e0ba16bcbdd..ca07a2be87d3 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -245,6 +245,258 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, } EXPORT_SYMBOL(ib_create_qp); +static const struct { + int valid; + enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1]; + enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1]; +} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .req_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + }, + [IB_QPS_RTR] = { + .valid = 1, + .req_param = { + [IB_QPT_UC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN), + [IB_QPT_RC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), + }, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_RC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .req_param = { + [IB_QPT_UD] = 
IB_QP_SQ_PSN, + [IB_QPT_UC] = IB_QP_SQ_PSN, + [IB_QPT_RC] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_SMI] = IB_QP_SQ_PSN, + [IB_QPT_GSI] = IB_QP_SQ_PSN, + }, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY + } + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_AV | + IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 } + } +}; + +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (cur_state < 0 || cur_state > IB_QPS_ERR || + next_state < 0 || next_state > IB_QPS_ERR) + return 0; + + if (mask & IB_QP_CUR_STATE && + cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && + cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) + return 0; + + if (!qp_state_table[cur_state][next_state].valid) + return 0; + + req_param = 
qp_state_table[cur_state][next_state].req_param[type]; + opt_param = qp_state_table[cur_state][next_state].opt_param[type]; + + if ((mask & req_param) != req_param) + return 0; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return 0; + + return 1; +} +EXPORT_SYMBOL(ib_modify_qp_is_ok); + int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 61a4390ae9d8..010287c844e7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -990,6 +990,24 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } +/** + * ib_modify_qp_is_ok - Check that the supplied attribute mask + * contains all required attributes and no attributes not allowed for + * the given QP state transition. + * @cur_state: Current QP state + * @next_state: Next QP state + * @type: QP type + * @mask: Mask of supplied QP attributes + * + * This function is a helper function that a low-level driver's + * modify_qp method can use to validate the consumer's input. It + * checks that cur_state and next_state are valid QP states, that a + * transition from cur_state to next_state is allowed by the IB spec, + * and that the attribute mask supplied is allowed for the transition. + */ +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask); + int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); -- cgit v1.2.3 From a74cd4af0bfa9578594acbb711a958104c93b772 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 13 Feb 2006 16:30:49 -0800 Subject: IB: Whitespace cleanups Remove trailing whitespace and fix indentation that uses spaces instead of tabs.
Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 12 ++++++------ drivers/infiniband/core/uverbs_main.c | 1 - drivers/infiniband/core/verbs.c | 3 +-- include/rdma/ib_user_verbs.h | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index be1cef1b3116..398c125d908c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1134,8 +1134,8 @@ out: } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; @@ -1363,8 +1363,8 @@ err: } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; @@ -1414,8 +1414,8 @@ out: } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 099fe6cde68c..335b6938a656 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -462,7 +462,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); - } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ca07a2be87d3..c69334dc8012 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -574,8 +574,7 @@ int ib_destroy_cq(struct ib_cq *cq) } EXPORT_SYMBOL(ib_destroy_cq); -int ib_resize_cq(struct ib_cq *cq, - int cqe) +int ib_resize_cq(struct ib_cq *cq, int cqe) { return cq->device->resize_cq ? cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS; diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 6ad1207e4235..fb94c08169ce 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -426,7 +426,7 @@ struct ib_uverbs_sge { }; struct ib_uverbs_send_wr { - __u64 wr_id; + __u64 wr_id; __u32 num_sge; __u32 opcode; __u32 send_flags; @@ -500,7 +500,7 @@ struct ib_uverbs_post_srq_recv_resp { struct ib_uverbs_global_route { __u8 dgid[16]; - __u32 flow_label; + __u32 flow_label; __u8 sgid_index; __u8 hop_limit; __u8 traffic_class; -- cgit v1.2.3 From 7ccc9a24e01258a31ee2b964215e4ddddd2a02c4 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Mon, 13 Feb 2006 16:31:25 -0800 Subject: IB/uverbs: Support for query QP from userspace Add support to uverbs to handle querying userspace QPs (queue pairs), including adding an ABI for marshalling requests and responses. The kernel midlayer already has the underlying ib_query_qp() function. 
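A hedged sketch of the userspace half of this ABI (not part of the kernel patch): a verbs library would marshal the command through the uverbs character device. The cmd_fd, qp_handle, and attr_mask arguments are assumptions, and the word counts follow the usual uverbs write() convention of 32-bit words including the command header.

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <rdma/ib_user_verbs.h>

	static int query_qp(int cmd_fd, __u32 qp_handle, __u32 attr_mask)
	{
		struct {
			struct ib_uverbs_cmd_hdr  hdr;
			struct ib_uverbs_query_qp cmd;
		} req;
		struct ib_uverbs_query_qp_resp resp;

		memset(&req, 0, sizeof req);
		req.hdr.command   = IB_USER_VERBS_CMD_QUERY_QP;
		req.hdr.in_words  = sizeof req / 4;
		req.hdr.out_words = sizeof resp / 4;
		req.cmd.response  = (unsigned long) &resp;
		req.cmd.qp_handle = qp_handle;
		req.cmd.attr_mask = attr_mask;	/* IB_QP_* bits, as mirrored in userspace */

		if (write(cmd_fd, &req, sizeof req) != sizeof req)
			return -1;

		printf("QP state %u, path MTU %u\n",
		       (unsigned) resp.qp_state, (unsigned) resp.path_mtu);
		return 0;
	}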
Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 100 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 1 + include/rdma/ib_user_verbs.h | 41 ++++++++++++++ 4 files changed, 143 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3207239819ce..89c798eb5749 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -183,6 +183,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); IB_UVERBS_DECLARE_CMD(post_send); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 398c125d908c..4cbef8c06634 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -996,6 +996,106 @@ err_up: return ret; } +ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_query_qp cmd; + struct ib_uverbs_query_qp_resp resp; + struct ib_qp *qp; + struct ib_qp_attr *attr; + struct ib_qp_init_attr *init_attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto out; + } + + mutex_lock(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); + else + ret = -EINVAL; + + mutex_unlock(&ib_uverbs_idr_mutex); + + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + + resp.qp_state = attr->qp_state; + resp.cur_qp_state = attr->cur_qp_state; + resp.path_mtu = attr->path_mtu; + resp.path_mig_state = attr->path_mig_state; + resp.qkey = attr->qkey; + resp.rq_psn = attr->rq_psn; + resp.sq_psn = attr->sq_psn; + resp.dest_qp_num = attr->dest_qp_num; + resp.qp_access_flags = attr->qp_access_flags; + resp.pkey_index = attr->pkey_index; + resp.alt_pkey_index = attr->alt_pkey_index; + resp.en_sqd_async_notify = attr->en_sqd_async_notify; + resp.max_rd_atomic = attr->max_rd_atomic; + resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; + resp.min_rnr_timer = attr->min_rnr_timer; + resp.port_num = attr->port_num; + resp.timeout = attr->timeout; + resp.retry_cnt = attr->retry_cnt; + resp.rnr_retry = attr->rnr_retry; + resp.alt_port_num = attr->alt_port_num; + resp.alt_timeout = attr->alt_timeout; + + memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); + resp.dest.flow_label = attr->ah_attr.grh.flow_label; + resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; + resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; + resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; + resp.dest.dlid = attr->ah_attr.dlid; + resp.dest.sl = attr->ah_attr.sl; + resp.dest.src_path_bits = attr->ah_attr.src_path_bits; + resp.dest.static_rate = attr->ah_attr.static_rate; + resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); + resp.dest.port_num = attr->ah_attr.port_num; + + memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); + resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; + resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; + resp.alt_dest.hop_limit = 
attr->alt_ah_attr.grh.hop_limit; + resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; + resp.alt_dest.dlid = attr->alt_ah_attr.dlid; + resp.alt_dest.sl = attr->alt_ah_attr.sl; + resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; + resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; + resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); + resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + + resp.max_send_wr = init_attr->cap.max_send_wr; + resp.max_recv_wr = init_attr->cap.max_recv_wr; + resp.max_send_sge = init_attr->cap.max_send_sge; + resp.max_recv_sge = init_attr->cap.max_recv_sge; + resp.max_inline_data = init_attr->cap.max_inline_data; + resp.sq_sig_all = !!init_attr->sq_sig_type; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + kfree(attr); + kfree(init_attr); + + return ret ? ret : in_len; +} + ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 335b6938a656..91e4750fa319 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -96,6 +96,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index fb94c08169ce..58662c34a3de 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -370,6 +370,47 @@ struct ib_uverbs_qp_dest { __u8 port_num; }; +struct ib_uverbs_query_qp { + __u64 response; + __u32 qp_handle; + __u32 attr_mask; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_qp_resp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 sq_sig_all; + __u8 reserved[5]; + __u64 driver_data[0]; +}; + struct ib_uverbs_modify_qp { struct ib_uverbs_qp_dest dest; struct ib_uverbs_qp_dest alt_dest; -- cgit v1.2.3 From 8bdb0e8632e0f5061bd18b6934346cb609490135 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Mon, 13 Feb 2006 16:31:57 -0800 Subject: IB/uverbs: Support for query SRQ from userspace Add support to uverbs to handle querying userspace SRQs (shared receive queues), including adding an ABI for marshalling requests and responses. The kernel midlayer already has the underlying ib_query_srq() function. 
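The userspace invocation would mirror the QUERY_QP sketch shown for the previous patch; again cmd_fd and srq_handle are hypothetical, and only the command and response structures differ.

	struct {
		struct ib_uverbs_cmd_hdr   hdr;
		struct ib_uverbs_query_srq cmd;
	} req;
	struct ib_uverbs_query_srq_resp resp;

	memset(&req, 0, sizeof req);
	req.hdr.command    = IB_USER_VERBS_CMD_QUERY_SRQ;
	req.hdr.in_words   = sizeof req / 4;
	req.hdr.out_words  = sizeof resp / 4;
	req.cmd.response   = (unsigned long) &resp;
	req.cmd.srq_handle = srq_handle;

	if (write(cmd_fd, &req, sizeof req) == sizeof req)
		printf("max_wr %u, max_sge %u, srq_limit %u\n",
		       resp.max_wr, resp.max_sge, resp.srq_limit);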
Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 44 +++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 1 + include/rdma/ib_user_verbs.h | 15 ++++++++++++ 4 files changed, 61 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 89c798eb5749..3372d67ff139 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -195,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); +IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4cbef8c06634..38a66fbef36d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2,6 +2,7 @@ * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -1923,6 +1924,49 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_srq cmd; + struct ib_uverbs_query_srq_resp resp; + struct ib_srq_attr attr; + struct ib_srq *srq; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (srq && srq->uobject->context == file->ucontext) + ret = ib_query_srq(srq, &attr); + else + ret = -EINVAL; + + mutex_unlock(&ib_uverbs_idr_mutex); + + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + + resp.max_wr = attr.max_wr; + resp.max_sge = attr.max_sge; + resp.srq_limit = attr.srq_limit; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + return ret ? ret : in_len; +} + ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 91e4750fa319..ff092a0a94da 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -108,6 +108,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 58662c34a3de..0edd3a6fe8f5 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -2,6 +2,7 @@ * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -613,6 +614,20 @@ struct ib_uverbs_modify_srq { __u64 driver_data[0]; }; +struct ib_uverbs_query_srq { + __u64 response; + __u32 srq_handle; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_srq_resp { + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; +}; + struct ib_uverbs_destroy_srq { __u64 response; __u32 srq_handle; -- cgit v1.2.3 From 4d9781c5ce1a517a07dbf03c37323c011037fe79 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 16 Feb 2006 09:26:19 -0800 Subject: IB/uverbs: Fix alignment of struct ib_uverbs_create_qp_resp The size of struct ib_uverbs_create_qp_resp is not an even multiple of 8 bytes. This causes problems for low-level drivers that add private data after the structure: 32-bit userspace will look in the wrong place for a response from a 64-bit kernel. Fix this by adding a reserved field. Also, bump the ABI version because this changes the size of a structure. Pointed out by Hoang-Nam Nguyen. Signed-off-by: Roland Dreier --- include/rdma/ib_user_verbs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 0edd3a6fe8f5..3bf4402f466a 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -44,7 +44,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 4 +#define IB_USER_VERBS_ABI_VERSION 5 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -350,6 +350,7 @@ struct ib_uverbs_create_qp_resp { __u32 max_send_sge; __u32 max_recv_sge; __u32 max_inline_data; + __u32 reserved; }; /* -- cgit v1.2.3 From abb6e9ba17eb133ab385d0f9017fa8afa809d52a Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Thu, 23 Feb 2006 12:13:51 -0800 Subject: IB/mthca: Return actual capacity from create_srq Have mthca's create_srq method return the actual capacity of the SRQ that gets created. Also update comments in <rdma/ib_verbs.h> to clarify that this is what is expected from ib_create_srq(). Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_srq.c | 3 +++ include/rdma/ib_verbs.h | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index deb526ce013d..209d2bae2256 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -271,6 +271,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, srq->first_free = 0; srq->last_free = srq->max - 1; + attr->max_wr = srq->max; + attr->max_sge = srq->max_gs; + return 0; err_out_free_srq: diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 010287c844e7..c1ad6273ac6c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1100,7 +1100,9 @@ int ib_destroy_ah(struct ib_ah *ah); * ib_create_srq - Creates a SRQ associated with the specified protection * domain. * @pd: The protection domain associated with the SRQ. - * @srq_init_attr: A list of initial attributes required to create the SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * SRQ. If SRQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created SRQ.
* * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the SRQ, and set to the actual values allocated @@ -1159,7 +1161,9 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, * ib_create_qp - Creates a QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. - * @qp_init_attr: A list of initial attributes required to create the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. */ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); -- cgit v1.2.3 From ea88fd16d6e85f4bc71b6053180b64f04be1ff14 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Thu, 23 Feb 2006 12:36:18 -0800 Subject: IB/uverbs: Return actual capacity from create SRQ operation Pass actual capacity of created SRQ back to userspace, so that userspace can report accurate capacities. This requires an ABI bump, to change struct ib_uverbs_create_srq_resp. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 2 ++ include/rdma/ib_user_verbs.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 38a66fbef36d..b157e5ff7dad 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1864,6 +1864,8 @@ retry: goto err_destroy; resp.srq_handle = uobj->uobject.id; + resp.max_wr = attr.attr.max_wr; + resp.max_sge = attr.attr.max_sge; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 3bf4402f466a..338ed4333063 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -44,7 +44,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 5 +#define IB_USER_VERBS_ABI_VERSION 6 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -605,6 +605,9 @@ struct ib_uverbs_create_srq { struct ib_uverbs_create_srq_resp { __u32 srq_handle; + __u32 max_wr; + __u32 max_sge; + __u32 reserved; }; struct ib_uverbs_modify_srq { -- cgit v1.2.3 From f36e1793e25513380cae5958a9164d4cc4458ad0 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Mar 2006 21:54:13 -0800 Subject: IB/umad: Add support for large RMPP transfers Add support for sending and receiving large RMPP transfers. The old code supports transfers only as large as a single contiguous kernel memory allocation. This patch uses linked list of memory buffers when sending and receiving data to avoid needing contiguous pages for larger transfers. Receive side: copy the arriving MADs in chunks instead of coalescing to one large buffer in kernel space. Send side: split a multipacket MAD buffer to a list of segments, (multipacket_list) and send these using a gather list of size 2. Also, save pointer to last sent segment, and retrieve requested segments by walking list starting at last sent segment. Finally, save pointer to last-acked segment. When retrying, retrieve segments for resending relative to this pointer. When updating last ack, start at this pointer. 
Signed-off-by: Jack Morgenstein Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 166 ++++++++++++++++++++++----- drivers/infiniband/core/mad_priv.h | 16 ++- drivers/infiniband/core/mad_rmpp.c | 148 ++++++++---------------- drivers/infiniband/core/user_mad.c | 225 +++++++++++++++++++++++++------------ include/rdma/ib_mad.h | 48 +++++--- 5 files changed, 376 insertions(+), 227 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 445ad0dda213..16549add8e8f 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $ + * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include @@ -765,18 +765,67 @@ out: return ret; } -static int get_buf_length(int hdr_len, int data_len) +static int get_pad_size(int hdr_len, int data_len) { int seg_size, pad; seg_size = sizeof(struct ib_mad) - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; - if (pad == seg_size) - pad = 0; + return pad == seg_size ? 0 : pad; } else - pad = seg_size; - return hdr_len + data_len + pad; + return seg_size; +} + +static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_segment *s, *t; + + list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { + list_del(&s->list); + kfree(s); + } +} + +static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, + gfp_t gfp_mask) +{ + struct ib_mad_send_buf *send_buf = &send_wr->send_buf; + struct ib_rmpp_mad *rmpp_mad = send_buf->mad; + struct ib_rmpp_segment *seg = NULL; + int left, seg_size, pad; + + send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len; + seg_size = send_buf->seg_size; + pad = send_wr->pad; + + /* Allocate data segments. 
*/ + for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { + seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); + if (!seg) { + printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem " + "alloc failed for len %zd, gfp %#x\n", + sizeof (*seg) + seg_size, gfp_mask); + free_send_rmpp_list(send_wr); + return -ENOMEM; + } + seg->num = ++send_buf->seg_count; + list_add_tail(&seg->list, &send_wr->rmpp_list); + } + + /* Zero any padding */ + if (pad) + memset(seg->data + seg_size - pad, 0, pad); + + rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> + agent.rmpp_version; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + send_wr->cur_seg = container_of(send_wr->rmpp_list.next, + struct ib_rmpp_segment, list); + send_wr->last_ack_seg = send_wr->cur_seg; + return 0; } struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, @@ -787,32 +836,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; - int buf_size; + int pad, message_size, ret, size; void *buf; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); - buf_size = get_buf_length(hdr_len, data_len); + pad = get_pad_size(hdr_len, data_len); + message_size = hdr_len + data_len + pad; if ((!mad_agent->rmpp_version && - (rmpp_active || buf_size > sizeof(struct ib_mad))) || - (!rmpp_active && buf_size > sizeof(struct ib_mad))) + (rmpp_active || message_size > sizeof(struct ib_mad))) || + (!rmpp_active && message_size > sizeof(struct ib_mad))) return ERR_PTR(-EINVAL); - buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask); + size = rmpp_active ? hdr_len : sizeof(struct ib_mad); + buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); - mad_send_wr = buf + buf_size; + mad_send_wr = buf + size; + INIT_LIST_HEAD(&mad_send_wr->rmpp_list); mad_send_wr->send_buf.mad = buf; + mad_send_wr->send_buf.hdr_len = hdr_len; + mad_send_wr->send_buf.data_len = data_len; + mad_send_wr->pad = pad; mad_send_wr->mad_agent_priv = mad_agent_priv; - mad_send_wr->sg_list[0].length = buf_size; + mad_send_wr->sg_list[0].length = hdr_len; mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; + mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len; + mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey; mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 1; + mad_send_wr->send_wr.num_sge = 2; mad_send_wr->send_wr.opcode = IB_WR_SEND; mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; @@ -820,13 +877,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - - IB_MGMT_RMPP_HDR + data_len); - rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; - rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; - ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, - IB_MGMT_RMPP_FLAG_ACTIVE); + ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask); + if (ret) { + kfree(buf); + return ERR_PTR(ret); + } } mad_send_wr->send_buf.mad_agent = mad_agent; @@ -835,14 +890,50 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } 
EXPORT_SYMBOL(ib_create_send_mad); +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct list_head *list; + + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + list = &mad_send_wr->cur_seg->list; + + if (mad_send_wr->cur_seg->num < seg_num) { + list_for_each_entry(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } else if (mad_send_wr->cur_seg->num > seg_num) { + list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } + return mad_send_wr->cur_seg->data; +} +EXPORT_SYMBOL(ib_get_rmpp_segment); + +static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) +{ + if (mad_send_wr->send_buf.seg_count) + return ib_get_rmpp_segment(&mad_send_wr->send_buf, + mad_send_wr->seg_num); + else + return mad_send_wr->send_buf.mad + + mad_send_wr->send_buf.hdr_len; +} + void ib_free_send_mad(struct ib_mad_send_buf *send_buf) { struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); - kfree(send_buf->mad); + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + free_send_rmpp_list(mad_send_wr); + kfree(send_buf->mad); if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); } @@ -865,10 +956,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_agent = mad_send_wr->send_buf.mad_agent; sge = mad_send_wr->sg_list; - sge->addr = dma_map_single(mad_agent->device->dma_device, - mad_send_wr->send_buf.mad, sge->length, - DMA_TO_DEVICE); - pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); + sge[0].addr = dma_map_single(mad_agent->device->dma_device, + mad_send_wr->send_buf.mad, + sge[0].length, + DMA_TO_DEVICE); + pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr); + + sge[1].addr = dma_map_single(mad_agent->device->dma_device, + ib_get_payload(mad_send_wr), + sge[1].length, + DMA_TO_DEVICE); + pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr); spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { @@ -885,11 +983,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - if (ret) + if (ret) { dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), - sge->length, DMA_TO_DEVICE); - + pci_unmap_addr(mad_send_wr, header_mapping), + sge[0].length, DMA_TO_DEVICE); + dma_unmap_single(mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, payload_mapping), + sge[1].length, DMA_TO_DEVICE); + } return ret; } @@ -1860,8 +1961,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, retry: dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), + pci_unmap_addr(mad_send_wr, header_mapping), mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); + dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, payload_mapping), + mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 
570f78682af3..a7125d4b5ccf 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $ + * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $ */ #ifndef __IB_MAD_PRIV_H__ @@ -85,6 +85,12 @@ struct ib_mad_private { } mad; } __attribute__ ((packed)); +struct ib_rmpp_segment { + struct list_head list; + u32 num; + u8 data[0]; +}; + struct ib_mad_agent_private { struct list_head agent_list; struct ib_mad_agent agent; @@ -119,7 +125,8 @@ struct ib_mad_send_wr_private { struct list_head agent_list; struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_buf send_buf; - DECLARE_PCI_UNMAP_ADDR(mapping) + DECLARE_PCI_UNMAP_ADDR(header_mapping) + DECLARE_PCI_UNMAP_ADDR(payload_mapping) struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; @@ -130,11 +137,12 @@ struct ib_mad_send_wr_private { enum ib_wc_status status; /* RMPP control */ + struct list_head rmpp_list; + struct ib_rmpp_segment *last_ack_seg; + struct ib_rmpp_segment *cur_seg; int last_ack; int seg_num; int newwin; - int total_seg; - int data_offset; int pad; }; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 3249e1d8c07b..bacfdd5bddad 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -111,14 +111,14 @@ static int data_offset(u8 mgmt_class) return IB_MGMT_RMPP_HDR; } -static void format_ack(struct ib_rmpp_mad *ack, +static void format_ack(struct ib_mad_send_buf *msg, struct ib_rmpp_mad *data, struct mad_rmpp_recv *rmpp_recv) { + struct ib_rmpp_mad *ack = msg->mad; unsigned long flags; - memcpy(&ack->mad_hdr, &data->mad_hdr, - data_offset(data->mad_hdr.mgmt_class)); + memcpy(ack, &data->mad_hdr, msg->hdr_len); ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; @@ -135,16 +135,16 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; - int ret; + int ret, hdr_len; + hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR, - IB_MGMT_RMPP_DATA, GFP_KERNEL); + recv_wc->wc->pkey_index, 1, hdr_len, + 0, GFP_KERNEL); if (!msg) return; - format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, - rmpp_recv); + format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); msg->ah = rmpp_recv->ah; ret = ib_post_send_mad(msg, NULL); if (ret) @@ -156,16 +156,17 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, { struct ib_mad_send_buf *msg; struct ib_ah *ah; + int hdr_len; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) return (void *) ah; + hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, - IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, - GFP_KERNEL); + hdr_len, 0, GFP_KERNEL); if (IS_ERR(msg)) ib_destroy_ah(ah); else @@ -195,8 +196,7 @@ static void nack_recv(struct ib_mad_agent_private *agent, return; rmpp_mad = msg->mad; - memcpy(rmpp_mad, recv_wc->recv_buf.mad, - data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); + memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); rmpp_mad->mad_hdr.method ^= 
IB_MGMT_METHOD_RESP; rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; @@ -433,44 +433,6 @@ static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv) return rmpp_wc; } -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf) -{ - struct ib_mad_recv_buf *seg_buf; - struct ib_rmpp_mad *rmpp_mad; - void *data; - int size, len, offset; - u8 flags; - - len = mad_recv_wc->mad_len; - if (len <= sizeof(struct ib_mad)) { - memcpy(buf, mad_recv_wc->recv_buf.mad, len); - return; - } - - offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class); - - list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) { - rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad; - flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr); - - if (flags & IB_MGMT_RMPP_FLAG_FIRST) { - data = rmpp_mad; - size = sizeof(*rmpp_mad); - } else { - data = (void *) rmpp_mad + offset; - if (flags & IB_MGMT_RMPP_FLAG_LAST) - size = len; - else - size = sizeof(*rmpp_mad) - offset; - } - - memcpy(buf, data, size); - len -= size; - buf += size; - } -} -EXPORT_SYMBOL(ib_coalesce_recv_mad); - static struct ib_mad_recv_wc * continue_rmpp(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) @@ -570,50 +532,33 @@ start_rmpp(struct ib_mad_agent_private *agent, return mad_recv_wc; } -static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr) -{ - return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset + - (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) * - (mad_send_wr->seg_num - 1); -} - static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; int timeout; - u32 paylen; + u32 paylen = 0; rmpp_mad = mad_send_wr->send_buf.mad; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); + rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num); if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA - + paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA - mad_send_wr->pad; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); - mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad); - } else { - mad_send_wr->send_wr.num_sge = 2; - mad_send_wr->sg_list[0].length = mad_send_wr->data_offset; - mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr); - mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) - - mad_send_wr->data_offset; - mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey; - rmpp_mad->rmpp_hdr.paylen_newwin = 0; } - if (mad_send_wr->seg_num == mad_send_wr->total_seg) { + if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); } + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); /* 2 seconds for an ACK until we can find the packet lifetime */ timeout = mad_send_wr->send_buf.timeout_ms; if (!timeout || timeout > 2000) mad_send_wr->timeout = msecs_to_jiffies(2000); - mad_send_wr->seg_num++; + return ib_send_mad(mad_send_wr); } @@ -629,7 +574,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, if (!mad_send_wr) goto out; /* Unmatched send */ - if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || 
(mad_send_wr->status != IB_WC_SUCCESS)) goto out; /* Send is already done */ @@ -645,6 +590,18 @@ out: spin_unlock_irqrestore(&agent->lock, flags); } +static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, + int seg_num) +{ + struct list_head *list; + + wr->last_ack = seg_num; + list = &wr->last_ack_seg->list; + list_for_each_entry(wr->last_ack_seg, list, list) + if (wr->last_ack_seg->num == seg_num) + break; +} + static void process_rmpp_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { @@ -675,11 +632,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (!mad_send_wr) goto out; /* Unmatched ACK */ - if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) goto out; /* Send is already done */ - if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) { + if (seg_num > mad_send_wr->send_buf.seg_count || + seg_num > mad_send_wr->newwin) { spin_unlock_irqrestore(&agent->lock, flags); abort_send(agent, rmpp_mad->mad_hdr.tid, IB_MGMT_RMPP_STATUS_S2B); @@ -691,11 +649,11 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, goto out; /* Old ACK */ if (seg_num > mad_send_wr->last_ack) { - mad_send_wr->last_ack = seg_num; + adjust_last_ack(mad_send_wr, seg_num); mad_send_wr->retries = mad_send_wr->send_buf.retries; } mad_send_wr->newwin = newwin; - if (mad_send_wr->last_ack == mad_send_wr->total_seg) { + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { /* If no response is expected, the ACK completes the send */ if (!mad_send_wr->send_buf.timeout_ms) { struct ib_mad_send_wc wc; @@ -714,7 +672,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, mad_send_wr->send_buf.timeout_ms); } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && - mad_send_wr->seg_num <= mad_send_wr->total_seg) { + mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { /* Send failure will just result in a timeout/retry */ ret = send_next_seg(mad_send_wr); if (ret) @@ -838,31 +796,19 @@ out: int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; - int i, total_len, ret; + int ret; rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { + mad_send_wr->seg_num = 1; return IB_RMPP_RESULT_INTERNAL; + } - if (mad_send_wr->send_wr.num_sge > 1) - return -EINVAL; /* TODO: support num_sge > 1 */ - - mad_send_wr->seg_num = 1; mad_send_wr->newwin = 1; - mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class); - - total_len = 0; - for (i = 0; i < mad_send_wr->send_wr.num_sge; i++) - total_len += mad_send_wr->send_wr.sg_list[i].length; - - mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / - (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); - mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR - - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); /* We need to wait for the final ACK even if there isn't a response */ mad_send_wr->refcount += (mad_send_wr->timeout == 0); @@ -893,14 +839,14 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, if (!mad_send_wr->timeout) return IB_RMPP_RESULT_PROCESSED; /* Response received */ - if (mad_send_wr->last_ack 
== mad_send_wr->total_seg) { + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); return IB_RMPP_RESULT_PROCESSED; /* Send done */ } - if (mad_send_wr->seg_num > mad_send_wr->newwin || - mad_send_wr->seg_num > mad_send_wr->total_seg) + if (mad_send_wr->seg_num == mad_send_wr->newwin || + mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ ret = send_next_seg(mad_send_wr); @@ -921,10 +867,12 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - if (mad_send_wr->last_ack == mad_send_wr->total_seg) + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) return IB_RMPP_RESULT_PROCESSED; - mad_send_wr->seg_num = mad_send_wr->last_ack + 1; + mad_send_wr->seg_num = mad_send_wr->last_ack; + mad_send_wr->cur_seg = mad_send_wr->last_ack_seg; + ret = send_next_seg(mad_send_wr); if (ret) return IB_RMPP_RESULT_PROCESSED; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c908de8db5a9..fb6cd42601f9 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $ + * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include @@ -121,6 +121,7 @@ struct ib_umad_file { struct ib_umad_packet { struct ib_mad_send_buf *msg; + struct ib_mad_recv_wc *recv_wc; struct list_head list; int length; struct ib_user_mad mad; @@ -176,31 +177,32 @@ static int queue_packet(struct ib_umad_file *file, return ret; } +static int data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return IB_MGMT_SA_HDR; + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return IB_MGMT_VENDOR_HDR; + else + return IB_MGMT_RMPP_HDR; +} + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *timeout; struct ib_umad_packet *packet = send_wc->send_buf->context[0]; ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); - if (!timeout) - goto out; - - timeout->length = IB_MGMT_MAD_HDR; - timeout->mad.hdr.id = packet->mad.hdr.id; - timeout->mad.hdr.status = ETIMEDOUT; - memcpy(timeout->mad.data, packet->mad.data, - sizeof (struct ib_mad_hdr)); - - if (queue_packet(file, agent, timeout)) - kfree(timeout); + packet->length = IB_MGMT_MAD_HDR; + packet->mad.hdr.status = ETIMEDOUT; + if (!queue_packet(file, agent, packet)) + return; } -out: kfree(packet); } @@ -209,22 +211,20 @@ static void recv_handler(struct ib_mad_agent *agent, { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; - int length; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) - goto out; + goto err1; - length = mad_recv_wc->mad_len; - packet = kzalloc(sizeof *packet + length, GFP_KERNEL); + packet = kzalloc(sizeof *packet, GFP_KERNEL); if (!packet) - goto out; + goto err1; - packet->length = length; - - ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); + packet->length = mad_recv_wc->mad_len; + packet->recv_wc = mad_recv_wc; packet->mad.hdr.status = 0; - packet->mad.hdr.length = length + 
sizeof (struct ib_user_mad); + packet->mad.hdr.length = sizeof (struct ib_user_mad) + + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; @@ -240,12 +240,79 @@ static void recv_handler(struct ib_mad_agent *agent, } if (queue_packet(file, agent, packet)) - kfree(packet); + goto err2; + return; -out: +err2: + kfree(packet); +err1: ib_free_recv_mad(mad_recv_wc); } +static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, + size_t count) +{ + struct ib_mad_recv_buf *recv_buf; + int left, seg_payload, offset, max_seg_payload; + + /* We need enough room to copy the first (or only) MAD segment. */ + recv_buf = &packet->recv_wc->recv_buf; + if ((packet->length <= sizeof (*recv_buf->mad) && + count < sizeof (packet->mad) + packet->length) || + (packet->length > sizeof (*recv_buf->mad) && + count < sizeof (packet->mad) + sizeof (*recv_buf->mad))) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, sizeof (packet->mad))) + return -EFAULT; + + buf += sizeof (packet->mad); + seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); + if (copy_to_user(buf, recv_buf->mad, seg_payload)) + return -EFAULT; + + if (seg_payload < packet->length) { + /* + * Multipacket RMPP MAD message. Copy remainder of message. + * Note that last segment may have a shorter payload. + */ + if (count < sizeof (packet->mad) + packet->length) { + /* + * The buffer is too small, return the first RMPP segment, + * which includes the RMPP message length. + */ + return -ENOSPC; + } + offset = data_offset(recv_buf->mad->mad_hdr.mgmt_class); + max_seg_payload = sizeof (struct ib_mad) - offset; + + for (left = packet->length - seg_payload, buf += seg_payload; + left; left -= seg_payload, buf += seg_payload) { + recv_buf = container_of(recv_buf->list.next, + struct ib_mad_recv_buf, list); + seg_payload = min(left, max_seg_payload); + if (copy_to_user(buf, ((void *) recv_buf->mad) + offset, + seg_payload)) + return -EFAULT; + } + } + return sizeof (packet->mad) + packet->length; +} + +static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet, + size_t count) +{ + ssize_t size = sizeof (packet->mad) + packet->length; + + if (count < size) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, size)) + return -EFAULT; + + return size; +} + static ssize_t ib_umad_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -253,7 +320,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, struct ib_umad_packet *packet; ssize_t ret; - if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad)) + if (count < sizeof (struct ib_user_mad)) return -EINVAL; spin_lock_irq(&file->recv_lock); @@ -276,28 +343,44 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, spin_unlock_irq(&file->recv_lock); - if (count < packet->length + sizeof (struct ib_user_mad)) { - /* Return length needed (and first RMPP segment) if too small */ - if (copy_to_user(buf, &packet->mad, - sizeof (struct ib_user_mad) + sizeof (struct ib_mad))) - ret = -EFAULT; - else - ret = -ENOSPC; - } else if (copy_to_user(buf, &packet->mad, - packet->length + sizeof (struct ib_user_mad))) - ret = -EFAULT; + if (packet->recv_wc) + ret = copy_recv_mad(buf, packet, count); else - ret = packet->length + sizeof (struct ib_user_mad); + ret = copy_send_mad(buf, packet, count); + if (ret < 0) { /* Requeue packet */ spin_lock_irq(&file->recv_lock); 
list_add(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); - } else + } else { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); kfree(packet); + } return ret; } +static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) +{ + int left, seg; + + /* Copy class specific header */ + if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && + copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, + msg->hdr_len - IB_MGMT_RMPP_HDR)) + return -EFAULT; + + /* All headers are in place. Copy data segments. */ + for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; + seg++, left -= msg->seg_size, buf += msg->seg_size) { + if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, + min(left, msg->seg_size))) + return -EFAULT; + } + return 0; +} + static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -309,14 +392,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_rmpp_mad *rmpp_mad; u8 method; __be64 *tid; - int ret, length, hdr_len, copy_offset; - int rmpp_active, has_rmpp_header; + int ret, data_len, hdr_len, copy_offset, rmpp_active; if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) return -EINVAL; - length = count - sizeof (struct ib_user_mad); - packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; @@ -363,35 +444,25 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { hdr_len = IB_MGMT_SA_HDR; copy_offset = IB_MGMT_RMPP_HDR; - has_rmpp_header = 1; + rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START && rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) { - hdr_len = IB_MGMT_VENDOR_HDR; - copy_offset = IB_MGMT_RMPP_HDR; - has_rmpp_header = 1; + hdr_len = IB_MGMT_VENDOR_HDR; + copy_offset = IB_MGMT_RMPP_HDR; + rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; } else { hdr_len = IB_MGMT_MAD_HDR; copy_offset = IB_MGMT_MAD_HDR; - has_rmpp_header = 0; - } - - if (has_rmpp_header) - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE; - else rmpp_active = 0; - - /* Validate that the management class can support RMPP */ - if (rmpp_active && !agent->rmpp_version) { - ret = -EINVAL; - goto err_ah; } + data_len = count - sizeof (struct ib_user_mad) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, rmpp_active, - hdr_len, length - hdr_len, - GFP_KERNEL); + 0, rmpp_active, hdr_len, + data_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; @@ -402,14 +473,21 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; - /* Copy MAD headers (RMPP header in place) */ + /* Copy MAD header. Any RMPP header is already in place. 
*/ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); - /* Now, copy rest of message from user into send buffer */ - if (copy_from_user(packet->msg->mad + copy_offset, - buf + sizeof (struct ib_user_mad) + copy_offset, - length - copy_offset)) { - ret = -EFAULT; - goto err_msg; + buf += sizeof (struct ib_user_mad); + + if (!rmpp_active) { + if (copy_from_user(packet->msg->mad + copy_offset, + buf + copy_offset, + hdr_len + data_len - copy_offset)) { + ret = -EFAULT; + goto err_msg; + } + } else { + ret = copy_rmpp_mad(packet->msg, buf); + if (ret) + goto err_msg; } /* @@ -433,18 +511,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_msg; up_read(&file->port->mutex); - return count; err_msg: ib_free_send_mad(packet->msg); - err_ah: ib_destroy_ah(ah); - err_up: up_read(&file->port->mutex); - err: kfree(packet); return ret; @@ -627,8 +701,11 @@ static int ib_umad_close(struct inode *inode, struct file *filp) already_dead = file->agents_dead; file->agents_dead = 1; - list_for_each_entry_safe(packet, tmp, &file->recv_list, list) + list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); kfree(packet); + } list_del(&file->port_list); diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 2c133506742b..51ab8eddb295 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -33,7 +33,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $ + * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $ */ #if !defined( IB_MAD_H ) @@ -208,15 +208,23 @@ struct ib_class_port_info /** * ib_mad_send_buf - MAD data buffer and work request for sends. * @next: A pointer used to chain together MADs for posting. - * @mad: References an allocated MAD data buffer. + * @mad: References an allocated MAD data buffer for MADs that do not have + * RMPP active. For MADs using RMPP, references the common and management + * class specific headers. * @mad_agent: MAD agent that allocated the buffer. * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * includes the common MAD, RMPP, and class specific headers. + * @data_len: Indicates the total size of user-transferred data. + * @seg_count: The number of RMPP segments allocated for this send. + * @seg_size: Size of each RMPP segment. * @timeout_ms: Time to wait for a response. * @retries: Number of times to retry a request for a response. * * Users are responsible for initializing the MAD buffer itself, with the - * exception of specifying the payload length field in any RMPP MAD. + * exception of any RMPP header. Additional segment buffer space allocated + * beyond data_len is padding. */ struct ib_mad_send_buf { struct ib_mad_send_buf *next; @@ -224,6 +232,10 @@ struct ib_mad_send_buf { struct ib_mad_agent *mad_agent; struct ib_ah *ah; void *context[2]; + int hdr_len; + int data_len; + int seg_count; + int seg_size; int timeout_ms; int retries; }; @@ -299,7 +311,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, * @mad_recv_wc: Received work completion information on the received MAD. * * MADs received in response to a send request operation will be handed to - * the user after the send operation completes. All data buffers given + * the user before the send operation completes. 
All data buffers given * to registered agents through this routine are owned by the receiving * client, except for snooping agents. Clients snooping MADs should not * modify the data referenced by @mad_recv_wc. @@ -485,17 +497,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); int ib_post_send_mad(struct ib_mad_send_buf *send_buf, struct ib_mad_send_buf **bad_send_buf); -/** - * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. - * @mad_recv_wc: Work completion information for a received MAD. - * @buf: User-provided data buffer to receive the coalesced buffers. The - * referenced buffer should be at least the size of the mad_len specified - * by @mad_recv_wc. - * - * This call copies a chain of received MAD segments into a single data buffer, - * removing duplicated headers. - */ -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf); /** * ib_free_recv_mad - Returns data buffers used to receive a MAD. @@ -590,9 +591,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * with an initialized work request structure. Users may modify the returned * MAD data buffer before posting the send. * - * The returned data buffer will be cleared. Users are responsible for - * initializing the common MAD and any class specific headers. If @rmpp_active - * is set, the RMPP header will be initialized for sending. + * The returned MAD header, class specific headers, and any padding will be + * cleared. Users are responsible for initializing the common MAD header, + * any class specific header, and MAD data area. + * If @rmpp_active is set, the RMPP header will be initialized for sending. */ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, @@ -600,6 +602,16 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, int hdr_len, int data_len, gfp_t gfp_mask); +/** + * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment. + * @send_buf: Previously allocated send data buffer. + * @seg_num: number of segment to return + * + * This routine returns a pointer to the data buffer of an RMPP MAD. + * Users must provide synchronization to @send_buf around this call. + */ +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num); + /** * ib_free_send_mad - Returns data buffers used to send a MAD. * @send_buf: Previously allocated send data buffer. -- cgit v1.2.3
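The copy_rmpp_mad() loop above is the model consumer of the new ib_get_rmpp_segment() call: segment numbers are 1-based, each segment carries seg_size bytes of payload, and the last segment may be short. Here is a minimal in-kernel sketch of the same pattern; fill_rmpp_payload() and the flat "payload" source buffer are hypothetical stand-ins for a caller's data, not part of this patch.

#include <rdma/ib_mad.h>

/*
 * Hypothetical helper: copy a flat payload of msg->data_len bytes into
 * the segments of an RMPP send buffer returned by ib_create_send_mad().
 */
static void fill_rmpp_payload(struct ib_mad_send_buf *msg, const void *payload)
{
	const char *src = payload;
	int seg, left;

	for (seg = 1, left = msg->data_len; left > 0;
	     seg++, left -= msg->seg_size, src += msg->seg_size)
		memcpy(ib_get_rmpp_segment(msg, seg), src,
		       min(left, msg->seg_size));
}

As with copy_rmpp_mad(), any space in the final segment beyond data_len is padding, which ib_create_send_mad() has already cleared.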
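With ib_coalesce_recv_mad() removed, a kernel client that still wants a flat copy of a multipacket receive must walk the recv_buf list itself, exactly as copy_recv_mad() does above: the first segment is copied whole (headers plus data), and each later segment contributes only its payload after the class-specific header. The sketch below assumes a caller-provided buffer of at least mad_recv_wc->mad_len bytes; flatten_recv_mad() and this reuse of the patch's data_offset() helper are illustrative only.

#include <rdma/ib_mad.h>

/* Hypothetical replacement for the removed ib_coalesce_recv_mad(). */
static void flatten_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *dst)
{
	struct ib_mad_recv_buf *recv_buf = &mad_recv_wc->recv_buf;
	int offset = data_offset(recv_buf->mad->mad_hdr.mgmt_class);
	int max_seg_payload = sizeof(struct ib_mad) - offset;
	int left = mad_recv_wc->mad_len;
	char *p = dst;
	int n;

	/* First (or only) segment: common headers plus data. */
	n = min_t(int, left, sizeof(struct ib_mad));
	memcpy(p, recv_buf->mad, n);

	/* Later segments: skip the duplicated headers. */
	for (left -= n, p += n; left > 0; left -= n, p += n) {
		recv_buf = container_of(recv_buf->list.next,
					struct ib_mad_recv_buf, list);
		n = min(left, max_seg_payload);
		memcpy(p, (char *) recv_buf->mad + offset, n);
	}
}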
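On the userspace side, read() on a umad file now returns the struct ib_user_mad header followed by the full message, and mad.hdr.length reports sizeof (struct ib_user_mad) plus the MAD length. When the caller's buffer can hold the header and the first (or only) 256-byte segment but not a whole multipacket message, copy_recv_mad() returns -ENOSPC after copying that first segment, and the packet is requeued, so the caller can learn the real size and retry. A sketch of that retry loop follows; read_mad() is a hypothetical helper, and the install path of the ABI header is an assumption.

#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <infiniband/ib_user_mad.h>	/* assumed install path for the ABI header */

/*
 * Hypothetical helper: read one MAD from an already-open umad fd,
 * growing the buffer once if the message is a multipacket RMPP MAD.
 */
static struct ib_user_mad *read_mad(int fd, ssize_t *len)
{
	size_t size = sizeof(struct ib_user_mad) + 256;	/* fits any single MAD */
	struct ib_user_mad *umad = malloc(size);
	ssize_t n;

	if (!umad)
		return NULL;

	n = read(fd, umad, size);
	if (n < 0 && errno == ENOSPC) {
		/*
		 * The kernel copied the header plus the first segment and
		 * requeued the packet; hdr.length gives the full size.
		 */
		struct ib_user_mad *bigger;

		size = umad->hdr.length;
		bigger = realloc(umad, size);
		if (!bigger) {
			free(umad);
			return NULL;
		}
		umad = bigger;
		n = read(fd, umad, size);
	}

	if (n < 0) {
		free(umad);
		return NULL;
	}

	*len = n;
	return umad;
}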