From 400dbc96583ff3b8ad4c09bd7e9dcd35a6215922 Mon Sep 17 00:00:00 2001 From: Igor Ivanov Date: Wed, 14 Aug 2013 13:58:29 +0300 Subject: IB/core: Infrastructure for extensible uverbs commands Add infrastructure to support extended uverbs capabilities in a forward/backward manner. Uverbs command opcodes which are based on the verbs extensions approach should be greater or equal to IB_USER_VERBS_CMD_THRESHOLD. They have new header format and processed a bit differently. Whenever a specific IB_USER_VERBS_CMD_XXX is extended, which practically means it needs to have additional arguments, we will be able to add them without creating a completely new IB_USER_VERBS_CMD_YYY command or bumping the uverbs ABI version. This patch for itself doesn't provide the whole scheme which is also dependent on adding a comp_mask field to each extended uverbs command struct. The new header framework allows for future extension of the CMD arguments (ib_uverbs_cmd_hdr.in_words, ib_uverbs_cmd_hdr.out_words) for an existing new command (that is a command that supports the new uverbs command header format suggested in this patch) w/o bumping ABI version and with maintaining backward and formward compatibility to new and old libibverbs versions. In the uverbs command we are passing both uverbs arguments and the provider arguments. We split the ib_uverbs_cmd_hdr.in_words to ib_uverbs_cmd_hdr.in_words which will now carry only uverbs input argument struct size and ib_uverbs_cmd_hdr.provider_in_words that will carry the provider input argument size. Same goes for the response (the uverbs CMD output argument). For example take the create_cq call and the mlx4_ib provider: The uverbs layer gets libibverb's struct ibv_create_cq (named struct ib_uverbs_create_cq in the kernel), mlx4_ib gets libmlx4's struct mlx4_create_cq (which includes struct ibv_create_cq and is named struct mlx4_ib_create_cq in the kernel) and in_words = sizeof(mlx4_create_cq)/4 . Thus ib_uverbs_cmd_hdr.in_words carry both uverbs plus mlx4_ib input argument sizes, where uverbs assumes it knows the size of its input argument - struct ibv_create_cq. Now, if we wish to add a variable to struct ibv_create_cq, we can add a comp_mask field to the struct which is basically bit field indicating which fields exists in the struct (as done for the libibverbs API extension), but we need a way to tell what is the total size of the struct and not assume the struct size is predefined (since we may get different struct sizes from different user libibverbs versions). So we know at which point the provider input argument (struct mlx4_create_cq) begins. Same goes for extending the provider struct mlx4_create_cq. Thus we split the ib_uverbs_cmd_hdr.in_words to ib_uverbs_cmd_hdr.in_words which will now carry only uverbs input argument struct size and ib_uverbs_cmd_hdr.provider_in_words that will carry the provider (mlx4_ib) input argument size. Signed-off-by: Igor Ivanov Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- include/uapi/rdma/ib_user_verbs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 805711ea2005..61535aa0a62e 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -43,6 +43,7 @@ * compatibility are made. */ #define IB_USER_VERBS_ABI_VERSION 6 +#define IB_USER_VERBS_CMD_THRESHOLD 50 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -123,6 +124,15 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; +struct ib_uverbs_cmd_hdr_ex { + __u32 command; + __u16 in_words; + __u16 out_words; + __u16 provider_in_words; + __u16 provider_out_words; + __u32 cmd_hdr_reserved; +}; + struct ib_uverbs_get_context { __u64 response; __u64 driver_data[0]; -- cgit v1.2.3 From 436f2ad05a0b65b1467ddf51bc68171c381bf844 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 14 Aug 2013 13:58:30 +0300 Subject: IB/core: Export ib_create/destroy_flow through uverbs Implement ib_uverbs_create_flow() and ib_uverbs_destroy_flow() to support flow steering for user space applications. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 3 + drivers/infiniband/core/uverbs_cmd.c | 214 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 13 ++- include/rdma/ib_verbs.h | 1 + include/uapi/rdma/ib_user_verbs.h | 89 +++++++++++++- 5 files changed, 318 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 0fcd7aa26fa2..ad9d1028102d 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -155,6 +155,7 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; +extern struct idr ib_uverbs_rule_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -215,5 +216,7 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); IB_UVERBS_DECLARE_CMD(close_xrcd); +IB_UVERBS_DECLARE_CMD(create_flow); +IB_UVERBS_DECLARE_CMD(destroy_flow); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b3c07b0c9f26..6e98df929e29 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -54,6 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -330,6 +331,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->xrcd_list); + INIT_LIST_HEAD(&ucontext->rule_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -2587,6 +2589,218 @@ out_put: return ret ? ret : in_len; } +static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + ib_spec->type = kern_spec->type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); + if (ib_spec->eth.size != kern_spec->eth.size) + return -EINVAL; + memcpy(&ib_spec->eth.val, &kern_spec->eth.val, + sizeof(struct ib_flow_eth_filter)); + memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, + sizeof(struct ib_flow_eth_filter)); + break; + case IB_FLOW_SPEC_IPV4: + ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); + if (ib_spec->ipv4.size != kern_spec->ipv4.size) + return -EINVAL; + memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, + sizeof(struct ib_flow_ipv4_filter)); + memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, + sizeof(struct ib_flow_ipv4_filter)); + break; + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); + if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) + return -EINVAL; + memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, + sizeof(struct ib_flow_tcp_udp_filter)); + memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, + sizeof(struct ib_flow_tcp_udp_filter)); + break; + default: + return -EINVAL; + } + return 0; +} + +ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_flow cmd; + struct ib_uverbs_create_flow_resp resp; + struct ib_uobject *uobj; + struct ib_flow *flow_id; + struct ib_kern_flow_attr *kern_flow_attr; + struct ib_flow_attr *flow_attr; + struct ib_qp *qp; + int err = 0; + void *kern_spec; + void *ib_spec; + int i; + int kern_attr_size; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && + !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) + return -EPERM; + + if (cmd.flow_attr.num_of_specs) { + kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); + if (!kern_flow_attr) + return -ENOMEM; + + memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); + kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr_ex); + if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd), + kern_attr_size)) { + err = -EFAULT; + goto err_free_attr; + } + } else { + kern_flow_attr = &cmd.flow_attr; + kern_attr_size = sizeof(cmd.flow_attr); + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto err_free_attr; + } + init_uobj(uobj, 0, file->ucontext, &rule_lock_class); + down_write(&uobj->mutex); + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + err = -EINVAL; + goto err_uobj; + } + + flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); + if (!flow_attr) { + err = -ENOMEM; + goto err_put; + } + + flow_attr->type = kern_flow_attr->type; + flow_attr->priority = kern_flow_attr->priority; + flow_attr->num_of_specs = kern_flow_attr->num_of_specs; + flow_attr->port = kern_flow_attr->port; + flow_attr->flags = kern_flow_attr->flags; + flow_attr->size = sizeof(*flow_attr); + + kern_spec = kern_flow_attr + 1; + ib_spec = flow_attr + 1; + for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) { + err = kern_spec_to_ib_spec(kern_spec, ib_spec); + if (err) + goto err_free; + flow_attr->size += + ((union ib_flow_spec *) ib_spec)->size; + kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size; + kern_spec += ((struct ib_kern_spec *) kern_spec)->size; + ib_spec += ((union ib_flow_spec *) ib_spec)->size; + } + if (kern_attr_size) { + pr_warn("create flow failed, %d bytes left from uverb cmd\n", + kern_attr_size); + goto err_free; + } + flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + if (IS_ERR(flow_id)) { + err = PTR_ERR(flow_id); + goto err_free; + } + flow_id->qp = qp; + flow_id->uobject = uobj; + uobj->object = flow_id; + + err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); + if (err) + goto destroy_flow; + + memset(&resp, 0, sizeof(resp)); + resp.flow_handle = uobj->id; + + if (copy_to_user((void __user *)(unsigned long) cmd.response, + &resp, sizeof(resp))) { + err = -EFAULT; + goto err_copy; + } + + put_qp_read(qp); + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rule_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return in_len; +err_copy: + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +destroy_flow: + ib_destroy_flow(flow_id); +err_free: + kfree(flow_attr); +err_put: + put_qp_read(qp); +err_uobj: + put_uobj_write(uobj); +err_free_attr: + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return err; +} + +ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) { + struct ib_uverbs_destroy_flow cmd; + struct ib_flow *flow_id; + struct ib_uobject *uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + flow_id = uobj->object; + + ret = ib_destroy_flow(flow_id); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return ret ? ret : in_len; +} + static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e4e7b2449d19..75ad86c4abf8 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -73,6 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); +DEFINE_IDR(ib_uverbs_rule_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -113,7 +114,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, + [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow, + [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow }; static void ib_uverbs_add_one(struct ib_device *device); @@ -212,6 +215,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { + struct ib_flow *flow_id = uobj->object; + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + ib_destroy_flow(flow_id); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6f874b00491a..274205d4df97 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -954,6 +954,7 @@ struct ib_ucontext { struct list_head srq_list; struct list_head ah_list; struct list_head xrcd_list; + struct list_head rule_list; int closing; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 61535aa0a62e..0b233c56b0e4 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -86,7 +86,9 @@ enum { IB_USER_VERBS_CMD_OPEN_XRCD, IB_USER_VERBS_CMD_CLOSE_XRCD, IB_USER_VERBS_CMD_CREATE_XSRQ, - IB_USER_VERBS_CMD_OPEN_QP + IB_USER_VERBS_CMD_OPEN_QP, + IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, + IB_USER_VERBS_CMD_DESTROY_FLOW }; /* @@ -694,6 +696,91 @@ struct ib_uverbs_detach_mcast { __u64 driver_data[0]; }; +struct ib_kern_eth_filter { + __u8 dst_mac[6]; + __u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; +}; + +struct ib_kern_spec_eth { + __u32 type; + __u16 size; + __u16 reserved; + struct ib_kern_eth_filter val; + struct ib_kern_eth_filter mask; +}; + +struct ib_kern_ipv4_filter { + __be32 src_ip; + __be32 dst_ip; +}; + +struct ib_kern_spec_ipv4 { + __u32 type; + __u16 size; + __u16 reserved; + struct ib_kern_ipv4_filter val; + struct ib_kern_ipv4_filter mask; +}; + +struct ib_kern_tcp_udp_filter { + __be16 dst_port; + __be16 src_port; +}; + +struct ib_kern_spec_tcp_udp { + __u32 type; + __u16 size; + __u16 reserved; + struct ib_kern_tcp_udp_filter val; + struct ib_kern_tcp_udp_filter mask; +}; + +struct ib_kern_spec { + union { + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + struct ib_kern_spec_eth eth; + struct ib_kern_spec_ipv4 ipv4; + struct ib_kern_spec_tcp_udp tcp_udp; + }; +}; + +struct ib_kern_flow_attr { + __u32 type; + __u16 size; + __u16 priority; + __u8 num_of_specs; + __u8 reserved[2]; + __u8 port; + __u32 flags; + /* Following are the optional layers according to user request + * struct ib_flow_spec_xxx + * struct ib_flow_spec_yyy + */ +}; + +struct ib_uverbs_create_flow { + __u32 comp_mask; + __u64 response; + __u32 qp_handle; + struct ib_kern_flow_attr flow_attr; +}; + +struct ib_uverbs_create_flow_resp { + __u32 comp_mask; + __u32 flow_handle; +}; + +struct ib_uverbs_destroy_flow { + __u32 comp_mask; + __u32 flow_handle; +}; + struct ib_uverbs_create_srq { __u64 response; __u64 user_handle; -- cgit v1.2.3