From bb9b18fb55b03477fe5bdd3e97245d6d4d3dee4f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 30 Nov 2013 13:21:30 +0100 Subject: genl: Add genlmsg_new_unicast() for unicast message allocation Allocates a new sk_buff large enough to cover the specified payload plus required Netlink headers. Will check receiving socket for memory mapped i/o capability and use it if enabled. Will fall back to non-mapped skb if message size exceeds the frame size of the ring. Signed-of-by: Thomas Graf Reviewed-by: Daniel Borkmann Signed-off-by: Jesse Gross --- include/net/genetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b177ed803b7..93695f0e22a5 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -73,6 +73,7 @@ struct genl_family { * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers + * @dst_sk: destination socket */ struct genl_info { u32 snd_seq; @@ -85,6 +86,7 @@ struct genl_info { struct net * _net; #endif void * user_ptr[2]; + struct sock * dst_sk; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -177,6 +179,8 @@ void genl_notify(struct genl_family *family, struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags); +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, + gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); -- cgit v1.2.3 From af2806f8f90a150160be898cd85332459c83c5cb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 13 Dec 2013 15:22:17 +0100 Subject: net: Export skb_zerocopy() to zerocopy from one skb to another Make the skb zerocopy logic written for nfnetlink queue available for use by other modules. Signed-off-by: Thomas Graf Reviewed-by: Daniel Borkmann Acked-by: David S. Miller Signed-off-by: Jesse Gross --- include/linux/skbuff.h | 3 ++ net/core/skbuff.c | 85 ++++++++++++++++++++++++++++++++++++ net/netfilter/nfnetlink_queue_core.c | 59 ++----------------------- 3 files changed, 92 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bec1cc7d5e3c..7c48e2d4c72b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2345,6 +2345,9 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); +unsigned int skb_zerocopy_headlen(const struct sk_buff *from); +void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2718fed53d8c..55859cb8b83d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2122,6 +2122,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); + /** + * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() + * @from: source buffer + * + * Calculates the amount of linear headroom needed in the 'to' skb passed + * into skb_zerocopy(). + */ +unsigned int +skb_zerocopy_headlen(const struct sk_buff *from) +{ + unsigned int hlen = 0; + + if (!from->head_frag || + skb_headlen(from) < L1_CACHE_BYTES || + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + hlen = skb_headlen(from); + + if (skb_has_frag_list(from)) + hlen = from->len; + + return hlen; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); + +/** + * skb_zerocopy - Zero copy skb to skb + * @to: destination buffer + * @source: source buffer + * @len: number of bytes to copy from source buffer + * @hlen: size of linear headroom in destination buffer + * + * Copies up to `len` bytes from `from` to `to` by creating references + * to the frags in the source buffer. + * + * The `hlen` as calculated by skb_zerocopy_headlen() specifies the + * headroom in the `to` buffer. + */ +void +skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + BUG_ON(!from->head_frag && !hlen); + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} +EXPORT_SYMBOL_GPL(skb_zerocopy); + void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 21258cf70091..615ee12647ae 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -235,51 +235,6 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } -static void -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) -{ - int i, j = 0; - int plen = 0; /* length of skb->head fragment */ - struct page *page; - unsigned int offset; - - /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } - - if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); - len -= hlen; - } else { - plen = min_t(int, skb_headlen(from), len); - if (plen) { - page = virt_to_head_page(from->head); - offset = from->data - (unsigned char *)page_address(page); - __skb_fill_page_desc(to, 0, page, offset, plen); - get_page(page); - j = 1; - len -= plen; - } - } - - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; - - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { - if (!len) - break; - skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; - skb_frag_ref(to, j); - j++; - } - skb_shinfo(to)->nr_frags = j; -} - static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, bool csum_verify) @@ -304,7 +259,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, { size_t size; size_t data_len = 0, cap_len = 0; - int hlen = 0; + unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -356,14 +311,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || - skb_headlen(entskb) < L1_CACHE_BYTES || - skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) - hlen = skb_headlen(entskb); - - if (skb_has_frag_list(entskb)) - hlen = entskb->len; - hlen = min_t(int, data_len, hlen); + hlen = skb_zerocopy_headlen(entskb); + hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; @@ -504,7 +453,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - nfqnl_zcopy(skb, entskb, data_len, hlen); + skb_zerocopy(skb, entskb, data_len, hlen); } nlh->nlmsg_len = skb->len; -- cgit v1.2.3 From 43d4be9cb55f3bac5253e9289996fd9d735531db Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 13 Dec 2013 15:22:18 +0100 Subject: openvswitch: Allow user space to announce ability to accept unaligned Netlink messages Signed-off-by: Thomas Graf Reviewed-by: Daniel Borkmann Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 4 ++++ net/openvswitch/datapath.c | 14 ++++++++++++++ net/openvswitch/datapath.h | 2 ++ 3 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d120f9fe0017..07ef2c3e21fa 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -75,6 +75,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ + OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ __OVS_DP_ATTR_MAX }; @@ -106,6 +107,9 @@ struct ovs_vport_stats { __u64 tx_dropped; /* no space available in linux */ }; +/* Allow last Netlink attribute to be unaligned */ +#define OVS_DP_F_UNALIGNED (1 << 0) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 50d7782d8d7c..6a9b0cb8a1db 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1040,6 +1040,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, }; static struct genl_family dp_datapath_genl_family = { @@ -1098,6 +1099,9 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, &dp_megaflow_stats)) goto nla_put_failure; + if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) + goto nla_put_failure; + return genlmsg_end(skb, ovs_header); nla_put_failure: @@ -1144,6 +1148,12 @@ static struct datapath *lookup_datapath(struct net *net, return dp ? dp : ERR_PTR(-ENODEV); } +static void ovs_dp_change(struct datapath *dp, struct nlattr **a) +{ + if (a[OVS_DP_ATTR_USER_FEATURES]) + dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1202,6 +1212,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + ovs_dp_change(dp, a); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); @@ -1305,6 +1317,8 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto unlock; + ovs_dp_change(dp, info->attrs); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index ba13be4a680f..62619a4ee649 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,6 +88,8 @@ struct datapath { /* Network namespace ref. */ struct net *net; #endif + + u32 user_features; }; /** -- cgit v1.2.3 From 44da5ae5fbea4686f667dc854e5ea16814e44c59 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 13 Dec 2013 15:22:19 +0100 Subject: openvswitch: Drop user features if old user space attempted to create datapath Drop user features if an outdated user space instance that does not understand the concept of user_features attempted to create a new datapath. Signed-off-by: Thomas Graf Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 10 +++++++++- net/openvswitch/datapath.c | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 07ef2c3e21fa..970553cbbc8e 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -40,7 +40,15 @@ struct ovs_header { #define OVS_DATAPATH_FAMILY "ovs_datapath" #define OVS_DATAPATH_MCGROUP "ovs_datapath" -#define OVS_DATAPATH_VERSION 0x1 + +/* V2: + * - API users are expected to provide OVS_DP_ATTR_USER_FEATURES + * when creating the datapath. + */ +#define OVS_DATAPATH_VERSION 2 + +/* First OVS datapath version to support features */ +#define OVS_DP_VER_FEATURES 2 enum ovs_datapath_cmd { OVS_DP_CMD_UNSPEC, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6a9b0cb8a1db..497b2fd36df4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1148,6 +1148,18 @@ static struct datapath *lookup_datapath(struct net *net, return dp ? dp : ERR_PTR(-ENODEV); } +static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) +{ + struct datapath *dp; + + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + if (!dp) + return; + + WARN(dp->user_features, "Dropping previously announced user features\n"); + dp->user_features = 0; +} + static void ovs_dp_change(struct datapath *dp, struct nlattr **a) { if (a[OVS_DP_ATTR_USER_FEATURES]) @@ -1220,6 +1232,15 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (err == -EBUSY) err = -EEXIST; + if (err == -EEXIST) { + /* An outdated user space instance that does not understand + * the concept of user_features has attempted to create a new + * datapath and is likely to reuse it. Drop all user features. + */ + if (info->genlhdr->version < OVS_DP_VER_FEATURES) + ovs_dp_reset_user_features(skb, info); + } + goto err_destroy_ports_array; } -- cgit v1.2.3