summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJohannes Berg <johannes.berg@intel.com>2019-02-22 13:48:13 +0100
committerJohannes Berg <johannes.berg@intel.com>2019-02-22 13:48:13 +0100
commitb7b14ec1ebef35d22f3f4087816468f22c987f75 (patch)
tree3f99f4d7b770d7bba3ee84663b32f98dfbe7582d /net
parent77ff2c6b49843b01adef1f80abb091753e4c9c65 (diff)
parent7a25c6c0aac85bbc50d3ce49cd08888adb14508b (diff)
downloadlinux-b7b14ec1ebef35d22f3f4087816468f22c987f75.tar.bz2
Merge remote-tracking branch 'net-next/master' into mac80211-next
Merge net-next to resolve a conflict and to get the mac80211 rhashtable fixes so further patches can be applied on top. Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig2
-rw-r--r--net/atm/proc.c3
-rw-r--r--net/batman-adv/bat_v_elp.c3
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/gateway_client.c1
-rw-r--r--net/batman-adv/gateway_common.c1
-rw-r--r--net/batman-adv/gateway_common.h6
-rw-r--r--net/batman-adv/hard-interface.c5
-rw-r--r--net/batman-adv/netlink.c1080
-rw-r--r--net/batman-adv/netlink.h6
-rw-r--r--net/batman-adv/soft-interface.c6
-rw-r--r--net/batman-adv/sysfs.c64
-rw-r--r--net/bridge/br_multicast.c10
-rw-r--r--net/bridge/br_switchdev.c20
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c10
-rw-r--r--net/caif/cfpkt_skbuff.c16
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c71
-rw-r--r--net/core/devlink.c1176
-rw-r--r--net/core/ethtool.c251
-rw-r--r--net/core/filter.c558
-rw-r--r--net/core/flow_offload.c153
-rw-r--r--net/core/lwt_bpf.c265
-rw-r--r--net/core/neighbour.c11
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/net-traces.c8
-rw-r--r--net/core/page_pool.c22
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/core/sock.c22
-rw-r--r--net/dccp/ccid.h4
-rw-r--r--net/dsa/dsa2.c3
-rw-r--r--net/dsa/dsa_priv.h4
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/port.c45
-rw-r--r--net/dsa/slave.c43
-rw-r--r--net/dsa/tag_dsa.c9
-rw-r--r--net/dsa/tag_edsa.c9
-rw-r--r--net/dsa/tag_ksz.c2
-rw-r--r--net/ieee802154/6lowpan/reassembly.c141
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/igmp.c1
-rw-r--r--net/ipv4/inet_diag.c10
-rw-r--r--net/ipv4/inetpeer.c1
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ipmr.c88
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic_main.c7
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c9
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv6/addrconf.c3
-rw-r--r--net/ipv6/addrconf_core.c6
-rw-r--r--net/ipv6/af_inet6.c7
-rw-r--r--net/ipv6/ip6_gre.c41
-rw-r--r--net/ipv6/ip6_offload.c33
-rw-r--r--net/ipv6/ip6mr.c78
-rw-r--r--net/ipv6/mcast_snoop.c1
-rw-r--r--net/ipv6/netfilter.c17
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c18
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c21
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c9
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/seg6.c4
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/sit.c3
-rw-r--r--net/l2tp/l2tp_core.c9
-rw-r--r--net/l2tp/l2tp_core.h20
-rw-r--r--net/l2tp/l2tp_ip.c3
-rw-r--r--net/l2tp/l2tp_ip6.c3
-rw-r--r--net/mac80211/agg-tx.c4
-rw-r--r--net/mac80211/cfg.c6
-rw-r--r--net/mac80211/mesh.h6
-rw-r--r--net/mac80211/mesh_pathtbl.c157
-rw-r--r--net/mac80211/tx.c12
-rw-r--r--net/mac80211/util.c6
-rw-r--r--net/mpls/mpls_iptunnel.c4
-rw-r--r--net/netfilter/ipvs/Kconfig1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c17
-rw-r--r--net/netfilter/nf_conntrack_core.c30
-rw-r--r--net/netfilter/nf_conntrack_netlink.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c42
-rw-r--r--net/netfilter/nf_tables_api.c89
-rw-r--r--net/netfilter/nft_compat.c65
-rw-r--r--net/netfilter/nft_dynset.c18
-rw-r--r--net/netfilter/nft_immediate.c6
-rw-r--r--net/netfilter/nft_lookup.c18
-rw-r--r--net/netfilter/nft_objref.c18
-rw-r--r--net/netfilter/nft_tunnel.c34
-rw-r--r--net/netfilter/utils.c6
-rw-r--r--net/netfilter/x_tables.c2
-rw-r--r--net/netfilter/xt_addrtype.c16
-rw-r--r--net/netfilter/xt_recent.c4
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/rds/af_rds.c37
-rw-r--r--net/rds/bind.c6
-rw-r--r--net/rds/connection.c21
-rw-r--r--net/rds/ib.c11
-rw-r--r--net/rds/ib.h4
-rw-r--r--net/rds/ib_cm.c72
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/rdma_transport.c14
-rw-r--r--net/rds/rdma_transport.h6
-rw-r--r--net/rds/rds.h14
-rw-r--r--net/rds/recv.c1
-rw-r--r--net/rds/send.c7
-rw-r--r--net/rds/tcp.c8
-rw-r--r--net/rds/tcp_listen.c2
-rw-r--r--net/rds/threads.c1
-rw-r--r--net/rose/af_rose.c4
-rw-r--r--net/rxrpc/recvmsg.c3
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c2
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c4
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c1331
-rw-r--r--net/sched/cls_basic.c14
-rw-r--r--net/sched/cls_bpf.c15
-rw-r--r--net/sched/cls_cgroup.c15
-rw-r--r--net/sched/cls_flow.c15
-rw-r--r--net/sched/cls_flower.c101
-rw-r--r--net/sched/cls_fw.c20
-rw-r--r--net/sched/cls_matchall.c19
-rw-r--r--net/sched/cls_route.c19
-rw-r--r--net/sched/cls_rsvp.h16
-rw-r--r--net/sched/cls_tcindex.c93
-rw-r--r--net/sched/cls_u32.c14
-rw-r--r--net/sched/sch_api.c23
-rw-r--r--net/sched/sch_generic.c8
-rw-r--r--net/sctp/diag.c1
-rw-r--r--net/sctp/offload.c1
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/stream.c24
-rw-r--r--net/smc/af_smc.c124
-rw-r--r--net/smc/smc_cdc.c52
-rw-r--r--net/smc/smc_cdc.h62
-rw-r--r--net/smc/smc_clc.c2
-rw-r--r--net/smc/smc_close.c16
-rw-r--r--net/smc/smc_core.c17
-rw-r--r--net/smc/smc_core.h20
-rw-r--r--net/smc/smc_diag.c3
-rw-r--r--net/smc/smc_ib.c31
-rw-r--r--net/smc/smc_ib.h2
-rw-r--r--net/smc/smc_llc.c3
-rw-r--r--net/smc/smc_netns.h20
-rw-r--r--net/smc/smc_pnet.c631
-rw-r--r--net/smc/smc_pnet.h13
-rw-r--r--net/smc/smc_tx.c81
-rw-r--r--net/smc/smc_wr.c46
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/socket.c82
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c49
-rw-r--r--net/sunrpc/debugfs.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c105
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c9
-rw-r--r--net/sunrpc/xprtrdma/verbs.c3
-rw-r--r--net/switchdev/switchdev.c20
-rw-r--r--net/tipc/link.c17
-rw-r--r--net/tipc/msg.h22
-rw-r--r--net/tipc/node.c11
-rw-r--r--net/tls/tls_device.c24
-rw-r--r--net/tls/tls_main.c17
-rw-r--r--net/tls/tls_sw.c198
-rw-r--r--net/vmw_vsock/virtio_transport.c29
-rw-r--r--net/vmw_vsock/vmci_transport.c4
-rw-r--r--net/wireless/ap.c2
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/pmsr.c26
-rw-r--r--net/wireless/sme.c2
-rw-r--r--net/wireless/util.c35
-rw-r--r--net/x25/af_x25.c6
-rw-r--r--net/xdp/xdp_umem.c11
-rw-r--r--net/xdp/xsk.c4
194 files changed, 6820 insertions, 2065 deletions
diff --git a/net/Kconfig b/net/Kconfig
index 5cb9de1aaf88..62da6148e9f8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -403,7 +403,7 @@ config LWTUNNEL
config LWTUNNEL_BPF
bool "Execute BPF program as route nexthop action"
- depends on LWTUNNEL
+ depends on LWTUNNEL && INET
default y if LWTUNNEL=y
---help---
Allows to run BPF programs as a nexthop action following a route
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 0b0495a41bbe..d79221fd4dae 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -134,7 +134,8 @@ static void vcc_seq_stop(struct seq_file *seq, void *v)
static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
v = vcc_walk(seq, 1);
- *pos += !!PTR_ERR(v);
+ if (v)
+ (*pos)++;
return v;
}
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 7b80f6f8d4dc..a9b7919c9de5 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
+ /* free the TID stats immediately */
+ cfg80211_sinfo_release_content(&sinfo);
+
dev_put(real_netdev);
if (ret == -ENOENT) {
/* Node is not associated anymore! It would be
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 899ab051ffce..310a4f353008 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1711,6 +1711,8 @@ static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr,
batadv_dat_send_data(bat_priv, skb, yiaddr, vid, BATADV_P_DAT_DHT_PUT);
batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
+ consume_skb(skb);
+
batadv_dbg(BATADV_DBG_DAT, bat_priv,
"Snooped from outgoing DHCPACK (server address): %pI4, %pM (vid: %i)\n",
&ip_dst, hw_dst, batadv_print_vid(vid));
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 0a6b9b30eabd..f5811f61aa92 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -47,7 +47,6 @@
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
-#include "gateway_common.h"
#include "hard-interface.h"
#include "log.h"
#include "netlink.h"
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 53d03adc9bfc..e064de45e22c 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -28,6 +28,7 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "gateway_client.h"
#include "log.h"
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 89bae100a0b0..128467a0fb89 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -25,12 +25,6 @@
struct net_device;
-enum batadv_gw_modes {
- BATADV_GW_MODE_OFF,
- BATADV_GW_MODE_CLIENT,
- BATADV_GW_MODE_SERVER,
-};
-
/**
* enum batadv_bandwidth_units - bandwidth unit types
*/
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 28c1fb8d1af0..96ef7c70b4d9 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -20,7 +20,6 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/gfp.h>
@@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
parent_dev = __dev_get_by_index((struct net *)parent_net,
dev_get_iflink(net_dev));
/* if we got a NULL parent_dev there is something broken.. */
- if (WARN(!parent_dev, "Cannot find parent device"))
+ if (!parent_dev) {
+ pr_err("Cannot find parent device\n");
return false;
+ }
if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
return false;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 5fe833cc9507..67a58da2e6a0 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -20,13 +20,17 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
+#include <linux/err.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/genetlink.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
@@ -47,21 +51,54 @@
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
+#include "gateway_common.h"
#include "hard-interface.h"
+#include "log.h"
#include "multicast.h"
+#include "network-coding.h"
#include "originator.h"
#include "soft-interface.h"
#include "tp_meter.h"
#include "translation-table.h"
+struct net;
+
struct genl_family batadv_netlink_family;
/* multicast groups */
enum batadv_netlink_multicast_groups {
+ BATADV_NL_MCGRP_CONFIG,
BATADV_NL_MCGRP_TPMETER,
};
+/**
+ * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags
+ */
+enum batadv_genl_ops_flags {
+ /**
+ * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in
+ * attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be
+ * saved in info->user_ptr[0]
+ */
+ BATADV_FLAG_NEED_MESH = BIT(0),
+
+ /**
+ * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in
+ * attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be
+ * saved in info->user_ptr[1]
+ */
+ BATADV_FLAG_NEED_HARDIF = BIT(1),
+
+ /**
+ * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in
+ * attribute BATADV_ATTR_VLANID and expects a pointer to it to be
+ * saved in info->user_ptr[1]
+ */
+ BATADV_FLAG_NEED_VLAN = BIT(2),
+};
+
static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
+ [BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG },
[BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
};
@@ -104,6 +141,26 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
[BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 },
[BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 },
[BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 },
+ [BATADV_ATTR_VLANID] = { .type = NLA_U16 },
+ [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_AP_ISOLATION_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_ISOLATION_MARK] = { .type = NLA_U32 },
+ [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 },
+ [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_FRAGMENTATION_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_GW_BANDWIDTH_DOWN] = { .type = NLA_U32 },
+ [BATADV_ATTR_GW_BANDWIDTH_UP] = { .type = NLA_U32 },
+ [BATADV_ATTR_GW_MODE] = { .type = NLA_U8 },
+ [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 },
+ [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 },
+ [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 },
+ [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 },
+ [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 },
+ [BATADV_ATTR_ELP_INTERVAL] = { .type = NLA_U32 },
+ [BATADV_ATTR_THROUGHPUT_OVERRIDE] = { .type = NLA_U32 },
};
/**
@@ -122,20 +179,75 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
}
/**
- * batadv_netlink_mesh_info_put() - fill in generic information about mesh
- * interface
- * @msg: netlink message to be sent back
- * @soft_iface: interface for which the data should be taken
+ * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
*
- * Return: 0 on success, < 0 on error
+ * Return: 0 on success or negative error number in case of failure
*/
-static int
-batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg,
+ struct batadv_priv *bat_priv)
+{
+ struct batadv_softif_vlan *vlan;
+ u8 ap_isolation;
+
+ vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+ if (!vlan)
+ return 0;
+
+ ap_isolation = atomic_read(&vlan->ap_isolation);
+ batadv_softif_vlan_put(vlan);
+
+ return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+ !!ap_isolation);
+}
+
+/**
+ * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg
+ * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr,
+ struct batadv_priv *bat_priv)
+{
+ struct batadv_softif_vlan *vlan;
+
+ vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+ if (!vlan)
+ return -ENOENT;
+
+ atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+ batadv_softif_vlan_put(vlan);
+
+ return 0;
+}
+
+/**
+ * batadv_netlink_mesh_fill() - Fill message with mesh attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_mesh_fill(struct sk_buff *msg,
+ struct batadv_priv *bat_priv,
+ enum batadv_nl_commands cmd,
+ u32 portid, u32 seq, int flags)
{
- struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ struct net_device *soft_iface = bat_priv->soft_iface;
struct batadv_hard_iface *primary_if = NULL;
struct net_device *hard_iface;
- int ret = -ENOBUFS;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+ if (!hdr)
+ return -ENOBUFS;
if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
@@ -146,16 +258,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
soft_iface->dev_addr) ||
nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
(u8)atomic_read(&bat_priv->tt.vn)))
- goto out;
+ goto nla_put_failure;
#ifdef CONFIG_BATMAN_ADV_BLA
if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
ntohs(bat_priv->bla.claim_dest.group)))
- goto out;
+ goto nla_put_failure;
#endif
if (batadv_mcast_mesh_info_put(msg, bat_priv))
- goto out;
+ goto nla_put_failure;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
@@ -167,77 +279,345 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
hard_iface->name) ||
nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
hard_iface->dev_addr))
- goto out;
+ goto nla_put_failure;
}
- ret = 0;
+ if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
+ !!atomic_read(&bat_priv->aggregated_ogms)))
+ goto nla_put_failure;
+
+ if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK,
+ bat_priv->isolation_mark))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK,
+ bat_priv->isolation_mark_mask))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED,
+ !!atomic_read(&bat_priv->bonding)))
+ goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+ if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
+ !!atomic_read(&bat_priv->bridge_loop_avoidance)))
+ goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+ if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
+ !!atomic_read(&bat_priv->distributed_arp_table)))
+ goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+ if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED,
+ !!atomic_read(&bat_priv->fragmentation)))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN,
+ atomic_read(&bat_priv->gw.bandwidth_down)))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP,
+ atomic_read(&bat_priv->gw.bandwidth_up)))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, BATADV_ATTR_GW_MODE,
+ atomic_read(&bat_priv->gw.mode)))
+ goto nla_put_failure;
+
+ if (bat_priv->algo_ops->gw.get_best_gw_node &&
+ bat_priv->algo_ops->gw.is_eligible) {
+ /* GW selection class is not available if the routing algorithm
+ * in use does not implement the GW API
+ */
+ if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS,
+ atomic_read(&bat_priv->gw.sel_class)))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
+ atomic_read(&bat_priv->hop_penalty)))
+ goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+ if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL,
+ atomic_read(&bat_priv->log_level)))
+ goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
+ !atomic_read(&bat_priv->multicast_mode)))
+ goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+ if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED,
+ !!atomic_read(&bat_priv->network_coding)))
+ goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+ if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL,
+ atomic_read(&bat_priv->orig_interval)))
+ goto nla_put_failure;
- out:
if (primary_if)
batadv_hardif_put(primary_if);
- return ret;
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
}
/**
- * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO
- * netlink request
- * @skb: received netlink message
- * @info: receiver information
+ * batadv_netlink_notify_mesh() - send softif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success, < 0 on error
*/
-static int
-batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
{
- struct net *net = genl_info_net(info);
- struct net_device *soft_iface;
- struct sk_buff *msg = NULL;
- void *msg_head;
- int ifindex;
+ struct sk_buff *msg;
int ret;
- if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
- return -EINVAL;
-
- ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
- if (!ifindex)
- return -EINVAL;
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
- soft_iface = dev_get_by_index(net, ifindex);
- if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
- ret = -ENODEV;
- goto out;
+ ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH,
+ 0, 0, 0);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
}
+ genlmsg_multicast_netns(&batadv_netlink_family,
+ dev_net(bat_priv->soft_iface), msg, 0,
+ BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+ return 0;
+}
+
+/**
+ * batadv_netlink_get_mesh() - Get softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+ struct batadv_priv *bat_priv = info->user_ptr[0];
+ struct sk_buff *msg;
+ int ret;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg) {
- ret = -ENOMEM;
- goto out;
+ if (!msg)
+ return -ENOMEM;
+
+ ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH,
+ info->snd_portid, info->snd_seq, 0);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
}
- msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
- &batadv_netlink_family, 0,
- BATADV_CMD_GET_MESH_INFO);
- if (!msg_head) {
- ret = -ENOBUFS;
- goto out;
+ ret = genlmsg_reply(msg, info);
+
+ return ret;
+}
+
+/**
+ * batadv_netlink_set_mesh() - Set softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+ struct batadv_priv *bat_priv = info->user_ptr[0];
+ struct nlattr *attr;
+
+ if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED];
+
+ atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr));
}
- ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+ if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
- out:
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_netlink_set_mesh_ap_isolation(attr, bat_priv);
+ }
- if (ret) {
- if (msg)
- nlmsg_free(msg);
- return ret;
+ if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) {
+ attr = info->attrs[BATADV_ATTR_ISOLATION_MARK];
+
+ bat_priv->isolation_mark = nla_get_u32(attr);
}
- genlmsg_end(msg, msg_head);
- return genlmsg_reply(msg, info);
+ if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) {
+ attr = info->attrs[BATADV_ATTR_ISOLATION_MASK];
+
+ bat_priv->isolation_mark_mask = nla_get_u32(attr);
+ }
+
+ if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_BONDING_ENABLED];
+
+ atomic_set(&bat_priv->bonding, !!nla_get_u8(attr));
+ }
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+ if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED];
+
+ atomic_set(&bat_priv->bridge_loop_avoidance,
+ !!nla_get_u8(attr));
+ batadv_bla_status_update(bat_priv->soft_iface);
+ }
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+ if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED];
+
+ atomic_set(&bat_priv->distributed_arp_table,
+ !!nla_get_u8(attr));
+ batadv_dat_status_update(bat_priv->soft_iface);
+ }
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+ if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
+
+ atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+ batadv_update_min_mtu(bat_priv->soft_iface);
+ }
+
+ if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
+ attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN];
+
+ atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr));
+ batadv_gw_tvlv_container_update(bat_priv);
+ }
+
+ if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) {
+ attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP];
+
+ atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr));
+ batadv_gw_tvlv_container_update(bat_priv);
+ }
+
+ if (info->attrs[BATADV_ATTR_GW_MODE]) {
+ u8 gw_mode;
+
+ attr = info->attrs[BATADV_ATTR_GW_MODE];
+ gw_mode = nla_get_u8(attr);
+
+ if (gw_mode <= BATADV_GW_MODE_SERVER) {
+ /* Invoking batadv_gw_reselect() is not enough to really
+ * de-select the current GW. It will only instruct the
+ * gateway client code to perform a re-election the next
+ * time that this is needed.
+ *
+ * When gw client mode is being switched off the current
+ * GW must be de-selected explicitly otherwise no GW_ADD
+ * uevent is thrown on client mode re-activation. This
+ * is operation is performed in
+ * batadv_gw_check_client_stop().
+ */
+ batadv_gw_reselect(bat_priv);
+
+ /* always call batadv_gw_check_client_stop() before
+ * changing the gateway state
+ */
+ batadv_gw_check_client_stop(bat_priv);
+ atomic_set(&bat_priv->gw.mode, gw_mode);
+ batadv_gw_tvlv_container_update(bat_priv);
+ }
+ }
+
+ if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] &&
+ bat_priv->algo_ops->gw.get_best_gw_node &&
+ bat_priv->algo_ops->gw.is_eligible) {
+ /* setting the GW selection class is allowed only if the routing
+ * algorithm in use implements the GW API
+ */
+
+ u32 sel_class_max = 0xffffffffu;
+ u32 sel_class;
+
+ attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
+ sel_class = nla_get_u32(attr);
+
+ if (!bat_priv->algo_ops->gw.store_sel_class)
+ sel_class_max = BATADV_TQ_MAX_VALUE;
+
+ if (sel_class >= 1 && sel_class <= sel_class_max) {
+ atomic_set(&bat_priv->gw.sel_class, sel_class);
+ batadv_gw_reselect(bat_priv);
+ }
+ }
+
+ if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
+ attr = info->attrs[BATADV_ATTR_HOP_PENALTY];
+
+ atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr));
+ }
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+ if (info->attrs[BATADV_ATTR_LOG_LEVEL]) {
+ attr = info->attrs[BATADV_ATTR_LOG_LEVEL];
+
+ atomic_set(&bat_priv->log_level,
+ nla_get_u32(attr) & BATADV_DBG_ALL);
+ }
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED];
+
+ atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr));
+ }
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+ if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED];
+
+ atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr));
+ batadv_nc_status_update(bat_priv->soft_iface);
+ }
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+ if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) {
+ u32 orig_interval;
+
+ attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL];
+ orig_interval = nla_get_u32(attr);
+
+ orig_interval = min_t(u32, orig_interval, INT_MAX);
+ orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER);
+
+ atomic_set(&bat_priv->orig_interval, orig_interval);
+ }
+
+ batadv_netlink_notify_mesh(bat_priv);
+
+ return 0;
}
/**
@@ -329,40 +709,24 @@ err_genlmsg:
static int
batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
{
- struct net *net = genl_info_net(info);
- struct net_device *soft_iface;
- struct batadv_priv *bat_priv;
+ struct batadv_priv *bat_priv = info->user_ptr[0];
struct sk_buff *msg = NULL;
u32 test_length;
void *msg_head;
- int ifindex;
u32 cookie;
u8 *dst;
int ret;
- if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
- return -EINVAL;
-
if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
return -EINVAL;
if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
return -EINVAL;
- ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
- if (!ifindex)
- return -EINVAL;
-
dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
- soft_iface = dev_get_by_index(net, ifindex);
- if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
- ret = -ENODEV;
- goto out;
- }
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
@@ -377,15 +741,11 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- bat_priv = netdev_priv(soft_iface);
batadv_tp_start(bat_priv, dst, test_length, &cookie);
ret = batadv_netlink_tp_meter_put(msg, cookie);
out:
- if (soft_iface)
- dev_put(soft_iface);
-
if (ret) {
if (msg)
nlmsg_free(msg);
@@ -406,65 +766,53 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
static int
batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
{
- struct net *net = genl_info_net(info);
- struct net_device *soft_iface;
- struct batadv_priv *bat_priv;
- int ifindex;
+ struct batadv_priv *bat_priv = info->user_ptr[0];
u8 *dst;
int ret = 0;
- if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
- return -EINVAL;
-
if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
return -EINVAL;
- ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
- if (!ifindex)
- return -EINVAL;
-
dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
- soft_iface = dev_get_by_index(net, ifindex);
- if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
- ret = -ENODEV;
- goto out;
- }
-
- bat_priv = netdev_priv(soft_iface);
batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
-out:
- if (soft_iface)
- dev_put(soft_iface);
-
return ret;
}
/**
- * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
+ * batadv_netlink_hardif_fill() - Fill message with hardif attributes
* @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ * @cmd: type of message to generate
* @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
* @cb: Control block containing additional options
- * @hard_iface: Hard interface to dump
*
- * Return: error code, or 0 on success
+ * Return: 0 on success or negative error number in case of failure
*/
-static int
-batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
- struct netlink_callback *cb,
- struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_hardif_fill(struct sk_buff *msg,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface,
+ enum batadv_nl_commands cmd,
+ u32 portid, u32 seq, int flags,
+ struct netlink_callback *cb)
{
struct net_device *net_dev = hard_iface->net_dev;
void *hdr;
- hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
- &batadv_netlink_family, NLM_F_MULTI,
- BATADV_CMD_GET_HARDIFS);
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
if (!hdr)
- return -EMSGSIZE;
+ return -ENOBUFS;
+
+ if (cb)
+ genl_dump_check_consistent(cb, hdr);
- genl_dump_check_consistent(cb, hdr);
+ if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+ bat_priv->soft_iface->ifindex))
+ goto nla_put_failure;
if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
net_dev->ifindex) ||
@@ -479,27 +827,137 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
goto nla_put_failure;
}
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+ if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
+ atomic_read(&hard_iface->bat_v.elp_interval)))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE,
+ atomic_read(&hard_iface->bat_v.throughput_override)))
+ goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
genlmsg_end(msg, hdr);
return 0;
- nla_put_failure:
+nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
}
/**
- * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages
+ * batadv_netlink_notify_hardif() - send hardif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface)
+{
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+ BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ genlmsg_multicast_netns(&batadv_netlink_family,
+ dev_net(bat_priv->soft_iface), msg, 0,
+ BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+ return 0;
+}
+
+/**
+ * batadv_netlink_get_hardif() - Get hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_hardif(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+ struct batadv_priv *bat_priv = info->user_ptr[0];
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+ BATADV_CMD_GET_HARDIF,
+ info->snd_portid, info->snd_seq, 0,
+ NULL);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ ret = genlmsg_reply(msg, info);
+
+ return ret;
+}
+
+/**
+ * batadv_netlink_set_hardif() - Set hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_hardif(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+ struct batadv_priv *bat_priv = info->user_ptr[0];
+
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+ struct nlattr *attr;
+
+ if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
+ attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];
+
+ atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr));
+ }
+
+ if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) {
+ attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE];
+
+ atomic_set(&hard_iface->bat_v.throughput_override,
+ nla_get_u32(attr));
+ }
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
+ batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
+ return 0;
+}
+
+/**
+ * batadv_netlink_dump_hardif() - Dump all hard interface into a messages
* @msg: Netlink message to dump into
* @cb: Parameters from query
*
* Return: error code, or length of reply message on success
*/
static int
-batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
{
struct net *net = sock_net(cb->skb->sk);
struct net_device *soft_iface;
struct batadv_hard_iface *hard_iface;
+ struct batadv_priv *bat_priv;
int ifindex;
int portid = NETLINK_CB(cb->skb).portid;
int skip = cb->args[0];
@@ -519,6 +977,8 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
return -ENODEV;
}
+ bat_priv = netdev_priv(soft_iface);
+
rtnl_lock();
cb->seq = batadv_hardif_generation << 1 | 1;
@@ -529,8 +989,10 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
if (i++ < skip)
continue;
- if (batadv_netlink_dump_hardif_entry(msg, portid, cb,
- hard_iface)) {
+ if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+ BATADV_CMD_GET_HARDIF,
+ portid, cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, cb)) {
i--;
break;
}
@@ -545,24 +1007,361 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
return msg->len;
}
+/**
+ * batadv_netlink_vlan_fill() - Fill message with vlan attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_vlan_fill(struct sk_buff *msg,
+ struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan,
+ enum batadv_nl_commands cmd,
+ u32 portid, u32 seq, int flags)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+ bat_priv->soft_iface->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+ !!atomic_read(&vlan->ap_isolation)))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_notify_vlan() - send vlan attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan)
+{
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan,
+ BATADV_CMD_SET_VLAN, 0, 0, 0);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ genlmsg_multicast_netns(&batadv_netlink_family,
+ dev_net(bat_priv->soft_iface), msg, 0,
+ BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+ return 0;
+}
+
+/**
+ * batadv_netlink_get_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+ struct batadv_softif_vlan *vlan = info->user_ptr[1];
+ struct batadv_priv *bat_priv = info->user_ptr[0];
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN,
+ info->snd_portid, info->snd_seq, 0);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ ret = genlmsg_reply(msg, info);
+
+ return ret;
+}
+
+/**
+ * batadv_netlink_set_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+ struct batadv_softif_vlan *vlan = info->user_ptr[1];
+ struct batadv_priv *bat_priv = info->user_ptr[0];
+ struct nlattr *attr;
+
+ if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+ attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
+
+ atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+ }
+
+ batadv_netlink_notify_vlan(bat_priv, vlan);
+
+ return 0;
+}
+
+/**
+ * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to soft interface (with increased refcnt) on success, error
+ * pointer on error
+ */
+static struct net_device *
+batadv_get_softif_from_info(struct net *net, struct genl_info *info)
+{
+ struct net_device *soft_iface;
+ int ifindex;
+
+ if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+ return ERR_PTR(-EINVAL);
+
+ ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface)
+ return ERR_PTR(-ENODEV);
+
+ if (!batadv_softif_is_valid(soft_iface))
+ goto err_put_softif;
+
+ return soft_iface;
+
+err_put_softif:
+ dev_put(soft_iface);
+
+ return ERR_PTR(-EINVAL);
+}
+
+/**
+ * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to hard interface (with increased refcnt) on success, error
+ * pointer on error
+ */
+static struct batadv_hard_iface *
+batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net,
+ struct genl_info *info)
+{
+ struct batadv_hard_iface *hard_iface;
+ struct net_device *hard_dev;
+ unsigned int hardif_index;
+
+ if (!info->attrs[BATADV_ATTR_HARD_IFINDEX])
+ return ERR_PTR(-EINVAL);
+
+ hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]);
+
+ hard_dev = dev_get_by_index(net, hardif_index);
+ if (!hard_dev)
+ return ERR_PTR(-ENODEV);
+
+ hard_iface = batadv_hardif_get_by_netdev(hard_dev);
+ if (!hard_iface)
+ goto err_put_harddev;
+
+ if (hard_iface->soft_iface != bat_priv->soft_iface)
+ goto err_put_hardif;
+
+ /* hard_dev is referenced by hard_iface and not needed here */
+ dev_put(hard_dev);
+
+ return hard_iface;
+
+err_put_hardif:
+ batadv_hardif_put(hard_iface);
+err_put_harddev:
+ dev_put(hard_dev);
+
+ return ERR_PTR(-EINVAL);
+}
+
+/**
+ * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to vlan on success (with increased refcnt), error pointer
+ * on error
+ */
+static struct batadv_softif_vlan *
+batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net,
+ struct genl_info *info)
+{
+ struct batadv_softif_vlan *vlan;
+ u16 vid;
+
+ if (!info->attrs[BATADV_ATTR_VLANID])
+ return ERR_PTR(-EINVAL);
+
+ vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]);
+
+ vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG);
+ if (!vlan)
+ return ERR_PTR(-ENOENT);
+
+ return vlan;
+}
+
+/**
+ * batadv_pre_doit() - Prepare batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct batadv_hard_iface *hard_iface;
+ struct batadv_priv *bat_priv = NULL;
+ struct batadv_softif_vlan *vlan;
+ struct net_device *soft_iface;
+ u8 user_ptr1_flags;
+ u8 mesh_dep_flags;
+ int ret;
+
+ user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+ if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1))
+ return -EINVAL;
+
+ mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+ if (WARN_ON((ops->internal_flags & mesh_dep_flags) &&
+ (~ops->internal_flags & BATADV_FLAG_NEED_MESH)))
+ return -EINVAL;
+
+ if (ops->internal_flags & BATADV_FLAG_NEED_MESH) {
+ soft_iface = batadv_get_softif_from_info(net, info);
+ if (IS_ERR(soft_iface))
+ return PTR_ERR(soft_iface);
+
+ bat_priv = netdev_priv(soft_iface);
+ info->user_ptr[0] = bat_priv;
+ }
+
+ if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) {
+ hard_iface = batadv_get_hardif_from_info(bat_priv, net, info);
+ if (IS_ERR(hard_iface)) {
+ ret = PTR_ERR(hard_iface);
+ goto err_put_softif;
+ }
+
+ info->user_ptr[1] = hard_iface;
+ }
+
+ if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) {
+ vlan = batadv_get_vlan_from_info(bat_priv, net, info);
+ if (IS_ERR(vlan)) {
+ ret = PTR_ERR(vlan);
+ goto err_put_softif;
+ }
+
+ info->user_ptr[1] = vlan;
+ }
+
+ return 0;
+
+err_put_softif:
+ if (bat_priv)
+ dev_put(bat_priv->soft_iface);
+
+ return ret;
+}
+
+/**
+ * batadv_post_doit() - End batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ */
+static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct batadv_hard_iface *hard_iface;
+ struct batadv_softif_vlan *vlan;
+ struct batadv_priv *bat_priv;
+
+ if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF &&
+ info->user_ptr[1]) {
+ hard_iface = info->user_ptr[1];
+
+ batadv_hardif_put(hard_iface);
+ }
+
+ if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) {
+ vlan = info->user_ptr[1];
+ batadv_softif_vlan_put(vlan);
+ }
+
+ if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) {
+ bat_priv = info->user_ptr[0];
+ dev_put(bat_priv->soft_iface);
+ }
+}
+
static const struct genl_ops batadv_netlink_ops[] = {
{
- .cmd = BATADV_CMD_GET_MESH_INFO,
- .flags = GENL_ADMIN_PERM,
+ .cmd = BATADV_CMD_GET_MESH,
+ /* can be retrieved by unprivileged users */
.policy = batadv_netlink_policy,
- .doit = batadv_netlink_get_mesh_info,
+ .doit = batadv_netlink_get_mesh,
+ .internal_flags = BATADV_FLAG_NEED_MESH,
},
{
.cmd = BATADV_CMD_TP_METER,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.doit = batadv_netlink_tp_meter_start,
+ .internal_flags = BATADV_FLAG_NEED_MESH,
},
{
.cmd = BATADV_CMD_TP_METER_CANCEL,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.doit = batadv_netlink_tp_meter_cancel,
+ .internal_flags = BATADV_FLAG_NEED_MESH,
},
{
.cmd = BATADV_CMD_GET_ROUTING_ALGOS,
@@ -571,10 +1370,13 @@ static const struct genl_ops batadv_netlink_ops[] = {
.dumpit = batadv_algo_dump,
},
{
- .cmd = BATADV_CMD_GET_HARDIFS,
- .flags = GENL_ADMIN_PERM,
+ .cmd = BATADV_CMD_GET_HARDIF,
+ /* can be retrieved by unprivileged users */
.policy = batadv_netlink_policy,
- .dumpit = batadv_netlink_dump_hardifs,
+ .dumpit = batadv_netlink_dump_hardif,
+ .doit = batadv_netlink_get_hardif,
+ .internal_flags = BATADV_FLAG_NEED_MESH |
+ BATADV_FLAG_NEED_HARDIF,
},
{
.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
@@ -630,7 +1432,37 @@ static const struct genl_ops batadv_netlink_ops[] = {
.policy = batadv_netlink_policy,
.dumpit = batadv_mcast_flags_dump,
},
-
+ {
+ .cmd = BATADV_CMD_SET_MESH,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_set_mesh,
+ .internal_flags = BATADV_FLAG_NEED_MESH,
+ },
+ {
+ .cmd = BATADV_CMD_SET_HARDIF,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_set_hardif,
+ .internal_flags = BATADV_FLAG_NEED_MESH |
+ BATADV_FLAG_NEED_HARDIF,
+ },
+ {
+ .cmd = BATADV_CMD_GET_VLAN,
+ /* can be retrieved by unprivileged users */
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_get_vlan,
+ .internal_flags = BATADV_FLAG_NEED_MESH |
+ BATADV_FLAG_NEED_VLAN,
+ },
+ {
+ .cmd = BATADV_CMD_SET_VLAN,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_set_vlan,
+ .internal_flags = BATADV_FLAG_NEED_MESH |
+ BATADV_FLAG_NEED_VLAN,
+ },
};
struct genl_family batadv_netlink_family __ro_after_init = {
@@ -639,6 +1471,8 @@ struct genl_family batadv_netlink_family __ro_after_init = {
.version = 1,
.maxattr = BATADV_ATTR_MAX,
.netnsok = true,
+ .pre_doit = batadv_pre_doit,
+ .post_doit = batadv_post_doit,
.module = THIS_MODULE,
.ops = batadv_netlink_ops,
.n_ops = ARRAY_SIZE(batadv_netlink_ops),
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 216484b8b82d..7273368544fc 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -34,6 +34,12 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
u8 result, u32 test_time, u64 total_bytes,
u32 cookie);
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv);
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface);
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan);
+
extern struct genl_family batadv_netlink_family;
#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b14fb3462af7..2e367230376b 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -50,13 +50,13 @@
#include <linux/string.h>
#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
-#include "gateway_common.h"
#include "hard-interface.h"
#include "multicast.h"
#include "network-coding.h"
@@ -222,12 +222,16 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
netif_trans_update(soft_iface);
vid = batadv_get_vid(skb, 0);
+
+ skb_reset_mac_header(skb);
ethhdr = eth_hdr(skb);
proto = ethhdr->h_proto;
switch (ntohs(proto)) {
case ETH_P_8021Q:
+ if (!pskb_may_pull(skb, sizeof(*vhdr)))
+ goto dropped;
vhdr = vlan_eth_hdr(skb);
proto = vhdr->h_vlan_encapsulated_proto;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index e1b816262c53..0b4b3fb778a6 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -40,6 +40,7 @@
#include <linux/stringify.h>
#include <linux/workqueue.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
@@ -47,6 +48,7 @@
#include "gateway_common.h"
#include "hard-interface.h"
#include "log.h"
+#include "netlink.h"
#include "network-coding.h"
#include "soft-interface.h"
@@ -153,9 +155,14 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
{ \
struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
struct batadv_priv *bat_priv = netdev_priv(net_dev); \
+ ssize_t length; \
+ \
+ length = __batadv_store_bool_attr(buff, count, _post_func, attr,\
+ &bat_priv->_name, net_dev); \
\
- return __batadv_store_bool_attr(buff, count, _post_func, attr, \
- &bat_priv->_name, net_dev); \
+ batadv_netlink_notify_mesh(bat_priv); \
+ \
+ return length; \
}
#define BATADV_ATTR_SIF_SHOW_BOOL(_name) \
@@ -185,11 +192,16 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
{ \
struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
struct batadv_priv *bat_priv = netdev_priv(net_dev); \
+ ssize_t length; \
\
- return __batadv_store_uint_attr(buff, count, _min, _max, \
- _post_func, attr, \
- &bat_priv->_var, net_dev, \
- NULL); \
+ length = __batadv_store_uint_attr(buff, count, _min, _max, \
+ _post_func, attr, \
+ &bat_priv->_var, net_dev, \
+ NULL); \
+ \
+ batadv_netlink_notify_mesh(bat_priv); \
+ \
+ return length; \
}
#define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \
@@ -222,6 +234,11 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \
attr, &vlan->_name, \
bat_priv->soft_iface); \
\
+ if (vlan->vid) \
+ batadv_netlink_notify_vlan(bat_priv, vlan); \
+ else \
+ batadv_netlink_notify_mesh(bat_priv); \
+ \
batadv_softif_vlan_put(vlan); \
return res; \
}
@@ -255,6 +272,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
{ \
struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
struct batadv_hard_iface *hard_iface; \
+ struct batadv_priv *bat_priv; \
ssize_t length; \
\
hard_iface = batadv_hardif_get_by_netdev(net_dev); \
@@ -267,6 +285,11 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
hard_iface->soft_iface, \
net_dev); \
\
+ if (hard_iface->soft_iface) { \
+ bat_priv = netdev_priv(hard_iface->soft_iface); \
+ batadv_netlink_notify_hardif(bat_priv, hard_iface); \
+ } \
+ \
batadv_hardif_put(hard_iface); \
return length; \
}
@@ -536,6 +559,9 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
batadv_gw_check_client_stop(bat_priv);
atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp);
batadv_gw_tvlv_container_update(bat_priv);
+
+ batadv_netlink_notify_mesh(bat_priv);
+
return count;
}
@@ -562,6 +588,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
size_t count)
{
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+ ssize_t length;
/* setting the GW selection class is allowed only if the routing
* algorithm in use implements the GW API
@@ -577,10 +604,14 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
count);
- return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
- batadv_post_gw_reselect, attr,
- &bat_priv->gw.sel_class,
- bat_priv->soft_iface, NULL);
+ length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+ batadv_post_gw_reselect, attr,
+ &bat_priv->gw.sel_class,
+ bat_priv->soft_iface, NULL);
+
+ batadv_netlink_notify_mesh(bat_priv);
+
+ return length;
}
static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
@@ -600,12 +631,18 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
struct attribute *attr, char *buff,
size_t count)
{
+ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+ ssize_t length;
if (buff[count - 1] == '\n')
buff[count - 1] = '\0';
- return batadv_gw_bandwidth_set(net_dev, buff, count);
+ length = batadv_gw_bandwidth_set(net_dev, buff, count);
+
+ batadv_netlink_notify_mesh(bat_priv);
+
+ return length;
}
/**
@@ -673,6 +710,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
"New skb mark for extended isolation: %#.8x/%#.8x\n",
bat_priv->isolation_mark, bat_priv->isolation_mark_mask);
+ batadv_netlink_notify_mesh(bat_priv);
+
return count;
}
@@ -1077,6 +1116,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
struct attribute *attr,
char *buff, size_t count)
{
+ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
struct batadv_hard_iface *hard_iface;
u32 tp_override;
@@ -1107,6 +1147,8 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
atomic_set(&hard_iface->bat_v.throughput_override, tp_override);
+ batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
out:
batadv_hardif_put(hard_iface);
return count;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 1fb885a33c66..fe9f2d8ca2c1 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1018,8 +1018,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
if (!nsrcs)
return -EINVAL;
- grec_len = sizeof(*grec) +
- sizeof(struct in6_addr) * ntohs(*nsrcs);
+ grec_len = struct_size(grec, grec_src, ntohs(*nsrcs));
if (!ipv6_mc_may_pull(skb, len + grec_len))
return -EINVAL;
@@ -1616,12 +1615,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
if (ip_hdr(skb)->protocol == IPPROTO_PIM)
br_multicast_pim(br, port, skb);
} else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
- err = br_ip4_multicast_mrd_rcv(br, port, skb);
-
- if (err < 0 && err != -ENOMSG) {
- br_multicast_err_count(br, port, skb->protocol);
- return err;
- }
+ br_ip4_multicast_mrd_rcv(br, port, skb);
}
return 0;
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 4d2b9eb7604a..af57c4a2b78a 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -14,7 +14,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
/* dev is yet to be added to the port list. */
list_for_each_entry(p, &br->port_list, list) {
- if (switchdev_port_same_parent_id(dev, p->dev))
+ if (netdev_port_same_parent_id(dev, p->dev))
return p->offload_fwd_mark;
}
@@ -23,15 +23,12 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
int nbp_switchdev_mark_set(struct net_bridge_port *p)
{
- struct switchdev_attr attr = {
- .orig_dev = p->dev,
- .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- };
+ struct netdev_phys_item_id ppid = { };
int err;
ASSERT_RTNL();
- err = switchdev_port_attr_get(p->dev, &attr);
+ err = dev_get_port_parent_id(p->dev, &ppid, true);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -67,21 +64,19 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
{
struct switchdev_attr attr = {
.orig_dev = p->dev,
- .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
+ .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
+ .u.brport_flags = mask,
};
int err;
if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
return 0;
- err = switchdev_port_attr_get(p->dev, &attr);
+ err = switchdev_port_attr_set(p->dev, &attr);
if (err == -EOPNOTSUPP)
return 0;
- if (err)
- return err;
- /* Check if specific bridge flag attribute offload is supported */
- if (!(attr.u.brport_flags_support & mask)) {
+ if (err) {
br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
(unsigned int)p->port_no, p->dev->name);
return -EOPNOTSUPP;
@@ -90,6 +85,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
attr.flags = SWITCHDEV_F_DEFER;
attr.u.brport_flags = flags;
+
err = switchdev_port_attr_set(p->dev, &attr);
if (err) {
br_warn(p->br, "error setting offload flag on port %u(%s)\n",
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 419e8edf23ba..1b1856744c80 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -125,13 +125,10 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
return;
- if (ip_hdr(oldskb)->protocol == IPPROTO_TCP ||
- ip_hdr(oldskb)->protocol == IPPROTO_UDP)
- proto = ip_hdr(oldskb)->protocol;
- else
- proto = 0;
+ proto = ip_hdr(oldskb)->protocol;
if (!skb_csum_unnecessary(oldskb) &&
+ nf_reject_verify_csum(proto) &&
nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
return;
@@ -234,6 +231,9 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
+ if (!nf_reject_verify_csum(proto))
+ return true;
+
return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
}
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 38c2b7a890dd..37ac5ca0ffdf 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -319,16 +319,12 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
if (tmppkt == NULL)
return NULL;
tmp = pkt_to_skb(tmppkt);
- skb_set_tail_pointer(tmp, dstlen);
- tmp->len = dstlen;
- memcpy(tmp->data, dst->data, dstlen);
+ skb_put_data(tmp, dst->data, dstlen);
cfpkt_destroy(dstpkt);
dst = tmp;
}
- memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add));
+ skb_put_data(dst, add->data, skb_headlen(add));
cfpkt_destroy(addpkt);
- dst->tail += addlen;
- dst->len += addlen;
return skb_to_pkt(dst);
}
@@ -359,13 +355,11 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
if (skb2 == NULL)
return NULL;
+ skb_put_data(skb2, split, len2nd);
+
/* Reduce the length of the original packet */
- skb_set_tail_pointer(skb, pos);
- skb->len = pos;
+ skb_trim(skb, pos);
- memcpy(skb2->data, split, len2nd);
- skb2->tail += len2nd;
- skb2->len += len2nd;
skb2->priority = skb->priority;
return skb_to_pkt(skb2);
}
diff --git a/net/core/Makefile b/net/core/Makefile
index fccd31e0e7f7..f97d6254e564 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o xdp.o
+ fib_notifier.o xdp.o flow_offload.o
obj-y += net-sysfs.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o
diff --git a/net/core/dev.c b/net/core/dev.c
index bfa4be42afff..a3d13f5e2bfc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7878,6 +7878,63 @@ int dev_get_phys_port_name(struct net_device *dev,
EXPORT_SYMBOL(dev_get_phys_port_name);
/**
+ * dev_get_port_parent_id - Get the device's port parent identifier
+ * @dev: network device
+ * @ppid: pointer to a storage for the port's parent identifier
+ * @recurse: allow/disallow recursion to lower devices
+ *
+ * Get the devices's port parent identifier
+ */
+int dev_get_port_parent_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid,
+ bool recurse)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct netdev_phys_item_id first = { };
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (ops->ndo_get_port_parent_id)
+ return ops->ndo_get_port_parent_id(dev, ppid);
+
+ if (!recurse)
+ return err;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = dev_get_port_parent_id(lower_dev, ppid, recurse);
+ if (err)
+ break;
+ if (!first.id_len)
+ first = *ppid;
+ else if (memcmp(&first, ppid, sizeof(*ppid)))
+ return -ENODATA;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(dev_get_port_parent_id);
+
+/**
+ * netdev_port_same_parent_id - Indicate if two network devices have
+ * the same port parent identifier
+ * @a: first network device
+ * @b: second network device
+ */
+bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
+{
+ struct netdev_phys_item_id a_id = { };
+ struct netdev_phys_item_id b_id = { };
+
+ if (dev_get_port_parent_id(a, &a_id, true) ||
+ dev_get_port_parent_id(b, &b_id, true))
+ return false;
+
+ return netdev_phys_item_id_same(&a_id, &b_id);
+}
+EXPORT_SYMBOL(netdev_port_same_parent_id);
+
+/**
* dev_change_proto_down - update protocol port state information
* @dev: device
* @proto_down: new value
@@ -7976,11 +8033,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
enum bpf_netdev_command query;
struct bpf_prog *prog = NULL;
bpf_op_t bpf_op, bpf_chk;
+ bool offload;
int err;
ASSERT_RTNL();
- query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+ offload = flags & XDP_FLAGS_HW_MODE;
+ query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
bpf_op = bpf_chk = ops->ndo_bpf;
if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
@@ -7993,8 +8052,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) ||
- __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) {
+ if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
return -EEXIST;
}
@@ -8009,8 +8067,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (!(flags & XDP_FLAGS_HW_MODE) &&
- bpf_prog_is_dev_bound(prog->aux)) {
+ if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
bpf_prog_put(prog);
return -EINVAL;
@@ -8158,7 +8215,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
netdev_features_t feature;
int feature_bit;
- for_each_netdev_feature(&upper_disables, feature_bit) {
+ for_each_netdev_feature(upper_disables, feature_bit) {
feature = __NETIF_F_BIT(feature_bit);
if (!(upper->wanted_features & feature)
&& (features & feature)) {
@@ -8178,7 +8235,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
netdev_features_t feature;
int feature_bit;
- for_each_netdev_feature(&upper_disables, feature_bit) {
+ for_each_netdev_feature(upper_disables, feature_bit) {
feature = __NETIF_F_BIT(feature_bit);
if (!(features & feature) && (lower->features & feature)) {
netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 52bf27491fb8..4f31ddc883e7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -81,6 +81,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
static LIST_HEAD(devlink_list);
@@ -115,6 +116,8 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
+ lockdep_assert_held(&devlink_mutex);
+
list_for_each_entry(devlink, &devlink_list, list) {
if (strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0 &&
@@ -2659,6 +2662,27 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
return devlink->ops->reload(devlink, info->extack);
}
+static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *file_name, *component;
+ struct nlattr *nla_component;
+
+ if (!devlink->ops->flash_update)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME])
+ return -EINVAL;
+ file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]);
+
+ nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT];
+ component = nla_component ? nla_data(nla_component) : NULL;
+
+ return devlink->ops->flash_update(devlink, file_name, component,
+ info->extack);
+}
+
static const struct devlink_param devlink_param_generic[] = {
{
.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -2700,11 +2724,6 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
},
- {
- .id = DEVLINK_PARAM_GENERIC_ID_WOL,
- .name = DEVLINK_PARAM_GENERIC_WOL_NAME,
- .type = DEVLINK_PARAM_GENERIC_WOL_TYPE,
- },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -2857,6 +2876,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
u32 portid, u32 seq, int flags)
{
union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+ bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
const struct devlink_param *param = param_item->param;
struct devlink_param_gset_ctx ctx;
struct nlattr *param_values_list;
@@ -2875,12 +2895,15 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
return -EOPNOTSUPP;
param_value[i] = param_item->driverinit_value;
} else {
+ if (!param_item->published)
+ continue;
ctx.cmode = i;
err = devlink_param_get(devlink, param, &ctx);
if (err)
return err;
param_value[i] = ctx.val;
}
+ param_value_set[i] = true;
}
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -2915,7 +2938,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
goto param_nest_cancel;
for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
- if (!devlink_param_cmode_is_supported(param, i))
+ if (!param_value_set[i])
continue;
err = devlink_nl_param_value_fill_one(msg, param->type,
i, param_value[i]);
@@ -3624,44 +3647,56 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
u64 ret_offset, start_offset, end_offset = 0;
- struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
const struct genl_ops *ops = cb->data;
struct devlink_region *region;
struct nlattr *chunks_attr;
const char *region_name;
struct devlink *devlink;
+ struct nlattr **attrs;
bool dump = true;
void *hdr;
int err;
start_offset = *((u64 *)&cb->args[0]);
+ attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack);
if (err)
- goto out;
+ goto out_free;
+ mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
- if (IS_ERR(devlink))
- goto out;
+ if (IS_ERR(devlink)) {
+ err = PTR_ERR(devlink);
+ goto out_dev;
+ }
- mutex_lock(&devlink_mutex);
mutex_lock(&devlink->lock);
if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
- !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+ !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
+ err = -EINVAL;
goto out_unlock;
+ }
region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
region = devlink_region_get_by_name(devlink, region_name);
- if (!region)
+ if (!region) {
+ err = -EINVAL;
goto out_unlock;
+ }
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
DEVLINK_CMD_REGION_READ);
- if (!hdr)
+ if (!hdr) {
+ err = -EMSGSIZE;
goto out_unlock;
+ }
err = devlink_nl_put_handle(skb, devlink);
if (err)
@@ -3672,8 +3707,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto nla_put_failure;
chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
- if (!chunks_attr)
+ if (!chunks_attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
@@ -3696,8 +3733,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto nla_put_failure;
/* Check if there was any progress done to prevent infinite loop */
- if (ret_offset == start_offset)
+ if (ret_offset == start_offset) {
+ err = -EINVAL;
goto nla_put_failure;
+ }
*((u64 *)&cb->args[0]) = ret_offset;
@@ -3705,6 +3744,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
genlmsg_end(skb, hdr);
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
+ kfree(attrs);
return skb->len;
@@ -3712,9 +3752,11 @@ nla_put_failure:
genlmsg_cancel(skb, hdr);
out_unlock:
mutex_unlock(&devlink->lock);
+out_dev:
mutex_unlock(&devlink_mutex);
-out:
- return 0;
+out_free:
+ kfree(attrs);
+ return err;
}
struct devlink_info_req {
@@ -3878,6 +3920,964 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
return msg->len;
}
+struct devlink_fmsg_item {
+ struct list_head list;
+ int attrtype;
+ u8 nla_type;
+ u16 len;
+ int value[0];
+};
+
+struct devlink_fmsg {
+ struct list_head item_list;
+};
+
+static struct devlink_fmsg *devlink_fmsg_alloc(void)
+{
+ struct devlink_fmsg *fmsg;
+
+ fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL);
+ if (!fmsg)
+ return NULL;
+
+ INIT_LIST_HEAD(&fmsg->item_list);
+
+ return fmsg;
+}
+
+static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
+{
+ struct devlink_fmsg_item *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) {
+ list_del(&item->list);
+ kfree(item);
+ }
+ kfree(fmsg);
+}
+
+static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
+ int attrtype)
+{
+ struct devlink_fmsg_item *item;
+
+ item = kzalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->attrtype = attrtype;
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
+
+static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
+}
+
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
+
+#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
+
+static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
+{
+ struct devlink_fmsg_item *item;
+
+ if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
+ return -EMSGSIZE;
+
+ item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->nla_type = NLA_NUL_STRING;
+ item->len = strlen(name) + 1;
+ item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
+ memcpy(&item->value, name, item->len);
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+ int err;
+
+ err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_put_name(fmsg, name);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
+
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
+
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
+
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
+
+static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
+ const void *value, u16 value_len,
+ u8 value_nla_type)
+{
+ struct devlink_fmsg_item *item;
+
+ if (value_len > DEVLINK_FMSG_MAX_SIZE)
+ return -EMSGSIZE;
+
+ item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->nla_type = value_nla_type;
+ item->len = value_len;
+ item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+ memcpy(&item->value, value, item->len);
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put);
+
+int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put);
+
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
+
+int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put);
+
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+ return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
+ NLA_NUL_STRING);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
+
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len)
+{
+ return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
+
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
+
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
+
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
+
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
+
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
+
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u16 value_len)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_binary_put(fmsg, value, value_len);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
+
+static int
+devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+ switch (msg->nla_type) {
+ case NLA_FLAG:
+ case NLA_U8:
+ case NLA_U32:
+ case NLA_U64:
+ case NLA_NUL_STRING:
+ case NLA_BINARY:
+ return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,
+ msg->nla_type);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+ int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+ u8 tmp;
+
+ switch (msg->nla_type) {
+ case NLA_FLAG:
+ /* Always provide flag data, regardless of its value */
+ tmp = *(bool *) msg->value;
+
+ return nla_put_u8(skb, attrtype, tmp);
+ case NLA_U8:
+ return nla_put_u8(skb, attrtype, *(u8 *) msg->value);
+ case NLA_U32:
+ return nla_put_u32(skb, attrtype, *(u32 *) msg->value);
+ case NLA_U64:
+ return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value,
+ DEVLINK_ATTR_PAD);
+ case NLA_NUL_STRING:
+ return nla_put_string(skb, attrtype, (char *) &msg->value);
+ case NLA_BINARY:
+ return nla_put(skb, attrtype, msg->len, (void *) &msg->value);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+ int *start)
+{
+ struct devlink_fmsg_item *item;
+ struct nlattr *fmsg_nlattr;
+ int i = 0;
+ int err;
+
+ fmsg_nlattr = nla_nest_start(skb, DEVLINK_ATTR_FMSG);
+ if (!fmsg_nlattr)
+ return -EMSGSIZE;
+
+ list_for_each_entry(item, &fmsg->item_list, list) {
+ if (i < *start) {
+ i++;
+ continue;
+ }
+
+ switch (item->attrtype) {
+ case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
+ case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
+ case DEVLINK_ATTR_FMSG_ARR_NEST_START:
+ case DEVLINK_ATTR_FMSG_NEST_END:
+ err = nla_put_flag(skb, item->attrtype);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
+ err = devlink_fmsg_item_fill_type(item, skb);
+ if (err)
+ break;
+ err = devlink_fmsg_item_fill_data(item, skb);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_NAME:
+ err = nla_put_string(skb, item->attrtype,
+ (char *) &item->value);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ if (!err)
+ *start = ++i;
+ else
+ break;
+ }
+
+ nla_nest_end(skb, fmsg_nlattr);
+ return err;
+}
+
+static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
+ struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ bool last = false;
+ int index = 0;
+ void *hdr;
+ int err;
+
+ while (!last) {
+ int tmp_index = index;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, flags | NLM_F_MULTI, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+ if (!err)
+ last = true;
+ else if (err != -EMSGSIZE || tmp_index == index)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ err = genlmsg_reply(skb, info);
+ if (err)
+ return err;
+ }
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ nlmsg_free(skb);
+ return err;
+}
+
+struct devlink_health_reporter {
+ struct list_head list;
+ void *priv;
+ const struct devlink_health_reporter_ops *ops;
+ struct devlink *devlink;
+ struct devlink_fmsg *dump_fmsg;
+ struct mutex dump_lock; /* lock parallel read/write from dump buffers */
+ u64 graceful_period;
+ bool auto_recover;
+ u8 health_state;
+ u64 dump_ts;
+ u64 error_count;
+ u64 recovery_count;
+ u64 last_recovery_ts;
+};
+
+enum devlink_health_reporter_state {
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+ DEVLINK_HEALTH_REPORTER_STATE_ERROR,
+};
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+ return reporter->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+ const char *reporter_name)
+{
+ struct devlink_health_reporter *reporter;
+
+ list_for_each_entry(reporter, &devlink->reporter_list, list)
+ if (!strcmp(reporter->ops->name, reporter_name))
+ return reporter;
+ return NULL;
+}
+
+/**
+ * devlink_health_reporter_create - create devlink health reporter
+ *
+ * @devlink: devlink
+ * @ops: ops
+ * @graceful_period: to avoid recovery loops, in msecs
+ * @auto_recover: auto recover when error occurs
+ * @priv: priv
+ */
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, bool auto_recover,
+ void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ mutex_lock(&devlink->lock);
+ if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
+ reporter = ERR_PTR(-EEXIST);
+ goto unlock;
+ }
+
+ if (WARN_ON(auto_recover && !ops->recover) ||
+ WARN_ON(graceful_period && !ops->recover)) {
+ reporter = ERR_PTR(-EINVAL);
+ goto unlock;
+ }
+
+ reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+ if (!reporter) {
+ reporter = ERR_PTR(-ENOMEM);
+ goto unlock;
+ }
+
+ reporter->priv = priv;
+ reporter->ops = ops;
+ reporter->devlink = devlink;
+ reporter->graceful_period = graceful_period;
+ reporter->auto_recover = auto_recover;
+ mutex_init(&reporter->dump_lock);
+ list_add_tail(&reporter->list, &devlink->reporter_list);
+unlock:
+ mutex_unlock(&devlink->lock);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+/**
+ * devlink_health_reporter_destroy - destroy devlink health reporter
+ *
+ * @reporter: devlink health reporter to destroy
+ */
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ mutex_lock(&reporter->devlink->lock);
+ list_del(&reporter->list);
+ mutex_unlock(&reporter->devlink->lock);
+ if (reporter->dump_fmsg)
+ devlink_fmsg_free(reporter->dump_fmsg);
+ kfree(reporter);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+ void *priv_ctx)
+{
+ int err;
+
+ if (!reporter->ops->recover)
+ return -EOPNOTSUPP;
+
+ err = reporter->ops->recover(reporter, priv_ctx);
+ if (err)
+ return err;
+
+ reporter->recovery_count++;
+ reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+ reporter->last_recovery_ts = jiffies;
+
+ return 0;
+}
+
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+ if (!reporter->dump_fmsg)
+ return;
+ devlink_fmsg_free(reporter->dump_fmsg);
+ reporter->dump_fmsg = NULL;
+}
+
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+ void *priv_ctx)
+{
+ int err;
+
+ if (!reporter->ops->dump)
+ return 0;
+
+ if (reporter->dump_fmsg)
+ return 0;
+
+ reporter->dump_fmsg = devlink_fmsg_alloc();
+ if (!reporter->dump_fmsg) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+ if (err)
+ goto dump_err;
+
+ err = reporter->ops->dump(reporter, reporter->dump_fmsg,
+ priv_ctx);
+ if (err)
+ goto dump_err;
+
+ err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+ if (err)
+ goto dump_err;
+
+ reporter->dump_ts = jiffies;
+
+ return 0;
+
+dump_err:
+ devlink_health_dump_clear(reporter);
+ return err;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+ const char *msg, void *priv_ctx)
+{
+ struct devlink *devlink = reporter->devlink;
+
+ /* write a log message of the current error */
+ WARN_ON(!msg);
+ trace_devlink_health_report(devlink, reporter->ops->name, msg);
+ reporter->error_count++;
+
+ /* abort if the previous error wasn't recovered */
+ if (reporter->auto_recover &&
+ (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+ jiffies - reporter->last_recovery_ts <
+ msecs_to_jiffies(reporter->graceful_period))) {
+ trace_devlink_health_recover_aborted(devlink,
+ reporter->ops->name,
+ reporter->health_state,
+ jiffies -
+ reporter->last_recovery_ts);
+ return -ECANCELED;
+ }
+
+ reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+
+ mutex_lock(&reporter->dump_lock);
+ /* store current dump of current error, for later analysis */
+ devlink_health_do_dump(reporter, priv_ctx);
+ mutex_unlock(&reporter->dump_lock);
+
+ if (reporter->auto_recover)
+ return devlink_health_reporter_recover(reporter, priv_ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ char *reporter_name;
+
+ if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
+ return NULL;
+
+ reporter_name =
+ nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
+ return devlink_health_reporter_find_by_name(devlink, reporter_name);
+}
+
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_health_reporter *reporter,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ struct nlattr *reporter_attr;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER);
+ if (!reporter_attr)
+ goto genlmsg_cancel;
+ if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ reporter->ops->name))
+ goto reporter_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+ reporter->health_state))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+ reporter->error_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+ reporter->recovery_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+ reporter->graceful_period,
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+ reporter->auto_recover))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+ jiffies_to_msecs(reporter->dump_ts),
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+
+ nla_nest_end(msg, reporter_attr);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+reporter_nest_cancel:
+ nla_nest_end(msg, reporter_attr);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ struct sk_buff *msg;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ info->snd_portid, info->snd_seq,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_health_reporter *reporter;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(reporter, &devlink->reporter_list,
+ list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_health_reporter_fill(msg, devlink,
+ reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int
+devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->recover &&
+ (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
+ return -EOPNOTSUPP;
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
+ reporter->graceful_period =
+ nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
+ reporter->auto_recover =
+ nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+
+ return 0;
+}
+
+static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ return devlink_health_reporter_recover(reporter, NULL);
+}
+
+static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ struct devlink_fmsg *fmsg;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->diagnose)
+ return -EOPNOTSUPP;
+
+ fmsg = devlink_fmsg_alloc();
+ if (!fmsg)
+ return -ENOMEM;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ goto out;
+
+ err = reporter->ops->diagnose(reporter, fmsg);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_snd(fmsg, info,
+ DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
+
+out:
+ devlink_fmsg_free(fmsg);
+ return err;
+}
+
+static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&reporter->dump_lock);
+ err = devlink_health_do_dump(reporter, NULL);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_snd(reporter->dump_fmsg, info,
+ DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 0);
+
+out:
+ mutex_unlock(&reporter->dump_lock);
+ return err;
+}
+
+static int
+devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&reporter->dump_lock);
+ devlink_health_dump_clear(reporter);
+ mutex_unlock(&reporter->dump_lock);
+ return 0;
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3903,6 +4903,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -4146,6 +5151,58 @@ static const struct genl_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .doit = devlink_nl_cmd_health_reporter_get_doit,
+ .dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
+ .policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+ .doit = devlink_nl_cmd_health_reporter_set_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+ .doit = devlink_nl_cmd_health_reporter_recover_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+ .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+ .doit = devlink_nl_cmd_health_reporter_dump_get_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+ .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
+ .cmd = DEVLINK_CMD_FLASH_UPDATE,
+ .doit = devlink_nl_cmd_flash_update,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -4186,6 +5243,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->resource_list);
INIT_LIST_HEAD(&devlink->param_list);
INIT_LIST_HEAD(&devlink->region_list);
+ INIT_LIST_HEAD(&devlink->reporter_list);
mutex_init(&devlink->lock);
return devlink;
}
@@ -4228,6 +5286,14 @@ EXPORT_SYMBOL_GPL(devlink_unregister);
*/
void devlink_free(struct devlink *devlink)
{
+ WARN_ON(!list_empty(&devlink->reporter_list));
+ WARN_ON(!list_empty(&devlink->region_list));
+ WARN_ON(!list_empty(&devlink->param_list));
+ WARN_ON(!list_empty(&devlink->resource_list));
+ WARN_ON(!list_empty(&devlink->dpipe_table_list));
+ WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->port_list));
+
kfree(devlink);
}
EXPORT_SYMBOL_GPL(devlink_free);
@@ -4878,6 +5944,48 @@ void devlink_params_unregister(struct devlink *devlink,
EXPORT_SYMBOL_GPL(devlink_params_unregister);
/**
+ * devlink_params_publish - publish configuration parameters
+ *
+ * @devlink: devlink
+ *
+ * Publish previously registered configuration parameters.
+ */
+void devlink_params_publish(struct devlink *devlink)
+{
+ struct devlink_param_item *param_item;
+
+ list_for_each_entry(param_item, &devlink->param_list, list) {
+ if (param_item->published)
+ continue;
+ param_item->published = true;
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_params_publish);
+
+/**
+ * devlink_params_unpublish - unpublish configuration parameters
+ *
+ * @devlink: devlink
+ *
+ * Unpublish previously registered configuration parameters.
+ */
+void devlink_params_unpublish(struct devlink *devlink)
+{
+ struct devlink_param_item *param_item;
+
+ list_for_each_entry(param_item, &devlink->param_list, list) {
+ if (!param_item->published)
+ continue;
+ param_item->published = false;
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_params_unpublish);
+
+/**
* devlink_port_params_register - register port configuration parameters
*
* @devlink_port: devlink port
@@ -5330,7 +6438,7 @@ void devlink_compat_running_version(struct net_device *dev,
list_for_each_entry(devlink, &devlink_list, list) {
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
- if (devlink_port->type == DEVLINK_PORT_TYPE_ETH ||
+ if (devlink_port->type == DEVLINK_PORT_TYPE_ETH &&
devlink_port->type_dev == dev) {
__devlink_compat_running_version(devlink,
buf, len);
@@ -5344,6 +6452,36 @@ out:
mutex_unlock(&devlink_mutex);
}
+int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
+{
+ struct devlink_port *devlink_port;
+ struct devlink *devlink;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ int ret = -EOPNOTSUPP;
+
+ if (devlink_port->type != DEVLINK_PORT_TYPE_ETH ||
+ devlink_port->type_dev != dev)
+ continue;
+
+ mutex_unlock(&devlink_mutex);
+ if (devlink->ops->flash_update)
+ ret = devlink->ops->flash_update(devlink,
+ file_name,
+ NULL, NULL);
+ mutex_unlock(&devlink->lock);
+ return ret;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+ mutex_unlock(&devlink_mutex);
+
+ return -EOPNOTSUPP;
+}
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 45c0a6e3d6ad..1320e8dce559 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -29,6 +29,7 @@
#include <linux/net.h>
#include <net/devlink.h>
#include <net/xdp_sock.h>
+#include <net/flow_offload.h>
/*
* Some useful ethtool_ops methods that're device independent.
@@ -2037,11 +2038,17 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
if (copy_from_user(&efl, useraddr, sizeof(efl)))
return -EFAULT;
+ efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
- if (!dev->ethtool_ops->flash_device)
- return -EOPNOTSUPP;
+ if (!dev->ethtool_ops->flash_device) {
+ int ret;
- efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
+ rtnl_unlock();
+ ret = devlink_compat_flash_update(dev, efl.data);
+ rtnl_lock();
+
+ return ret;
+ }
return dev->ethtool_ops->flash_device(dev, &efl);
}
@@ -2820,3 +2827,241 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return rc;
}
+
+struct ethtool_rx_flow_key {
+ struct flow_dissector_key_basic basic;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ };
+ struct flow_dissector_key_ports tp;
+ struct flow_dissector_key_ip ip;
+ struct flow_dissector_key_vlan vlan;
+ struct flow_dissector_key_eth_addrs eth_addrs;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct ethtool_rx_flow_match {
+ struct flow_dissector dissector;
+ struct ethtool_rx_flow_key key;
+ struct ethtool_rx_flow_key mask;
+};
+
+struct ethtool_rx_flow_rule *
+ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
+{
+ const struct ethtool_rx_flow_spec *fs = input->fs;
+ static struct in6_addr zero_addr = {};
+ struct ethtool_rx_flow_match *match;
+ struct ethtool_rx_flow_rule *flow;
+ struct flow_action_entry *act;
+
+ flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) +
+ sizeof(struct ethtool_rx_flow_match), GFP_KERNEL);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ /* ethtool_rx supports only one single action per rule. */
+ flow->rule = flow_rule_alloc(1);
+ if (!flow->rule) {
+ kfree(flow);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ match = (struct ethtool_rx_flow_match *)flow->priv;
+ flow->rule->match.dissector = &match->dissector;
+ flow->rule->match.mask = &match->mask;
+ flow->rule->match.key = &match->key;
+
+ match->mask.basic.n_proto = htons(0xffff);
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW: {
+ const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec;
+
+ match->key.basic.n_proto = htons(ETH_P_IP);
+
+ v4_spec = &fs->h_u.tcp_ip4_spec;
+ v4_m_spec = &fs->m_u.tcp_ip4_spec;
+
+ if (v4_m_spec->ip4src) {
+ match->key.ipv4.src = v4_spec->ip4src;
+ match->mask.ipv4.src = v4_m_spec->ip4src;
+ }
+ if (v4_m_spec->ip4dst) {
+ match->key.ipv4.dst = v4_spec->ip4dst;
+ match->mask.ipv4.dst = v4_m_spec->ip4dst;
+ }
+ if (v4_m_spec->ip4src ||
+ v4_m_spec->ip4dst) {
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] =
+ offsetof(struct ethtool_rx_flow_key, ipv4);
+ }
+ if (v4_m_spec->psrc) {
+ match->key.tp.src = v4_spec->psrc;
+ match->mask.tp.src = v4_m_spec->psrc;
+ }
+ if (v4_m_spec->pdst) {
+ match->key.tp.dst = v4_spec->pdst;
+ match->mask.tp.dst = v4_m_spec->pdst;
+ }
+ if (v4_m_spec->psrc ||
+ v4_m_spec->pdst) {
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_PORTS);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
+ offsetof(struct ethtool_rx_flow_key, tp);
+ }
+ if (v4_m_spec->tos) {
+ match->key.ip.tos = v4_spec->tos;
+ match->mask.ip.tos = v4_m_spec->tos;
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_IP);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
+ offsetof(struct ethtool_rx_flow_key, ip);
+ }
+ }
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW: {
+ const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec;
+
+ match->key.basic.n_proto = htons(ETH_P_IPV6);
+
+ v6_spec = &fs->h_u.tcp_ip6_spec;
+ v6_m_spec = &fs->m_u.tcp_ip6_spec;
+ if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
+ memcpy(&match->key.ipv6.src, v6_spec->ip6src,
+ sizeof(match->key.ipv6.src));
+ memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src,
+ sizeof(match->mask.ipv6.src));
+ }
+ if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
+ memcpy(&match->key.ipv6.dst, v6_spec->ip6dst,
+ sizeof(match->key.ipv6.dst));
+ memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst,
+ sizeof(match->mask.ipv6.dst));
+ }
+ if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) ||
+ memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
+ offsetof(struct ethtool_rx_flow_key, ipv6);
+ }
+ if (v6_m_spec->psrc) {
+ match->key.tp.src = v6_spec->psrc;
+ match->mask.tp.src = v6_m_spec->psrc;
+ }
+ if (v6_m_spec->pdst) {
+ match->key.tp.dst = v6_spec->pdst;
+ match->mask.tp.dst = v6_m_spec->pdst;
+ }
+ if (v6_m_spec->psrc ||
+ v6_m_spec->pdst) {
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_PORTS);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
+ offsetof(struct ethtool_rx_flow_key, tp);
+ }
+ if (v6_m_spec->tclass) {
+ match->key.ip.tos = v6_spec->tclass;
+ match->mask.ip.tos = v6_m_spec->tclass;
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_IP);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
+ offsetof(struct ethtool_rx_flow_key, ip);
+ }
+ }
+ break;
+ default:
+ ethtool_rx_flow_rule_destroy(flow);
+ return ERR_PTR(-EINVAL);
+ }
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ match->key.basic.ip_proto = IPPROTO_TCP;
+ break;
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ match->key.basic.ip_proto = IPPROTO_UDP;
+ break;
+ }
+ match->mask.basic.ip_proto = 0xff;
+
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
+ offsetof(struct ethtool_rx_flow_key, basic);
+
+ if (fs->flow_type & FLOW_EXT) {
+ const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
+ const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
+
+ if (ext_m_spec->vlan_etype &&
+ ext_m_spec->vlan_tci) {
+ match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype;
+ match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype;
+
+ match->key.vlan.vlan_id =
+ ntohs(ext_h_spec->vlan_tci) & 0x0fff;
+ match->mask.vlan.vlan_id =
+ ntohs(ext_m_spec->vlan_tci) & 0x0fff;
+
+ match->key.vlan.vlan_priority =
+ (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13;
+ match->mask.vlan.vlan_priority =
+ (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13;
+
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_VLAN);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
+ offsetof(struct ethtool_rx_flow_key, vlan);
+ }
+ }
+ if (fs->flow_type & FLOW_MAC_EXT) {
+ const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
+ const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
+
+ memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
+ ETH_ALEN);
+ memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
+ ETH_ALEN);
+
+ match->dissector.used_keys |=
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
+ offsetof(struct ethtool_rx_flow_key, eth_addrs);
+ }
+
+ act = &flow->rule->action.entries[0];
+ switch (fs->ring_cookie) {
+ case RX_CLS_FLOW_DISC:
+ act->id = FLOW_ACTION_DROP;
+ break;
+ case RX_CLS_FLOW_WAKE:
+ act->id = FLOW_ACTION_WAKE;
+ break;
+ default:
+ act->id = FLOW_ACTION_QUEUE;
+ if (fs->flow_type & FLOW_RSS)
+ act->queue.ctx = input->rss_ctx;
+
+ act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
+ act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie);
+ break;
+ }
+
+ return flow;
+}
+EXPORT_SYMBOL(ethtool_rx_flow_rule_create);
+
+void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow)
+{
+ kfree(flow->rule);
+ kfree(flow);
+}
+EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy);
diff --git a/net/core/filter.c b/net/core/filter.c
index 3a49f68eda10..5132c054c981 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -73,6 +73,7 @@
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
+#include <net/lwtunnel.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -1793,6 +1794,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
+{
+ sk = sk_to_full_sk(sk);
+
+ return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_sk_fullsock_proto = {
+ .func = bpf_sk_fullsock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+};
+
static inline int sk_skb_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
@@ -2789,8 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (!skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_cow(skb, len_diff);
@@ -2831,8 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (!skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2957,8 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (!skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_cow(skb, len_diff);
@@ -2987,8 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (!skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_unclone(skb, GFP_ATOMIC);
@@ -4112,10 +4123,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
/* Only some socketops are supported */
switch (optname) {
case SO_RCVBUF:
+ val = min_t(u32, val, sysctl_rmem_max);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
break;
case SO_SNDBUF:
+ val = min_t(u32, val, sysctl_wmem_max);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
break;
@@ -4801,7 +4814,15 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
}
#endif /* CONFIG_IPV6_SEG6_BPF */
-BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+ bool ingress)
+{
+ return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
+}
+#endif
+
+BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
u32, len)
{
switch (type) {
@@ -4810,13 +4831,40 @@ BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
case BPF_LWT_ENCAP_SEG6_INLINE:
return bpf_push_seg6_encap(skb, type, hdr, len);
#endif
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+ case BPF_LWT_ENCAP_IP:
+ return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
+#endif
default:
return -EINVAL;
}
}
-static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
- .func = bpf_lwt_push_encap,
+BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
+ void *, hdr, u32, len)
+{
+ switch (type) {
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+ case BPF_LWT_ENCAP_IP:
+ return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
+#endif
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
+ .func = bpf_lwt_in_push_encap,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE
+};
+
+static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
+ .func = bpf_lwt_xmit_push_encap,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
@@ -5016,6 +5064,54 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
};
#endif /* CONFIG_IPV6_SEG6_BPF */
+#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \
+do { \
+ switch (si->off) { \
+ case offsetof(md_type, snd_cwnd): \
+ CONVERT(snd_cwnd); break; \
+ case offsetof(md_type, srtt_us): \
+ CONVERT(srtt_us); break; \
+ case offsetof(md_type, snd_ssthresh): \
+ CONVERT(snd_ssthresh); break; \
+ case offsetof(md_type, rcv_nxt): \
+ CONVERT(rcv_nxt); break; \
+ case offsetof(md_type, snd_nxt): \
+ CONVERT(snd_nxt); break; \
+ case offsetof(md_type, snd_una): \
+ CONVERT(snd_una); break; \
+ case offsetof(md_type, mss_cache): \
+ CONVERT(mss_cache); break; \
+ case offsetof(md_type, ecn_flags): \
+ CONVERT(ecn_flags); break; \
+ case offsetof(md_type, rate_delivered): \
+ CONVERT(rate_delivered); break; \
+ case offsetof(md_type, rate_interval_us): \
+ CONVERT(rate_interval_us); break; \
+ case offsetof(md_type, packets_out): \
+ CONVERT(packets_out); break; \
+ case offsetof(md_type, retrans_out): \
+ CONVERT(retrans_out); break; \
+ case offsetof(md_type, total_retrans): \
+ CONVERT(total_retrans); break; \
+ case offsetof(md_type, segs_in): \
+ CONVERT(segs_in); break; \
+ case offsetof(md_type, data_segs_in): \
+ CONVERT(data_segs_in); break; \
+ case offsetof(md_type, segs_out): \
+ CONVERT(segs_out); break; \
+ case offsetof(md_type, data_segs_out): \
+ CONVERT(data_segs_out); break; \
+ case offsetof(md_type, lost_out): \
+ CONVERT(lost_out); break; \
+ case offsetof(md_type, sacked_out): \
+ CONVERT(sacked_out); break; \
+ case offsetof(md_type, bytes_received): \
+ CONVERT(bytes_received); break; \
+ case offsetof(md_type, bytes_acked): \
+ CONVERT(bytes_acked); break; \
+ } \
+} while (0)
+
#ifdef CONFIG_INET
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
int dif, int sdif, u8 family, u8 proto)
@@ -5253,6 +5349,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
.arg5_type = ARG_ANYTHING,
};
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
+ return false;
+
+ if (off % size != 0)
+ return false;
+
+ switch (off) {
+ case offsetof(struct bpf_tcp_sock, bytes_received):
+ case offsetof(struct bpf_tcp_sock, bytes_acked):
+ return size == sizeof(__u64);
+ default:
+ return size == sizeof(__u32);
+ }
+}
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
+ do { \
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
+ FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
+ si->dst_reg, si->src_reg, \
+ offsetof(struct tcp_sock, FIELD)); \
+ } while (0)
+
+ CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
+ BPF_TCP_SOCK_GET_COMMON);
+
+ if (insn > insn_buf)
+ return insn - insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_tcp_sock, rtt_min):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ sizeof(struct minmax));
+ BUILD_BUG_ON(sizeof(struct minmax) <
+ sizeof(struct minmax_sample));
+
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct tcp_sock, rtt_min) +
+ offsetof(struct minmax_sample, v));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
+{
+ sk = sk_to_full_sk(sk);
+
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_tcp_sock_proto = {
+ .func = bpf_tcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -5282,7 +5451,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_lwt_seg6_adjust_srh ||
func == bpf_lwt_seg6_action ||
#endif
- func == bpf_lwt_push_encap)
+ func == bpf_lwt_in_push_encap ||
+ func == bpf_lwt_xmit_push_encap)
return true;
return false;
@@ -5406,6 +5576,12 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
switch (func_id) {
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+ case BPF_FUNC_sk_fullsock:
+ return &bpf_sk_fullsock_proto;
+#ifdef CONFIG_INET
+ case BPF_FUNC_tcp_sock:
+ return &bpf_tcp_sock_proto;
+#endif
default:
return sk_filter_func_proto(func_id, prog);
}
@@ -5477,6 +5653,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
return &bpf_skb_fib_lookup_proto;
+ case BPF_FUNC_sk_fullsock:
+ return &bpf_sk_fullsock_proto;
#ifdef CONFIG_XFRM
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
@@ -5494,6 +5672,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
+ case BPF_FUNC_tcp_sock:
+ return &bpf_tcp_sock_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -5670,7 +5850,7 @@ lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_lwt_push_encap:
- return &bpf_lwt_push_encap_proto;
+ return &bpf_lwt_in_push_encap_proto;
default:
return lwt_out_func_proto(func_id, prog);
}
@@ -5706,6 +5886,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_l4_csum_replace_proto;
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
+ case BPF_FUNC_lwt_push_encap:
+ return &bpf_lwt_xmit_push_encap_proto;
default:
return lwt_out_func_proto(func_id, prog);
}
@@ -5764,6 +5946,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != sizeof(__u64))
return false;
break;
+ case offsetof(struct __sk_buff, sk):
+ if (type == BPF_WRITE || size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+ break;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -5935,31 +6122,44 @@ full_access:
return true;
}
-static bool __sock_filter_check_size(int off, int size,
+bool bpf_sock_common_is_valid_access(int off, int size,
+ enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- const int size_default = sizeof(__u32);
-
switch (off) {
- case bpf_ctx_range(struct bpf_sock, src_ip4):
- case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
- bpf_ctx_record_field_size(info, size_default);
- return bpf_ctx_narrow_access_ok(off, size, size_default);
+ case bpf_ctx_range_till(struct bpf_sock, type, priority):
+ return false;
+ default:
+ return bpf_sock_is_valid_access(off, size, type, info);
}
-
- return size == size_default;
}
bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
if (off % size != 0)
return false;
- if (!__sock_filter_check_size(off, size, info))
- return false;
- return true;
+
+ switch (off) {
+ case offsetof(struct bpf_sock, state):
+ case offsetof(struct bpf_sock, family):
+ case offsetof(struct bpf_sock, type):
+ case offsetof(struct bpf_sock, protocol):
+ case offsetof(struct bpf_sock, dst_port):
+ case offsetof(struct bpf_sock, src_port):
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ case bpf_ctx_range(struct bpf_sock, dst_ip4):
+ case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);
+ }
+
+ return size == size_default;
}
static bool sock_filter_is_valid_access(int off, int size,
@@ -6748,6 +6948,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct qdisc_skb_cb, pkt_len);
*target_size = 4;
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
+ break;
+
+ case offsetof(struct __sk_buff, sk):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ break;
}
return insn - insn_buf;
@@ -6796,24 +7003,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
-
- *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_family));
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock_common, skc_family),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common,
+ skc_family,
+ FIELD_SIZEOF(struct sock_common,
+ skc_family),
+ target_size));
break;
case offsetof(struct bpf_sock, type):
+ BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ *target_size = 2;
break;
case offsetof(struct bpf_sock, protocol):
+ BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
+ *target_size = 1;
break;
case offsetof(struct bpf_sock, src_ip4):
@@ -6825,6 +7040,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
target_size));
break;
+ case offsetof(struct bpf_sock, dst_ip4):
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_daddr,
+ FIELD_SIZEOF(struct sock_common,
+ skc_daddr),
+ target_size));
+ break;
+
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
off = si->off;
@@ -6843,6 +7067,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
#endif
break;
+ case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ off = si->off;
+ off -= offsetof(struct bpf_sock, dst_ip6[0]);
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common,
+ skc_v6_daddr.s6_addr32[0],
+ FIELD_SIZEOF(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]),
+ target_size) + off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+ *target_size = 4;
+#endif
+ break;
+
case offsetof(struct bpf_sock, src_port):
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock_common, skc_num),
@@ -6852,6 +7093,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
skc_num),
target_size));
break;
+
+ case offsetof(struct bpf_sock, dst_port):
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_dport,
+ FIELD_SIZEOF(struct sock_common,
+ skc_dport),
+ target_size));
+ break;
+
+ case offsetof(struct bpf_sock, state):
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock_common, skc_state),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_state,
+ FIELD_SIZEOF(struct sock_common,
+ skc_state),
+ target_size));
+ break;
}
return insn - insn_buf;
@@ -7099,6 +7360,85 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn = insn_buf;
int off;
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+ do { \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
+ OBJ_FIELD), \
+ si->dst_reg, si->dst_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ } while (0)
+
+#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
+ SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers, dst_reg which contains a pointer
+ * to ctx (context) and src_reg which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other register. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+ do { \
+ int reg = BPF_REG_9; \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ } while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
+ do { \
+ if (TYPE == BPF_WRITE) \
+ SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+ else \
+ SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+ } while (0)
+
+ CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
+ SOCK_OPS_GET_TCP_SOCK_FIELD);
+
+ if (insn > insn_buf)
+ return insn - insn_buf;
+
switch (si->off) {
case offsetof(struct bpf_sock_ops, op) ...
offsetof(struct bpf_sock_ops, replylong[3]):
@@ -7256,175 +7596,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
FIELD_SIZEOF(struct minmax_sample, t));
break;
-/* Helper macro for adding read access to tcp_sock or sock fields. */
-#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
- do { \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
- struct bpf_sock_ops_kern, \
- is_fullsock), \
- si->dst_reg, si->src_reg, \
- offsetof(struct bpf_sock_ops_kern, \
- is_fullsock)); \
- *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
- struct bpf_sock_ops_kern, sk),\
- si->dst_reg, si->src_reg, \
- offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
- OBJ_FIELD), \
- si->dst_reg, si->dst_reg, \
- offsetof(OBJ, OBJ_FIELD)); \
- } while (0)
-
-/* Helper macro for adding write access to tcp_sock or sock fields.
- * The macro is called with two registers, dst_reg which contains a pointer
- * to ctx (context) and src_reg which contains the value that should be
- * stored. However, we need an additional register since we cannot overwrite
- * dst_reg because it may be used later in the program.
- * Instead we "borrow" one of the other register. We first save its value
- * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
- * it at the end of the macro.
- */
-#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
- do { \
- int reg = BPF_REG_9; \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
- if (si->dst_reg == reg || si->src_reg == reg) \
- reg--; \
- if (si->dst_reg == reg || si->src_reg == reg) \
- reg--; \
- *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
- offsetof(struct bpf_sock_ops_kern, \
- temp)); \
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
- struct bpf_sock_ops_kern, \
- is_fullsock), \
- reg, si->dst_reg, \
- offsetof(struct bpf_sock_ops_kern, \
- is_fullsock)); \
- *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
- struct bpf_sock_ops_kern, sk),\
- reg, si->dst_reg, \
- offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
- reg, si->src_reg, \
- offsetof(OBJ, OBJ_FIELD)); \
- *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
- offsetof(struct bpf_sock_ops_kern, \
- temp)); \
- } while (0)
-
-#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
- do { \
- if (TYPE == BPF_WRITE) \
- SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
- else \
- SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
- } while (0)
-
- case offsetof(struct bpf_sock_ops, snd_cwnd):
- SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, srtt_us):
- SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
- break;
-
case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
struct tcp_sock);
break;
- case offsetof(struct bpf_sock_ops, snd_ssthresh):
- SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, rcv_nxt):
- SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, snd_nxt):
- SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, snd_una):
- SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, mss_cache):
- SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, ecn_flags):
- SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, rate_delivered):
- SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
- struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, rate_interval_us):
- SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
- struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, packets_out):
- SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, retrans_out):
- SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, total_retrans):
- SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
- struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, segs_in):
- SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, data_segs_in):
- SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, segs_out):
- SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, data_segs_out):
- SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
- struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, lost_out):
- SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, sacked_out):
- SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
- break;
-
case offsetof(struct bpf_sock_ops, sk_txhash):
SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
struct sock, type);
break;
-
- case offsetof(struct bpf_sock_ops, bytes_received):
- SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
- struct tcp_sock);
- break;
-
- case offsetof(struct bpf_sock_ops, bytes_acked):
- SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
- break;
-
}
return insn - insn_buf;
}
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
new file mode 100644
index 000000000000..c3a00eac4804
--- /dev/null
+++ b/net/core/flow_offload.c
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <net/flow_offload.h>
+
+struct flow_rule *flow_rule_alloc(unsigned int num_actions)
+{
+ struct flow_rule *rule;
+
+ rule = kzalloc(sizeof(struct flow_rule) +
+ sizeof(struct flow_action_entry) * num_actions,
+ GFP_KERNEL);
+ if (!rule)
+ return NULL;
+
+ rule->action.num_entries = num_actions;
+
+ return rule;
+}
+EXPORT_SYMBOL(flow_rule_alloc);
+
+#define FLOW_DISSECTOR_MATCH(__rule, __type, __out) \
+ const struct flow_match *__m = &(__rule)->match; \
+ struct flow_dissector *__d = (__m)->dissector; \
+ \
+ (__out)->key = skb_flow_dissector_target(__d, __type, (__m)->key); \
+ (__out)->mask = skb_flow_dissector_target(__d, __type, (__m)->mask); \
+
+void flow_rule_match_basic(const struct flow_rule *rule,
+ struct flow_match_basic *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_BASIC, out);
+}
+EXPORT_SYMBOL(flow_rule_match_basic);
+
+void flow_rule_match_control(const struct flow_rule *rule,
+ struct flow_match_control *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CONTROL, out);
+}
+EXPORT_SYMBOL(flow_rule_match_control);
+
+void flow_rule_match_eth_addrs(const struct flow_rule *rule,
+ struct flow_match_eth_addrs *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_eth_addrs);
+
+void flow_rule_match_vlan(const struct flow_rule *rule,
+ struct flow_match_vlan *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_VLAN, out);
+}
+EXPORT_SYMBOL(flow_rule_match_vlan);
+
+void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
+ struct flow_match_ipv4_addrs *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipv4_addrs);
+
+void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
+ struct flow_match_ipv6_addrs *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipv6_addrs);
+
+void flow_rule_match_ip(const struct flow_rule *rule,
+ struct flow_match_ip *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ip);
+
+void flow_rule_match_ports(const struct flow_rule *rule,
+ struct flow_match_ports *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ports);
+
+void flow_rule_match_tcp(const struct flow_rule *rule,
+ struct flow_match_tcp *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_TCP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_tcp);
+
+void flow_rule_match_icmp(const struct flow_rule *rule,
+ struct flow_match_icmp *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ICMP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_icmp);
+
+void flow_rule_match_mpls(const struct flow_rule *rule,
+ struct flow_match_mpls *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_MPLS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_mpls);
+
+void flow_rule_match_enc_control(const struct flow_rule *rule,
+ struct flow_match_control *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_control);
+
+void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule,
+ struct flow_match_ipv4_addrs *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ipv4_addrs);
+
+void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule,
+ struct flow_match_ipv6_addrs *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ipv6_addrs);
+
+void flow_rule_match_enc_ip(const struct flow_rule *rule,
+ struct flow_match_ip *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ip);
+
+void flow_rule_match_enc_ports(const struct flow_rule *rule,
+ struct flow_match_ports *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ports);
+
+void flow_rule_match_enc_keyid(const struct flow_rule *rule,
+ struct flow_match_enc_keyid *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_keyid);
+
+void flow_rule_match_enc_opts(const struct flow_rule *rule,
+ struct flow_match_enc_opts *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_OPTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_opts);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index a648568c5e8f..a5c8c79d468a 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -16,6 +16,8 @@
#include <linux/types.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
+#include <net/gre.h>
+#include <net/ip6_route.h>
struct bpf_lwt_prog {
struct bpf_prog *prog;
@@ -55,6 +57,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
switch (ret) {
case BPF_OK:
+ case BPF_LWT_REROUTE:
break;
case BPF_REDIRECT:
@@ -87,6 +90,30 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
return ret;
}
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+ int err = -EINVAL;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ struct iphdr *iph = ip_hdr(skb);
+
+ err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ iph->tos, skb_dst(skb)->dev);
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ err = ipv6_stub->ipv6_route_input(skb);
+ } else {
+ err = -EAFNOSUPPORT;
+ }
+
+ if (err)
+ goto err;
+ return dst_input(skb);
+
+err:
+ kfree_skb(skb);
+ return err;
+}
+
static int bpf_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -98,11 +125,11 @@ static int bpf_input(struct sk_buff *skb)
ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
if (ret < 0)
return ret;
+ if (ret == BPF_LWT_REROUTE)
+ return bpf_lwt_input_reroute(skb);
}
if (unlikely(!dst->lwtstate->orig_input)) {
- pr_warn_once("orig_input not set on dst for prog %s\n",
- bpf->out.name);
kfree_skb(skb);
return -EINVAL;
}
@@ -147,6 +174,102 @@ static int xmit_check_hhlen(struct sk_buff *skb)
return 0;
}
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+ struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+ int oif = l3mdev ? l3mdev->ifindex : 0;
+ struct dst_entry *dst = NULL;
+ int err = -EAFNOSUPPORT;
+ struct sock *sk;
+ struct net *net;
+ bool ipv4;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ ipv4 = true;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ ipv4 = false;
+ else
+ goto err;
+
+ sk = sk_to_full_sk(skb->sk);
+ if (sk) {
+ if (sk->sk_bound_dev_if)
+ oif = sk->sk_bound_dev_if;
+ net = sock_net(sk);
+ } else {
+ net = dev_net(skb_dst(skb)->dev);
+ }
+
+ if (ipv4) {
+ struct iphdr *iph = ip_hdr(skb);
+ struct flowi4 fl4 = {};
+ struct rtable *rt;
+
+ fl4.flowi4_oif = oif;
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_uid = sock_net_uid(net, sk);
+ fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+ fl4.flowi4_proto = iph->protocol;
+ fl4.daddr = iph->daddr;
+ fl4.saddr = iph->saddr;
+
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto err;
+ }
+ dst = &rt->dst;
+ } else {
+ struct ipv6hdr *iph6 = ipv6_hdr(skb);
+ struct flowi6 fl6 = {};
+
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_uid = sock_net_uid(net, sk);
+ fl6.flowlabel = ip6_flowinfo(iph6);
+ fl6.flowi6_proto = iph6->nexthdr;
+ fl6.daddr = iph6->daddr;
+ fl6.saddr = iph6->saddr;
+
+ err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
+ if (unlikely(err))
+ goto err;
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto err;
+ }
+ }
+ if (unlikely(dst->error)) {
+ err = dst->error;
+ dst_release(dst);
+ goto err;
+ }
+
+ /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
+ * was done for the previous dst, so we are doing it here again, in
+ * case the new dst needs much more space. The call below is a noop
+ * if there is enough header space in skb.
+ */
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto err;
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+ if (unlikely(err))
+ goto err;
+
+ /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
+ return LWTUNNEL_XMIT_DONE;
+
+err:
+ kfree_skb(skb);
+ return err;
+}
+
static int bpf_xmit(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -154,11 +277,20 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ __be16 proto = skb->protocol;
int ret;
ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
switch (ret) {
case BPF_OK:
+ /* If the header changed, e.g. via bpf_lwt_push_encap,
+ * BPF_LWT_REROUTE below should have been used if the
+ * protocol was also changed.
+ */
+ if (skb->protocol != proto) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
@@ -169,6 +301,8 @@ static int bpf_xmit(struct sk_buff *skb)
return LWTUNNEL_XMIT_CONTINUE;
case BPF_REDIRECT:
return LWTUNNEL_XMIT_DONE;
+ case BPF_LWT_REROUTE:
+ return bpf_lwt_xmit_reroute(skb);
default:
return ret;
}
@@ -390,6 +524,133 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = {
.owner = THIS_MODULE,
};
+static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
+ int encap_len)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_type |= gso_type;
+ skb_decrease_gso_size(shinfo, encap_len);
+ shinfo->gso_segs = 0;
+ return 0;
+}
+
+static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
+{
+ int next_hdr_offset;
+ void *next_hdr;
+ __u8 protocol;
+
+ /* SCTP and UDP_L4 gso need more nuanced handling than what
+ * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
+ * So at the moment only TCP GSO packets are let through.
+ */
+ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ return -ENOTSUPP;
+
+ if (ipv4) {
+ protocol = ip_hdr(skb)->protocol;
+ next_hdr_offset = sizeof(struct iphdr);
+ next_hdr = skb_network_header(skb) + next_hdr_offset;
+ } else {
+ protocol = ipv6_hdr(skb)->nexthdr;
+ next_hdr_offset = sizeof(struct ipv6hdr);
+ next_hdr = skb_network_header(skb) + next_hdr_offset;
+ }
+
+ switch (protocol) {
+ case IPPROTO_GRE:
+ next_hdr_offset += sizeof(struct gre_base_hdr);
+ if (next_hdr_offset > encap_len)
+ return -EINVAL;
+
+ if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
+ return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
+ encap_len);
+ return handle_gso_type(skb, SKB_GSO_GRE, encap_len);
+
+ case IPPROTO_UDP:
+ next_hdr_offset += sizeof(struct udphdr);
+ if (next_hdr_offset > encap_len)
+ return -EINVAL;
+
+ if (((struct udphdr *)next_hdr)->check)
+ return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
+ encap_len);
+ return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);
+
+ case IPPROTO_IP:
+ case IPPROTO_IPV6:
+ if (ipv4)
+ return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
+ else
+ return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);
+
+ default:
+ return -EPROTONOSUPPORT;
+ }
+}
+
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+ struct iphdr *iph;
+ bool ipv4;
+ int err;
+
+ if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+ return -EINVAL;
+
+ /* validate protocol and length */
+ iph = (struct iphdr *)hdr;
+ if (iph->version == 4) {
+ ipv4 = true;
+ if (unlikely(len < iph->ihl * 4))
+ return -EINVAL;
+ } else if (iph->version == 6) {
+ ipv4 = false;
+ if (unlikely(len < sizeof(struct ipv6hdr)))
+ return -EINVAL;
+ } else {
+ return -EINVAL;
+ }
+
+ if (ingress)
+ err = skb_cow_head(skb, len + skb->mac_len);
+ else
+ err = skb_cow_head(skb,
+ len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+ if (unlikely(err))
+ return err;
+
+ /* push the encap headers and fix pointers */
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ skb_push(skb, len);
+ if (ingress)
+ skb_postpush_rcsum(skb, iph, len);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), hdr, len);
+ bpf_compute_data_pointers(skb);
+ skb_clear_hash(skb);
+
+ if (ipv4) {
+ skb->protocol = htons(ETH_P_IP);
+ iph = ip_hdr(skb);
+
+ if (!iph->check)
+ iph->check = ip_fast_csum((unsigned char *)iph,
+ iph->ihl);
+ } else {
+ skb->protocol = htons(ETH_P_IPV6);
+ }
+
+ if (skb_is_gso(skb))
+ return handle_gso_encap(skb, ipv4, len);
+
+ return 0;
+}
+
static int __init bpf_lwt_init(void)
{
return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4230400b9a30..30f6fd8f68e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -42,6 +42,8 @@
#include <linux/inetdevice.h>
#include <net/addrconf.h>
+#include <trace/events/neigh.h>
+
#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...) \
@@ -102,6 +104,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
if (neigh->parms->neigh_cleanup)
neigh->parms->neigh_cleanup(neigh);
+ trace_neigh_cleanup_and_release(neigh, 0);
__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
@@ -1095,6 +1098,8 @@ out:
if (notify)
neigh_update_notify(neigh, 0);
+ trace_neigh_timer_handler(neigh, 0);
+
neigh_release(neigh);
}
@@ -1165,6 +1170,7 @@ out_unlock_bh:
else
write_unlock(&neigh->lock);
local_bh_enable();
+ trace_neigh_event_send_done(neigh, rc);
return rc;
out_dead:
@@ -1172,6 +1178,7 @@ out_dead:
goto out_unlock_bh;
write_unlock_bh(&neigh->lock);
kfree_skb(skb);
+ trace_neigh_event_send_dead(neigh, 1);
return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
@@ -1227,6 +1234,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
struct net_device *dev;
int update_isrouter = 0;
+ trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
+
write_lock_bh(&neigh->lock);
dev = neigh->dev;
@@ -1393,6 +1402,8 @@ out:
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
+ trace_neigh_update_done(neigh, err);
+
return err;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ff9fd2bb4ce4..7c5061123ead 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -12,7 +12,6 @@
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
-#include <net/switchdev.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
@@ -501,16 +500,11 @@ static ssize_t phys_switch_id_show(struct device *dev,
return restart_syscall();
if (dev_isalive(netdev)) {
- struct switchdev_attr attr = {
- .orig_dev = netdev,
- .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- .flags = SWITCHDEV_F_NO_RECURSE,
- };
+ struct netdev_phys_item_id ppid = { };
- ret = switchdev_port_attr_get(netdev, &attr);
+ ret = dev_get_port_parent_id(netdev, &ppid, false);
if (!ret)
- ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
- attr.u.ppid.id);
+ ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
}
rtnl_unlock();
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 419af6dfe29f..470b179d599e 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -43,6 +43,14 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
#endif
+#include <trace/events/neigh.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_timer_handler);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 43a932cb609b..5b2252c6d49b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -136,17 +136,19 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
goto skip_dma_map;
- /* Setup DMA mapping: use page->private for DMA-addr
+ /* Setup DMA mapping: use 'struct page' area for storing DMA-addr
+ * since dma_addr_t can be either 32 or 64 bits and does not always fit
+ * into page private data (i.e 32bit cpu with 64bit DMA caps)
* This mapping is kept for lifetime of page, until leaving pool.
*/
- dma = dma_map_page(pool->p.dev, page, 0,
- (PAGE_SIZE << pool->p.order),
- pool->p.dma_dir);
+ dma = dma_map_page_attrs(pool->p.dev, page, 0,
+ (PAGE_SIZE << pool->p.order),
+ pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(pool->p.dev, dma)) {
put_page(page);
return NULL;
}
- set_page_private(page, dma); /* page->private = dma; */
+ page->dma_addr = dma;
skip_dma_map:
/* When page just alloc'ed is should/must have refcnt 1. */
@@ -175,13 +177,17 @@ EXPORT_SYMBOL(page_pool_alloc_pages);
static void __page_pool_clean_page(struct page_pool *pool,
struct page *page)
{
+ dma_addr_t dma;
+
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
return;
+ dma = page->dma_addr;
/* DMA unmap */
- dma_unmap_page(pool->p.dev, page_private(page),
- PAGE_SIZE << pool->p.order, pool->p.dma_dir);
- set_page_private(page, 0);
+ dma_unmap_page_attrs(pool->p.dev, dma,
+ PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ page->dma_addr = 0;
}
/* Return a page to the page allocator, cleaning up our state */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f5a98082ac7a..a51cab95ba64 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -46,7 +46,6 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <net/switchdev.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
@@ -1146,22 +1145,17 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
+ struct netdev_phys_item_id ppid = { };
int err;
- struct switchdev_attr attr = {
- .orig_dev = dev,
- .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- .flags = SWITCHDEV_F_NO_RECURSE,
- };
- err = switchdev_port_attr_get(dev, &attr);
+ err = dev_get_port_parent_id(dev, &ppid, false);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
return err;
}
- if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len,
- attr.u.ppid.id))
+ if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id))
return -EMSGSIZE;
return 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 26d848484912..2415d9cb9b89 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -356,6 +356,8 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
+ fragsz = SKB_DATA_ALIGN(fragsz);
+
return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);
@@ -369,6 +371,8 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
void *napi_alloc_frag(unsigned int fragsz)
{
+ fragsz = SKB_DATA_ALIGN(fragsz);
+
return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index e76ed8df9f13..ae6f06e45737 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -554,8 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
/* No sk_callback_lock since already detached. */
- if (psock->parser.enabled)
- strp_done(&psock->parser.strp);
+ strp_done(&psock->parser.strp);
cancel_work_sync(&psock->work);
diff --git a/net/core/sock.c b/net/core/sock.c
index 71ded4d8025c..f4b8b78535f8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -785,6 +785,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
*/
val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
+ /* Ensure val * 2 fits into an int, to prevent max_t()
+ * from treating it as a negative value.
+ */
+ val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
/* Wake up sending tasks if we upped the value. */
@@ -796,6 +800,12 @@ set_sndbuf:
ret = -EPERM;
break;
}
+
+ /* No negative values (to prevent underflow, as val will be
+ * multiplied by 2).
+ */
+ if (val < 0)
+ val = 0;
goto set_sndbuf;
case SO_RCVBUF:
@@ -806,6 +816,10 @@ set_sndbuf:
*/
val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
+ /* Ensure val * 2 fits into an int, to prevent max_t()
+ * from treating it as a negative value.
+ */
+ val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
/*
* We double it on the way in to account for
@@ -830,6 +844,12 @@ set_rcvbuf:
ret = -EPERM;
break;
}
+
+ /* No negative values (to prevent underflow, as val will be
+ * multiplied by 2).
+ */
+ if (val < 0)
+ val = 0;
goto set_rcvbuf;
case SO_KEEPALIVE:
@@ -2475,7 +2495,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
}
if (sk_has_memory_pressure(sk)) {
- int alloc;
+ u64 alloc;
if (!sk_under_memory_pressure(sk))
return 1;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6eb837a47b5c..baaaeb2b2c42 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
u8 pkt, u8 opt, u8 *val, u8 len)
{
- if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+ if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options)
return 0;
return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
}
@@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
u8 pkt, u8 opt, u8 *val, u8 len)
{
- if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+ if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options)
return 0;
return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
}
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index a1917025e155..8c431e0f3627 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -767,11 +767,10 @@ static int dsa_switch_probe(struct dsa_switch *ds)
struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
{
- size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port);
struct dsa_switch *ds;
int i;
- ds = devm_kzalloc(dev, size, GFP_KERNEL);
+ ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL);
if (!ds)
return NULL;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 1f4972dab9f2..47a1d1379d15 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -160,6 +160,10 @@ int dsa_port_mdb_add(const struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+ struct switchdev_trans *trans);
+int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+ struct switchdev_trans *trans);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 79e97d2f2d9b..c58f33931be1 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -248,6 +248,8 @@ static void dsa_master_reset_mtu(struct net_device *dev)
rtnl_unlock();
}
+static struct lock_class_key dsa_master_addr_list_lock_key;
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
int ret;
@@ -261,6 +263,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
wmb();
dev->dsa_ptr = cpu_dp;
+ lockdep_set_class(&dev->addr_list_lock,
+ &dsa_master_addr_list_lock_key);
ret = dsa_master_ethtool_setup(dev);
if (ret)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2d7e01b23572..e9b5b50f8cf1 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -105,16 +105,23 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
};
int err;
- /* Here the port is already bridged. Reflect the current configuration
- * so that drivers can program their chips accordingly.
+ /* Set the flooding mode before joining the port in the switch */
+ err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD, NULL);
+ if (err)
+ return err;
+
+ /* Here the interface is already bridged. Reflect the current
+ * configuration so that drivers can program their chips accordingly.
*/
dp->bridge_dev = br;
err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
/* The bridging is rolled back on error */
- if (err)
+ if (err) {
+ dsa_port_bridge_flags(dp, 0, NULL);
dp->bridge_dev = NULL;
+ }
return err;
}
@@ -137,6 +144,9 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
if (err)
pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+ /* Port is leaving the bridge, disable flooding */
+ dsa_port_bridge_flags(dp, 0, NULL);
+
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
* so allow it to be in BR_STATE_FORWARDING to be kept functional
*/
@@ -177,6 +187,35 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
}
+int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+ struct switchdev_trans *trans)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->port_egress_floods ||
+ (flags & ~(BR_FLOOD | BR_MCAST_FLOOD)))
+ return -EINVAL;
+
+ return 0;
+}
+
+int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+ struct switchdev_trans *trans)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ int err = 0;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ if (ds->ops->port_egress_floods)
+ err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD,
+ flags & BR_MCAST_FLOOD);
+
+ return err;
+}
+
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid)
{
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 91de3a663226..a78b2bba0332 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev)
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
struct net_device *master = dsa_slave_to_master(dev);
-
- if (change & IFF_ALLMULTI)
- dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
- if (change & IFF_PROMISC)
- dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1);
+ if (dev->flags & IFF_UP) {
+ if (change & IFF_ALLMULTI)
+ dev_set_allmulti(master,
+ dev->flags & IFF_ALLMULTI ? 1 : -1);
+ if (change & IFF_PROMISC)
+ dev_set_promiscuity(master,
+ dev->flags & IFF_PROMISC ? 1 : -1);
+ }
}
static void dsa_slave_set_rx_mode(struct net_device *dev)
@@ -292,6 +295,13 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
break;
+ case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+ ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags,
+ trans);
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans);
+ break;
default:
ret = -EOPNOTSUPP;
break;
@@ -362,24 +372,15 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
return err;
}
-static int dsa_slave_port_attr_get(struct net_device *dev,
- struct switchdev_attr *attr)
+static int dsa_slave_get_port_parent_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst = ds->dst;
- switch (attr->id) {
- case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
- attr->u.ppid.id_len = sizeof(dst->index);
- memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
- break;
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
- attr->u.brport_flags_support = 0;
- break;
- default:
- return -EOPNOTSUPP;
- }
+ ppid->id_len = sizeof(dst->index);
+ memcpy(&ppid->id, &dst->index, ppid->id_len);
return 0;
}
@@ -639,7 +640,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!dev->phydev && !dp->pl)
+ if (!dev->phydev || !dp->pl)
return -ENODEV;
if (!ds->ops->set_mac_eee)
@@ -659,7 +660,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!dev->phydev && !dp->pl)
+ if (!dev->phydev || !dp->pl)
return -ENODEV;
if (!ds->ops->get_mac_eee)
@@ -1046,10 +1047,10 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_get_phys_port_name = dsa_slave_get_phys_port_name,
.ndo_setup_tc = dsa_slave_setup_tc,
.ndo_get_stats64 = dsa_slave_get_stats64,
+ .ndo_get_port_parent_id = dsa_slave_get_port_parent_id,
};
static const struct switchdev_ops dsa_slave_switchdev_ops = {
- .switchdev_port_attr_get = dsa_slave_port_attr_get,
.switchdev_port_attr_set = dsa_slave_port_attr_set,
};
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 8b2f92e3f3a2..67ff3fae18d8 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -146,8 +146,17 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
return skb;
}
+static int dsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+ int *offset)
+{
+ *offset = 4;
+ *proto = ((__be16 *)skb->data)[1];
+ return 0;
+}
+
const struct dsa_device_ops dsa_netdev_ops = {
.xmit = dsa_xmit,
.rcv = dsa_rcv,
+ .flow_dissect = dsa_tag_flow_dissect,
.overhead = DSA_HLEN,
};
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index f5b87ee5c94e..234585ec116e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -165,8 +165,17 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
return skb;
}
+static int edsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+ int *offset)
+{
+ *offset = 8;
+ *proto = ((__be16 *)skb->data)[3];
+ return 0;
+}
+
const struct dsa_device_ops edsa_netdev_ops = {
.xmit = edsa_xmit,
.rcv = edsa_rcv,
+ .flow_dissect = edsa_tag_flow_dissect,
.overhead = EDSA_HLEN,
};
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index da71b9e2af52..927e9c86f745 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -67,6 +67,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
pskb_trim_rcsum(skb, skb->len - len);
+ skb->offload_fwd_mark = true;
+
return skb;
}
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index d14226ecfde4..bd61633d2c32 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -27,6 +27,7 @@
#include <net/6lowpan.h>
#include <net/ipv6_frag.h>
#include <net/inet_frag.h>
+#include <net/ip.h>
#include "6lowpan_i.h"
@@ -34,8 +35,8 @@ static const char lowpan_frags_cache_name[] = "lowpan-frags";
static struct inet_frags lowpan_frags;
-static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
- struct sk_buff *prev, struct net_device *ldev);
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev, struct net_device *ldev);
static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
@@ -88,9 +89,15 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
struct sk_buff *skb, u8 frag_type)
{
- struct sk_buff *prev, *next;
+ struct sk_buff *prev_tail;
struct net_device *ldev;
- int end, offset;
+ int end, offset, err;
+
+ /* inet_frag_queue_* functions use skb->cb; see struct ipfrag_skb_cb
+ * in inet_fragment.c
+ */
+ BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet_skb_parm));
+ BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet6_skb_parm));
if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
@@ -117,38 +124,15 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
}
}
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev ||
- lowpan_802154_cb(prev)->d_offset <
- lowpan_802154_cb(skb)->d_offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (lowpan_802154_cb(next)->d_offset >=
- lowpan_802154_cb(skb)->d_offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
-
ldev = skb->dev;
if (ldev)
skb->dev = NULL;
+ barrier();
+
+ prev_tail = fq->q.fragments_tail;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto err;
fq->q.stamp = skb->tstamp;
if (frag_type == LOWPAN_DISPATCH_FRAG1)
@@ -163,10 +147,11 @@ found:
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- res = lowpan_frag_reasm(fq, prev, ldev);
+ res = lowpan_frag_reasm(fq, skb, prev_tail, ldev);
skb->_skb_refdst = orefdst;
return res;
}
+ skb_dst_drop(skb);
return -1;
err:
@@ -175,97 +160,29 @@ err:
}
/* Check if this packet is complete.
- * Returns NULL on failure by any reason, and pointer
- * to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
-static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
- struct net_device *ldev)
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *ldev)
{
- struct sk_buff *fp, *head = fq->q.fragments;
- int sum_truesize;
+ void *reasm_data;
inet_frag_kill(&fq->q);
- /* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
-
- if (!fp)
- goto out_oom;
-
- fp->next = head->next;
- if (!fp->next)
- fq->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, fq->q.fragments);
- head->next = fq->q.fragments->next;
-
- consume_skb(fq->q.fragments);
- fq->q.fragments = head;
- }
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
goto out_oom;
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments.
- */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_oom;
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = head->data_len - plen;
- clone->data_len = clone->len;
- head->data_len -= clone->len;
- head->len -= clone->len;
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
- WARN_ON(head == NULL);
-
- sum_truesize = head->truesize;
- for (fp = head->next; fp;) {
- bool headstolen;
- int delta;
- struct sk_buff *next = fp->next;
-
- sum_truesize += fp->truesize;
- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
- kfree_skb_partial(fp, headstolen);
- } else {
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = fp;
- head->data_len += fp->len;
- head->len += fp->len;
- head->truesize += fp->truesize;
- }
- fp = next;
- }
- sub_frag_mem_limit(fq->q.net, sum_truesize);
-
- skb_mark_not_on_list(head);
- head->dev = ldev;
- head->tstamp = fq->q.stamp;
-
+ skb->dev = ldev;
+ skb->tstamp = fq->q.stamp;
fq->q.fragments = NULL;
+ fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
return 1;
out_oom:
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0dfb72c46671..eab3ebde981e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1385,6 +1385,15 @@ out:
}
EXPORT_SYMBOL(inet_gso_segment);
+static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
+ return ERR_PTR(-EINVAL);
+
+ return inet_gso_segment(skb, features);
+}
+
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
@@ -1861,7 +1870,7 @@ static struct packet_offload ip_packet_offload __read_mostly = {
static const struct net_offload ipip_offload = {
.callbacks = {
- .gso_segment = inet_gso_segment,
+ .gso_segment = ipip_gso_segment,
.gro_receive = ipip_gro_receive,
.gro_complete = ipip_gro_complete,
},
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b448cf32296c..6c2febc39dca 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1541,7 +1541,6 @@ static int ip_mc_check_igmp_msg(struct sk_buff *skb)
case IGMP_HOST_LEAVE_MESSAGE:
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
- /* fall through */
return 0;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
return ip_mc_check_igmp_reportv3(skb);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 1a4e9ff02762..5731670c560b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct sock *sk,
+ nla_total_size(1) /* INET_DIAG_TOS */
+ nla_total_size(1) /* INET_DIAG_TCLASS */
+ nla_total_size(4) /* INET_DIAG_MARK */
+ + nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
- if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+ ext & (1 << (INET_DIAG_TCLASS - 1))) {
u32 classid = 0;
#ifdef CONFIG_SOCK_CGROUP_DATA
classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif
+ /* Fallback to socket priority if class id isn't set.
+ * Classful qdiscs use it as direct reference to class.
+ * For cgroup2 classid is always zero.
+ */
+ if (!classid)
+ classid = sk->sk_priority;
if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
goto errout;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d757b9642d0d..be778599bfed 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
atomic_set(&p->rid, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
+ p->n_redirects = 0;
/* 60*HZ is arbitrary, but chosen enough high so that the first
* calculation of tokens is at its maximum.
*/
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d1cef66820d3..ccee9411dae1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1381,12 +1381,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm *p = &t->parms;
+ __be16 o_flags = p->o_flags;
+
+ if ((t->erspan_ver == 1 || t->erspan_ver == 2) &&
+ !t->collect_md)
+ o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
nla_put_be16(skb, IFLA_GRE_OFLAGS,
- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+ gre_tnl_flags_to_gre_flags(o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index fb99002c3d4e..2c931120c494 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,7 +67,6 @@
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
-#include <net/switchdev.h>
#include <linux/nospec.h>
@@ -111,7 +110,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
-static void mroute_clean_tables(struct mr_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -416,7 +415,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
+ mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
+ MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
rhltable_destroy(&mrt->mfc_hash);
kfree(mrt);
}
@@ -837,10 +837,8 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
static int vif_add(struct net *net, struct mr_table *mrt,
struct vifctl *vifc, int mrtsock)
{
+ struct netdev_phys_item_id ppid = { };
int vifi = vifc->vifc_vifi;
- struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- };
struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct in_device *in_dev;
@@ -919,10 +917,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
(VIFF_TUNNEL | VIFF_REGISTER));
- attr.orig_dev = dev;
- if (!switchdev_port_attr_get(dev, &attr)) {
- memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
- v->dev_parent_id.id_len = attr.u.ppid.id_len;
+ err = dev_get_port_parent_id(dev, &ppid, true);
+ if (err == 0) {
+ memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
+ v->dev_parent_id.id_len = ppid.id_len;
} else {
v->dev_parent_id.id_len = 0;
}
@@ -1299,7 +1297,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
}
/* Close the multicast socket, and clear the vif tables etc */
-static void mroute_clean_tables(struct mr_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
struct net *net = read_pnet(&mrt->net);
struct mr_mfc *c, *tmp;
@@ -1308,35 +1306,44 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
int i;
/* Shut down all active vif entries */
- for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
- continue;
- vif_delete(mrt, i, 0, &list);
+ if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
+ for (i = 0; i < mrt->maxvif; i++) {
+ if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
+ !(flags & MRT_FLUSH_VIFS_STATIC)) ||
+ (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
+ continue;
+ vif_delete(mrt, i, 0, &list);
+ }
+ unregister_netdevice_many(&list);
}
- unregister_netdevice_many(&list);
/* Wipe the cache */
- list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
- cache = (struct mfc_cache *)c;
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
- mrt->id);
- mroute_netlink_event(mrt, cache, RTM_DELROUTE);
- mr_cache_put(c);
- }
-
- if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
- list_del(&c->list);
+ if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
+ (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
+ list_del_rcu(&c->list);
cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
+ mrt->id);
mroute_netlink_event(mrt, cache, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, cache);
+ mr_cache_put(c);
+ }
+ }
+
+ if (flags & MRT_FLUSH_MFC) {
+ if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+ list_del(&c->list);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
+ }
+ spin_unlock_bh(&mfc_unres_lock);
}
- spin_unlock_bh(&mfc_unres_lock);
}
}
@@ -1357,7 +1364,7 @@ static void mrtsock_destruct(struct sock *sk)
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
- mroute_clean_tables(mrt, false);
+ mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC);
}
}
rtnl_unlock();
@@ -1482,6 +1489,17 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
sk == rtnl_dereference(mrt->mroute_sk),
parent);
break;
+ case MRT_FLUSH:
+ if (optlen != sizeof(val)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (get_user(val, (int __user *)optval)) {
+ ret = -EFAULT;
+ break;
+ }
+ mroute_clean_tables(mrt, val);
+ break;
/* Control PIM assert. */
case MRT_ASSERT:
if (optlen != sizeof(val)) {
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2a909e5f9ba0..835d50b279f5 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -864,7 +864,7 @@ static struct pernet_operations clusterip_net_ops = {
.size = sizeof(struct clusterip_net),
};
-struct notifier_block cip_netdev_notifier = {
+static struct notifier_block cip_netdev_notifier = {
.notifier_call = clusterip_netdev_event
};
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index e26165af45cb..4b07eb8a9b18 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -215,6 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
/* Change outer to look like the reply to an incoming packet */
nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+ target.dst.protonum = IPPROTO_ICMP;
if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
return 0;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
index a0aa13bcabda..0a8a60c1bf9a 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -105,6 +105,8 @@ static void fast_csum(struct snmp_ctx *ctx, unsigned char offset)
int snmp_version(void *context, size_t hdrlen, unsigned char tag,
const void *data, size_t datalen)
{
+ if (datalen != 1)
+ return -EINVAL;
if (*(unsigned char *)data > 1)
return -ENOTSUPP;
return 1;
@@ -114,8 +116,11 @@ int snmp_helper(void *context, size_t hdrlen, unsigned char tag,
const void *data, size_t datalen)
{
struct snmp_ctx *ctx = (struct snmp_ctx *)context;
- __be32 *pdata = (__be32 *)data;
+ __be32 *pdata;
+ if (datalen != 4)
+ return -EINVAL;
+ pdata = (__be32 *)data;
if (*pdata == ctx->from) {
pr_debug("%s: %pI4 to %pI4\n", __func__,
(void *)&ctx->from, (void *)&ctx->to);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index aa8304c618b8..7dc3c324b911 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -173,21 +173,16 @@ EXPORT_SYMBOL_GPL(nf_send_reset);
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
{
struct iphdr *iph = ip_hdr(skb_in);
- u8 proto;
+ u8 proto = iph->protocol;
if (iph->frag_off & htons(IP_OFFSET))
return;
- if (skb_csum_unnecessary(skb_in)) {
+ if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) {
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
return;
}
- if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
- proto = iph->protocol;
- else
- proto = 0;
-
if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 16259ea9df54..ecc12a768191 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -887,13 +887,15 @@ void ip_rt_send_redirect(struct sk_buff *skb)
/* No redirected packets during ip_rt_redirect_silence;
* reset the algorithm.
*/
- if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+ if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
peer->rate_tokens = 0;
+ peer->n_redirects = 0;
+ }
/* Too many ignored redirects; do not send anything
* set dst.rate_last to the last seen redirected packet.
*/
- if (peer->rate_tokens >= ip_rt_redirect_number) {
+ if (peer->n_redirects >= ip_rt_redirect_number) {
peer->rate_last = jiffies;
goto out_put_peer;
}
@@ -910,6 +912,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->rate_tokens;
+ ++peer->n_redirects;
#ifdef CONFIG_IP_ROUTE_VERBOSE
if (log_martians &&
peer->rate_tokens == ip_rt_redirect_number)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cab6b2f2f61d..769508c75dce 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2546,6 +2546,7 @@ void tcp_write_queue_purge(struct sock *sk)
sk_mem_reclaim(sk);
tcp_clear_all_retrans_hints(tcp_sk(sk));
tcp_sk(sk)->packets_out = 0;
+ inet_csk(sk)->icsk_backoff = 0;
}
int tcp_disconnect(struct sock *sk, int flags)
@@ -2596,6 +2597,7 @@ int tcp_disconnect(struct sock *sk, int flags)
if (tp->write_seq == 0)
tp->write_seq = 1;
icsk->icsk_backoff = 0;
+ tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 662b034f1795..4010ae3644f3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -536,12 +536,15 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (sock_owned_by_user(sk))
break;
+ skb = tcp_rtx_queue_head(sk);
+ if (WARN_ON_ONCE(!skb))
+ break;
+
icsk->icsk_backoff--;
icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
TCP_TIMEOUT_INIT;
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
- skb = tcp_rtx_queue_head(sk);
tcp_mstamp_refresh(tp);
delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dcb1d434f7da..da5a21050ba9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1200,7 +1200,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa == ifp)
continue;
- if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+ if (ifa->prefix_len != ifp->prefix_len ||
+ !ipv6_prefix_equal(&ifa->addr, &ifp->addr,
ifp->prefix_len))
continue;
if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 5cd0029d930e..6c79af056d9b 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
return -EAFNOSUPPORT;
}
+static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
+{
+ return -EAFNOSUPPORT;
+}
+
static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
{
return NULL;
@@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+ .ipv6_route_input = eafnosupport_ipv6_route_input,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d99753b5e39b..2f45d2a3e3a3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = {
.exit = inet6_net_exit,
};
+static int ipv6_route_input(struct sk_buff *skb)
+{
+ ip6_route_input(skb);
+ return skb_dst(skb)->error;
+}
+
static const struct ipv6_stub ipv6_stub_impl = {
.ipv6_sock_mc_join = ipv6_sock_mc_join,
.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
.ipv6_dst_lookup = ip6_dst_lookup,
+ .ipv6_route_input = ipv6_route_input,
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e081e69d534e..bb525abd860e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1719,6 +1719,24 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
+static void ip6erspan_set_version(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ parms->erspan_ver = 1;
+ if (data[IFLA_GRE_ERSPAN_VER])
+ parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (parms->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX])
+ parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ } else if (parms->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR])
+ parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (data[IFLA_GRE_ERSPAN_HWID])
+ parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ }
+}
+
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
{
@@ -1767,20 +1785,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_COLLECT_METADATA])
parms->collect_md = true;
-
- parms->erspan_ver = 1;
- if (data[IFLA_GRE_ERSPAN_VER])
- parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
- if (parms->erspan_ver == 1) {
- if (data[IFLA_GRE_ERSPAN_INDEX])
- parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
- } else if (parms->erspan_ver == 2) {
- if (data[IFLA_GRE_ERSPAN_DIR])
- parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
- if (data[IFLA_GRE_ERSPAN_HWID])
- parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
- }
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -2098,12 +2102,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm *p = &t->parms;
+ __be16 o_flags = p->o_flags;
+
+ if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
+ !p->collect_md)
+ o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
nla_put_be16(skb, IFLA_GRE_OFLAGS,
- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+ gre_tnl_flags_to_gre_flags(o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
@@ -2198,6 +2207,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
int err;
ip6gre_netlink_parms(data, &nt->parms);
+ ip6erspan_set_version(data, &nt->parms);
ign = net_generic(net, ip6gre_net_id);
if (nt->parms.collect_md) {
@@ -2243,6 +2253,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(t))
return PTR_ERR(t);
+ ip6erspan_set_version(data, &p);
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 5c045691c302..345882d9c061 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -383,9 +383,36 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
},
};
+static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
+ return ERR_PTR(-EINVAL);
+
+ return ipv6_gso_segment(skb, features);
+}
+
+static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6))
+ return ERR_PTR(-EINVAL);
+
+ return inet_gso_segment(skb, features);
+}
+
+static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6))
+ return ERR_PTR(-EINVAL);
+
+ return ipv6_gso_segment(skb, features);
+}
+
static const struct net_offload sit_offload = {
.callbacks = {
- .gso_segment = ipv6_gso_segment,
+ .gso_segment = sit_gso_segment,
.gro_receive = sit_ip6ip6_gro_receive,
.gro_complete = sit_gro_complete,
},
@@ -393,7 +420,7 @@ static const struct net_offload sit_offload = {
static const struct net_offload ip4ip6_offload = {
.callbacks = {
- .gso_segment = inet_gso_segment,
+ .gso_segment = ip4ip6_gso_segment,
.gro_receive = ip4ip6_gro_receive,
.gro_complete = ip4ip6_gro_complete,
},
@@ -401,7 +428,7 @@ static const struct net_offload ip4ip6_offload = {
static const struct net_offload ip6ip6_offload = {
.callbacks = {
- .gso_segment = ipv6_gso_segment,
+ .gso_segment = ip6ip6_gso_segment,
.gro_receive = sit_ip6ip6_gro_receive,
.gro_complete = ip6ip6_gro_complete,
},
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cc01aa3f2b5e..3594f1d9c68c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -97,7 +97,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -393,7 +393,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
+ mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
+ MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
rhltable_destroy(&mrt->mfc_hash);
kfree(mrt);
}
@@ -1496,42 +1497,51 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
struct mr_mfc *c, *tmp;
LIST_HEAD(list);
int i;
/* Shut down all active vif entries */
- for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
- continue;
- mif6_delete(mrt, i, 0, &list);
+ if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
+ for (i = 0; i < mrt->maxvif; i++) {
+ if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
+ !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
+ (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
+ continue;
+ mif6_delete(mrt, i, 0, &list);
+ }
+ unregister_netdevice_many(&list);
}
- unregister_netdevice_many(&list);
/* Wipe the cache */
- list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
- list_del_rcu(&c->list);
- call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
- FIB_EVENT_ENTRY_DEL,
- (struct mfc6_cache *)c, mrt->id);
- mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
- mr_cache_put(c);
+ if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
+ (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL,
+ (struct mfc6_cache *)c, mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
+ }
}
- if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
- list_del(&c->list);
- mr6_netlink_event(mrt, (struct mfc6_cache *)c,
- RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+ if (flags & MRT6_FLUSH_MFC) {
+ if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+ list_del(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+ }
+ spin_unlock_bh(&mfc_unres_lock);
}
- spin_unlock_bh(&mfc_unres_lock);
}
}
@@ -1587,7 +1597,7 @@ int ip6mr_sk_done(struct sock *sk)
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- mroute_clean_tables(mrt, false);
+ mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
err = 0;
break;
}
@@ -1703,6 +1713,20 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
rtnl_unlock();
return ret;
+ case MRT6_FLUSH:
+ {
+ int flags;
+
+ if (optlen != sizeof(flags))
+ return -EINVAL;
+ if (get_user(flags, (int __user *)optval))
+ return -EFAULT;
+ rtnl_lock();
+ mroute_clean_tables(mrt, flags);
+ rtnl_unlock();
+ return 0;
+ }
+
/*
* Control PIM assert (to activate pim will activate assert)
*/
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
index 55e2ac179f28..dddd75d1be0e 100644
--- a/net/ipv6/mcast_snoop.c
+++ b/net/ipv6/mcast_snoop.c
@@ -128,7 +128,6 @@ static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
switch (mld->mld_type) {
case ICMPV6_MGM_REDUCTION:
case ICMPV6_MGM_REPORT:
- /* fall through */
return 0;
case ICMPV6_MLD2_REPORT:
return ipv6_mc_check_mld_reportv2(skb);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8b075f0bc351..1240ccd57f39 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
struct sock *sk = sk_to_full_sk(skb->sk);
unsigned int hh_len;
struct dst_entry *dst;
+ int strict = (ipv6_addr_type(&iph->daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
- rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
+ strict ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
@@ -84,8 +86,8 @@ static int nf_ip6_reroute(struct sk_buff *skb,
return 0;
}
-static int nf_ip6_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict)
+int __nf_ip6_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict)
{
static const struct ipv6_pinfo fake_pinfo;
static const struct inet_sock fake_sk = {
@@ -105,12 +107,17 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
*dst = result;
return err;
}
+EXPORT_SYMBOL_GPL(__nf_ip6_route);
static const struct nf_ipv6_ops ipv6ops = {
+#if IS_MODULE(CONFIG_IPV6)
.chk_addr = ipv6_chk_addr,
- .route_input = ip6_route_input,
+ .route_me_harder = ip6_route_me_harder,
+ .dev_get_saddr = ipv6_dev_get_saddr,
+ .route = __nf_ip6_route,
+#endif
+ .route_input = ip6_route_input,
.fragment = ip6_fragment,
- .route = nf_ip6_route,
.reroute = nf_ip6_reroute,
};
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 9c914db44bec..490bfd3c9162 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -17,6 +17,7 @@
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/ip6_route.h>
+#include <net/xfrm.h>
#include <net/ipv6.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -226,6 +227,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
}
nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+ target.dst.protonum = IPPROTO_ICMPV6;
if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
return 0;
@@ -317,6 +319,20 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
return ret;
}
+static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
+{
+#ifdef CONFIG_IPV6_MODULE
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return -EHOSTUNREACH;
+
+ return v6_ops->route_me_harder(net, skb);
+#else
+ return ip6_route_me_harder(net, skb);
+#endif
+}
+
static unsigned int
nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -333,7 +349,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
- err = ip6_route_me_harder(state->net, skb);
+ err = nat_route_me_harder(state->net, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 0ad0da5a2600..fd313b726263 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -24,6 +24,23 @@
static atomic_t v6_worker_count;
+static int
+nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+ const struct in6_addr *daddr, unsigned int srcprefs,
+ struct in6_addr *saddr)
+{
+#ifdef CONFIG_IPV6_MODULE
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return -EHOSTUNREACH;
+
+ return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#else
+ return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#endif
+}
+
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
const struct net_device *out)
@@ -38,8 +55,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
ctinfo == IP_CT_RELATED_REPLY)));
- if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
- &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
return NF_DROP;
nat = nf_ct_nat_ext_add(ct);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index b9c8a763c863..02e9228641e0 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -233,6 +233,9 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
+ if (!nf_reject_verify_csum(proto))
+ return true;
+
return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
}
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 36be3cf0adef..73cdc0bc63f7 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
struct ipv6hdr *iph)
{
const struct net_device *dev = NULL;
- const struct nf_ipv6_ops *v6ops;
int route_err, addrtype;
struct rt6_info *rt;
struct flowi6 fl6 = {
@@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
};
u32 ret = 0;
- v6ops = nf_get_ipv6_ops();
- if (!v6ops)
- return RTN_UNREACHABLE;
-
if (priv->flags & NFTA_FIB_F_IIF)
dev = nft_in(pkt);
else if (priv->flags & NFTA_FIB_F_OIF)
@@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
- if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
ret = RTN_LOCAL;
- route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+ route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt,
flowi6_to_flowi(&fl6), false);
if (route_err)
goto err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc066fdf7e46..87a0561136dd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2277,14 +2277,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
- bool from_set;
-
- rcu_read_lock();
- from_set = !!rcu_dereference(rt->from);
- rcu_read_unlock();
-
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU || from_set);
+ (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 8d0ba757a46c..9b2f272ca164 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
rcu_read_unlock();
genlmsg_end(msg, hdr);
- genlmsg_reply(msg, info);
-
- return 0;
+ return genlmsg_reply(msg, info);
nla_put_failure:
rcu_read_unlock();
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 8181ee7e1e27..ee5403cbe655 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
} else {
ip6_flow_hdr(hdr, 0, flowlabel);
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
}
hdr->nexthdr = NEXTHDR_ROUTING;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1e03305c0549..e8a1dabef803 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
}
err = 0;
- if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
+ if (__in6_dev_get(skb->dev) &&
+ !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
goto out;
if (t->parms.iph.daddr == 0)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 26f1d435696a..fed6becc5daf 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -83,8 +83,7 @@
#define L2TP_SLFLAG_S 0x40000000
#define L2TP_SL_SEQ_MASK 0x00ffffff
-#define L2TP_HDR_SIZE_SEQ 10
-#define L2TP_HDR_SIZE_NOSEQ 6
+#define L2TP_HDR_SIZE_MAX 14
/* Default trace flags */
#define L2TP_DEFAULT_DEBUG_FLAGS 0
@@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
__skb_pull(skb, sizeof(struct udphdr));
/* Short packet? */
- if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
l2tp_info(tunnel, L2TP_MSG_DATA,
"%s: recv short packet (len=%d)\n",
tunnel->name, skb->len);
@@ -884,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
goto error;
}
+ if (tunnel->version == L2TP_HDR_VER_3 &&
+ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ goto error;
+
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9c9afe94d389..b2ce90260c35 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel)
}
#endif
+static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb,
+ unsigned char **ptr, unsigned char **optr)
+{
+ int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session);
+
+ if (opt_len > 0) {
+ int off = *ptr - *optr;
+
+ if (!pskb_may_pull(skb, off + opt_len))
+ return -1;
+
+ if (skb->data != *optr) {
+ *optr = skb->data;
+ *ptr = skb->data + off;
+ }
+ }
+
+ return 0;
+}
+
#define l2tp_printk(ptr, type, func, fmt, ...) \
do { \
if (((ptr)->debug) & (type)) \
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 35f6f86d4dcc..d4c60523c549 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
+ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ goto discard_sess;
+
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
l2tp_session_dec_refcount(session);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 237f1a4a0b0c..0ae6899edac0 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
+ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ goto discard_sess;
+
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
l2tp_session_dec_refcount(session);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index e94b1a0407af..2c4cd4183bf9 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -366,6 +366,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
+ ieee80211_agg_stop_txq(sta, tid);
+
spin_unlock_bh(&sta->lock);
ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n",
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d65aa019ce85..09dd1c2860fc 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -941,6 +941,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
BSS_CHANGED_P2P_PS |
BSS_CHANGED_TXPOWER;
int err;
+ int prev_beacon_int;
old = sdata_dereference(sdata->u.ap.beacon, sdata);
if (old)
@@ -963,6 +964,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->needed_rx_chains = sdata->local->rx_chains;
+ prev_beacon_int = sdata->vif.bss_conf.beacon_int;
sdata->vif.bss_conf.beacon_int = params->beacon_interval;
if (params->he_cap)
@@ -974,8 +976,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (!err)
ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
mutex_unlock(&local->mtx);
- if (err)
+ if (err) {
+ sdata->vif.bss_conf.beacon_int = prev_beacon_int;
return err;
+ }
/*
* Apply control port protocol, this allows us to
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 8b26858ab4d5..574c3891c4b2 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -70,6 +70,7 @@ enum mesh_deferred_task_flags {
* @dst: mesh path destination mac address
* @mpp: mesh proxy mac address
* @rhash: rhashtable list pointer
+ * @walk_list: linked list containing all mesh_path objects.
* @gate_list: list pointer for known gates list
* @sdata: mesh subif
* @next_hop: mesh neighbor to which frames for this destination will be
@@ -106,6 +107,7 @@ struct mesh_path {
u8 dst[ETH_ALEN];
u8 mpp[ETH_ALEN]; /* used for MPP or MAP */
struct rhash_head rhash;
+ struct hlist_node walk_list;
struct hlist_node gate_list;
struct ieee80211_sub_if_data *sdata;
struct sta_info __rcu *next_hop;
@@ -135,12 +137,16 @@ struct mesh_path {
* gate's mpath may or may not be resolved and active.
* @gates_lock: protects updates to known_gates
* @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
+ * @walk_head: linked list containging all mesh_path objects
+ * @walk_lock: lock protecting walk_head
* @entries: number of entries in the table
*/
struct mesh_table {
struct hlist_head known_gates;
spinlock_t gates_lock;
struct rhashtable rhead;
+ struct hlist_head walk_head;
+ spinlock_t walk_lock;
atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */
};
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a5125624a76d..88a6d5e18ccc 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -59,8 +59,10 @@ static struct mesh_table *mesh_table_alloc(void)
return NULL;
INIT_HLIST_HEAD(&newtbl->known_gates);
+ INIT_HLIST_HEAD(&newtbl->walk_head);
atomic_set(&newtbl->entries, 0);
spin_lock_init(&newtbl->gates_lock);
+ spin_lock_init(&newtbl->walk_lock);
return newtbl;
}
@@ -249,28 +251,15 @@ mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst)
static struct mesh_path *
__mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
{
- int i = 0, ret;
- struct mesh_path *mpath = NULL;
- struct rhashtable_iter iter;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return NULL;
-
- rhashtable_walk_start(&iter);
+ int i = 0;
+ struct mesh_path *mpath;
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
if (i++ == idx)
break;
}
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
- if (IS_ERR(mpath) || !mpath)
+ if (!mpath)
return NULL;
if (mpath_expired(mpath)) {
@@ -432,6 +421,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
return ERR_PTR(-ENOMEM);
tbl = sdata->u.mesh.mesh_paths;
+ spin_lock_bh(&tbl->walk_lock);
do {
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
&new_mpath->rhash,
@@ -441,20 +431,20 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
mpath = rhashtable_lookup_fast(&tbl->rhead,
dst,
mesh_rht_params);
-
+ else if (!ret)
+ hlist_add_head(&new_mpath->walk_list, &tbl->walk_head);
} while (unlikely(ret == -EEXIST && !mpath));
+ spin_unlock_bh(&tbl->walk_lock);
- if (ret && ret != -EEXIST)
- return ERR_PTR(ret);
-
- /* At this point either new_mpath was added, or we found a
- * matching entry already in the table; in the latter case
- * free the unnecessary new entry.
- */
- if (ret == -EEXIST) {
+ if (ret) {
kfree(new_mpath);
+
+ if (ret != -EEXIST)
+ return ERR_PTR(ret);
+
new_mpath = mpath;
}
+
sdata->u.mesh.mesh_paths_generation++;
return new_mpath;
}
@@ -480,9 +470,17 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
memcpy(new_mpath->mpp, mpp, ETH_ALEN);
tbl = sdata->u.mesh.mpp_paths;
+
+ spin_lock_bh(&tbl->walk_lock);
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
&new_mpath->rhash,
mesh_rht_params);
+ if (!ret)
+ hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head);
+ spin_unlock_bh(&tbl->walk_lock);
+
+ if (ret)
+ kfree(new_mpath);
sdata->u.mesh.mpp_paths_generation++;
return ret;
@@ -503,20 +501,9 @@ void mesh_plink_broken(struct sta_info *sta)
struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
if (rcu_access_pointer(mpath->next_hop) == sta &&
mpath->flags & MESH_PATH_ACTIVE &&
!(mpath->flags & MESH_PATH_FIXED)) {
@@ -530,8 +517,7 @@ void mesh_plink_broken(struct sta_info *sta)
WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
}
}
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ rcu_read_unlock();
}
static void mesh_path_free_rcu(struct mesh_table *tbl,
@@ -551,6 +537,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl,
static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath)
{
+ hlist_del_rcu(&mpath->walk_list);
rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params);
mesh_path_free_rcu(tbl, mpath);
}
@@ -571,27 +558,14 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ struct hlist_node *n;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if (rcu_access_pointer(mpath->next_hop) == sta)
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
@@ -599,51 +573,26 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
{
struct mesh_table *tbl = sdata->u.mesh.mpp_paths;
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ struct hlist_node *n;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if (ether_addr_equal(mpath->mpp, proxy))
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
static void table_flush_by_iface(struct mesh_table *tbl)
{
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
+ struct hlist_node *n;
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
/**
@@ -675,15 +624,15 @@ static int table_path_del(struct mesh_table *tbl,
{
struct mesh_path *mpath;
- rcu_read_lock();
+ spin_lock_bh(&tbl->walk_lock);
mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params);
if (!mpath) {
- rcu_read_unlock();
+ spin_unlock_bh(&tbl->walk_lock);
return -ENXIO;
}
__mesh_path_del(tbl, mpath);
- rcu_read_unlock();
+ spin_unlock_bh(&tbl->walk_lock);
return 0;
}
@@ -854,28 +803,16 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
struct mesh_table *tbl)
{
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
+ struct hlist_node *n;
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_KERNEL);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if ((!(mpath->flags & MESH_PATH_RESOLVING)) &&
(!(mpath->flags & MESH_PATH_FIXED)) &&
time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 61c7ea9de2cc..8a49a74c0a37 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1945,9 +1945,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
int head_need, bool may_encrypt)
{
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_hdr *hdr;
+ bool enc_tailroom;
int tail_need = 0;
- if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) {
+ hdr = (struct ieee80211_hdr *) skb->data;
+ enc_tailroom = may_encrypt &&
+ (sdata->crypto_tx_tailroom_needed_cnt ||
+ ieee80211_is_mgmt(hdr->frame_control));
+
+ if (enc_tailroom) {
tail_need = IEEE80211_ENCRYPT_TAILROOM;
tail_need -= skb_tailroom(skb);
tail_need = max_t(int, tail_need, 0);
@@ -1955,8 +1962,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
if (skb_cloned(skb) &&
(!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) ||
- !skb_clone_writable(skb, ETH_HLEN) ||
- (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
+ !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom))
I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
else if (head_need || tail_need)
I802_DEBUG_INC(local->tx_expand_skb_head);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3f5a704d1ab0..4c1655972565 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -5,7 +5,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -2233,6 +2233,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MONITOR:
break;
+ case NL80211_IFTYPE_ADHOC:
+ if (sdata->vif.bss_conf.ibss_joined)
+ WARN_ON(drv_join_ibss(local, sdata));
+ /* fall through */
default:
ieee80211_reconfig_stations(sdata);
/* fall through */
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 94f53a9b7d1a..dda8930f20e7 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -183,8 +183,8 @@ static int mpls_build_state(struct nlattr *nla,
&n_labels, NULL, extack))
return -EINVAL;
- newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
- n_labels * sizeof(u32));
+ newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label,
+ n_labels));
if (!newts)
return -ENOMEM;
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index cad48d07c818..8401cefd9f65 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -29,6 +29,7 @@ config IP_VS_IPV6
bool "IPv6 support for IPVS"
depends on IPV6 = y || IP_VS = IPV6
select IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6
---help---
Add IPv6 support to IPVS.
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e969dad66991..43bbaa32b1d6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1566,14 +1566,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
"ip_vs_in: packet continues traversal as normal");
- if (iph->fragoffs) {
- /* Fragment that couldn't be mapped to a conn entry
- * is missing module nf_defrag_ipv6
- */
- IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+
+ /* Fragment couldn't be mapped to a conn entry */
+ if (iph->fragoffs)
IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
"unhandled fragment");
- }
+
*verdict = NF_ACCEPT;
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7d6318664eb2..2a3d4e27cf3b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -43,6 +43,7 @@
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
#include <net/route.h>
#include <net/sock.h>
@@ -900,11 +901,17 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
#ifdef CONFIG_IP_VS_IPV6
if (udest->af == AF_INET6) {
+ int ret;
+
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
!__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
return -EINVAL;
+
+ ret = nf_defrag_ipv6_enable(svc->ipvs->net);
+ if (ret)
+ return ret;
} else
#endif
{
@@ -1228,6 +1235,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ret = -EINVAL;
goto out_err;
}
+
+ ret = nf_defrag_ipv6_enable(ipvs->net);
+ if (ret)
+ goto out_err;
}
#endif
@@ -2734,8 +2745,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
int size;
get = (struct ip_vs_get_services *)arg;
- size = sizeof(*get) +
- sizeof(struct ip_vs_service_entry) * get->num_services;
+ size = struct_size(get, entrytable, get->num_services);
if (*len != size) {
pr_err("length: %u != %u\n", *len, size);
ret = -EINVAL;
@@ -2776,8 +2786,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
int size;
get = (struct ip_vs_get_dests *)arg;
- size = sizeof(*get) +
- sizeof(struct ip_vs_dest_entry) * get->num_dests;
+ size = struct_size(get, entrytable, get->num_dests);
if (*len != size) {
pr_err("length: %u != %u\n", *len, size);
ret = -EINVAL;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 815956ac5a76..e139c256e269 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -936,10 +936,18 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* REJECT will give spurious warnings here.
*/
- /* No external references means no one else could have
- * confirmed us.
+ /* Another skb with the same unconfirmed conntrack may
+ * win the race. This may happen for bridge(br_flood)
+ * or broadcast/multicast packets do skb_clone with
+ * unconfirmed conntrack.
*/
- WARN_ON(nf_ct_is_confirmed(ct));
+ if (unlikely(nf_ct_is_confirmed(ct))) {
+ WARN_ON_ONCE(1);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
+ return NF_DROP;
+ }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1042,6 +1050,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
}
if (nf_ct_key_equal(h, tuple, zone, net)) {
+ /* Tuple is taken already, so caller will need to find
+ * a new source port to use.
+ *
+ * Only exception:
+ * If the *original tuples* are identical, then both
+ * conntracks refer to the same flow.
+ * This is a rare situation, it can occur e.g. when
+ * more than one UDP packet is sent from same socket
+ * in different threads.
+ *
+ * Let nf_ct_resolve_clash() deal with this later.
+ */
+ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+ continue;
+
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 8071bb04a849..349b42a65c8a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2675,7 +2675,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
ret = ctnetlink_dump_tuples_ip(skb, &m);
if (ret >= 0) {
l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
+ ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
}
rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c8d2b6688a2a..f067c6b50857 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -21,6 +21,8 @@
#include <linux/tcp.h>
#include <linux/netfilter.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -54,6 +56,11 @@ module_param(sip_direct_media, int, 0600);
MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
"endpoints only (default 1)");
+static int sip_external_media __read_mostly = 0;
+module_param(sip_external_media, int, 0600);
+MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external "
+ "endpoints (default 0)");
+
const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
@@ -861,6 +868,41 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3))
return NF_ACCEPT;
saddr = &ct->tuplehash[!dir].tuple.src.u3;
+ } else if (sip_external_media) {
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct net *net = dev_net(dev);
+ struct rtable *rt;
+ struct flowi4 fl4 = {};
+#if IS_ENABLED(CONFIG_IPV6)
+ struct flowi6 fl6 = {};
+#endif
+ struct dst_entry *dst = NULL;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ fl4.daddr = daddr->ip;
+ rt = ip_route_output_key(net, &fl4);
+ if (!IS_ERR(rt))
+ dst = &rt->dst;
+ break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ fl6.daddr = daddr->in6;
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ }
+ break;
+#endif
+ }
+
+ /* Don't predict any conntracks when media endpoint is reachable
+ * through the same interface as the signalling peer.
+ */
+ if (dst && dst->dev == dev)
+ return NF_ACCEPT;
}
/* We need to check whether the registration exists before attempting
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e92bedd09cde..e1a88ba2249e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -131,6 +131,23 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_set_is_anonymous(set))
+ return;
+
+ list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWSET &&
+ nft_trans_set(trans) == set) {
+ nft_trans_set_bound(trans) = true;
+ break;
+ }
+ }
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
@@ -226,18 +243,6 @@ static int nft_delchain(struct nft_ctx *ctx)
return err;
}
-/* either expr ops provide both activate/deactivate, or neither */
-static bool nft_expr_check_ops(const struct nft_expr_ops *ops)
-{
- if (!ops)
- return true;
-
- if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate)))
- return false;
-
- return true;
-}
-
static void nft_rule_expr_activate(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
@@ -253,14 +258,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
}
static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+ struct nft_rule *rule,
+ enum nft_trans_phase phase)
{
struct nft_expr *expr;
expr = nft_expr_first(rule);
while (expr != nft_expr_last(rule) && expr->ops) {
if (expr->ops->deactivate)
- expr->ops->deactivate(ctx, expr);
+ expr->ops->deactivate(ctx, expr, phase);
expr = nft_expr_next(expr);
}
@@ -311,7 +317,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
nft_trans_destroy(trans);
return err;
}
- nft_rule_expr_deactivate(ctx, rule);
+ nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
return 0;
}
@@ -322,6 +328,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
int err;
list_for_each_entry(rule, &ctx->chain->rules, list) {
+ if (!nft_is_active_next(ctx->net, rule))
+ continue;
+
err = nft_delrule(ctx, rule);
if (err < 0)
return err;
@@ -1972,9 +1981,6 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
*/
int nft_register_expr(struct nft_expr_type *type)
{
- if (!nft_expr_check_ops(type->ops))
- return -EINVAL;
-
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (type->family == NFPROTO_UNSPEC)
list_add_tail_rcu(&type->list, &nf_tables_expressions);
@@ -2122,10 +2128,6 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
err = PTR_ERR(ops);
goto err1;
}
- if (!nft_expr_check_ops(ops)) {
- err = -EINVAL;
- goto err1;
- }
} else
ops = type->ops;
@@ -2239,6 +2241,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
[NFTA_RULE_ID] = { .type = NLA_U32 },
+ [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -2554,7 +2557,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
static void nf_tables_rule_release(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
- nft_rule_expr_deactivate(ctx, rule);
+ nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
}
@@ -3760,39 +3763,30 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
bind:
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
+ nft_set_trans_bind(ctx, set);
+
return 0;
}
EXPORT_SYMBOL_GPL(nf_tables_bind_set);
-void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_binding *binding)
-{
- if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
- nft_is_active(ctx->net, set))
- list_add_tail_rcu(&set->list, &ctx->table->sets);
-
- list_add_tail_rcu(&binding->list, &set->bindings);
-}
-EXPORT_SYMBOL_GPL(nf_tables_rebind_set);
-
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_binding *binding)
+ struct nft_set_binding *binding, bool event)
{
list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
- nft_is_active(ctx->net, set))
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
list_del_rcu(&set->list);
+ if (event)
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
+ GFP_KERNEL);
+ }
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
{
- if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
- nft_is_active(ctx->net, set)) {
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
nft_set_destroy(set);
- }
}
EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
@@ -6621,6 +6615,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_DELRULE);
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_TRANS_COMMIT);
break;
case NFT_MSG_NEWSET:
nft_clear(net, nft_trans_set(trans));
@@ -6707,7 +6704,8 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_set_destroy(nft_trans_set(trans));
+ if (!nft_trans_set_bound(trans))
+ nft_set_destroy(nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
@@ -6768,7 +6766,9 @@ static int __nf_tables_abort(struct net *net)
case NFT_MSG_NEWRULE:
trans->ctx.chain->use--;
list_del_rcu(&nft_trans_rule(trans)->list);
- nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_TRANS_ABORT);
break;
case NFT_MSG_DELRULE:
trans->ctx.chain->use++;
@@ -6778,7 +6778,8 @@ static int __nf_tables_abort(struct net *net)
break;
case NFT_MSG_NEWSET:
trans->ctx.table->use--;
- list_del_rcu(&nft_trans_set(trans)->list);
+ if (!nft_trans_set_bound(trans))
+ list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
trans->ctx.table->use++;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5eb269428832..0a4bad55a8aa 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -61,6 +61,21 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net)
return net_generic(net, nft_compat_net_id);
}
+static void nft_xt_get(struct nft_xt *xt)
+{
+ /* refcount_inc() warns on 0 -> 1 transition, but we can't
+ * init the reference count to 1 in .select_ops -- we can't
+ * undo such an increase when another expression inside the same
+ * rule fails afterwards.
+ */
+ if (xt->listcnt == 0)
+ refcount_set(&xt->refcnt, 1);
+ else
+ refcount_inc(&xt->refcnt);
+
+ xt->listcnt++;
+}
+
static bool nft_xt_put(struct nft_xt *xt)
{
if (refcount_dec_and_test(&xt->refcnt)) {
@@ -291,7 +306,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
nft_xt = container_of(expr->ops, struct nft_xt, ops);
- refcount_inc(&nft_xt->refcnt);
+ nft_xt_get(nft_xt);
return 0;
}
@@ -300,6 +315,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
void *info = nft_expr_priv(expr);
+ struct module *me = target->me;
struct xt_tgdtor_param par;
par.net = ctx->net;
@@ -310,7 +326,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
par.target->destroy(&par);
if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(target->me);
+ module_put(me);
}
static int nft_extension_dump_info(struct sk_buff *skb, int attr,
@@ -504,7 +520,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return ret;
nft_xt = container_of(expr->ops, struct nft_xt, ops);
- refcount_inc(&nft_xt->refcnt);
+ nft_xt_get(nft_xt);
return 0;
}
@@ -558,41 +574,16 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
__nft_match_destroy(ctx, expr, nft_expr_priv(expr));
}
-static void nft_compat_activate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- struct list_head *h)
-{
- struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
-
- if (xt->listcnt == 0)
- list_add(&xt->head, h);
-
- xt->listcnt++;
-}
-
-static void nft_compat_activate_mt(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_compat_net *cn = nft_compat_pernet(ctx->net);
-
- nft_compat_activate(ctx, expr, &cn->nft_match_list);
-}
-
-static void nft_compat_activate_tg(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_compat_net *cn = nft_compat_pernet(ctx->net);
-
- nft_compat_activate(ctx, expr, &cn->nft_target_list);
-}
-
static void nft_compat_deactivate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
- if (--xt->listcnt == 0)
- list_del_init(&xt->head);
+ if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) {
+ if (--xt->listcnt == 0)
+ list_del_init(&xt->head);
+ }
}
static void
@@ -848,7 +839,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,
nft_match->ops.eval = nft_match_eval;
nft_match->ops.init = nft_match_init;
nft_match->ops.destroy = nft_match_destroy;
- nft_match->ops.activate = nft_compat_activate_mt;
nft_match->ops.deactivate = nft_compat_deactivate;
nft_match->ops.dump = nft_match_dump;
nft_match->ops.validate = nft_match_validate;
@@ -866,7 +856,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
nft_match->ops.size = matchsize;
- nft_match->listcnt = 1;
+ nft_match->listcnt = 0;
list_add(&nft_match->head, &cn->nft_match_list);
return &nft_match->ops;
@@ -953,7 +943,6 @@ nft_target_select_ops(const struct nft_ctx *ctx,
nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
nft_target->ops.init = nft_target_init;
nft_target->ops.destroy = nft_target_destroy;
- nft_target->ops.activate = nft_compat_activate_tg;
nft_target->ops.deactivate = nft_compat_deactivate;
nft_target->ops.dump = nft_target_dump;
nft_target->ops.validate = nft_target_validate;
@@ -964,7 +953,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
else
nft_target->ops.eval = nft_target_eval_xt;
- nft_target->listcnt = 1;
+ nft_target->listcnt = 0;
list_add(&nft_target->head, &cn->nft_target_list);
return &nft_target->ops;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 9658493d37d4..a8a74a16f9c4 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -234,20 +234,17 @@ err1:
return err;
}
-static void nft_dynset_activate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_dynset *priv = nft_expr_priv(expr);
-
- nf_tables_rebind_set(ctx, priv->set, &priv->binding);
-}
-
static void nft_dynset_deactivate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_dynset *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ if (phase == NFT_TRANS_PREPARE)
+ return;
+
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding,
+ phase == NFT_TRANS_COMMIT);
}
static void nft_dynset_destroy(const struct nft_ctx *ctx,
@@ -295,7 +292,6 @@ static const struct nft_expr_ops nft_dynset_ops = {
.eval = nft_dynset_eval,
.init = nft_dynset_init,
.destroy = nft_dynset_destroy,
- .activate = nft_dynset_activate,
.deactivate = nft_dynset_deactivate,
.dump = nft_dynset_dump,
};
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 3e5ed787b1d4..5ec43124cbca 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
}
static void nft_immediate_deactivate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 227b2b15a19c..14496da5141d 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -121,20 +121,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return 0;
}
-static void nft_lookup_activate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_lookup *priv = nft_expr_priv(expr);
-
- nf_tables_rebind_set(ctx, priv->set, &priv->binding);
-}
-
static void nft_lookup_deactivate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ if (phase == NFT_TRANS_PREPARE)
+ return;
+
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding,
+ phase == NFT_TRANS_COMMIT);
}
static void nft_lookup_destroy(const struct nft_ctx *ctx,
@@ -225,7 +222,6 @@ static const struct nft_expr_ops nft_lookup_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
- .activate = nft_lookup_activate,
.deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index c1f2adf198a0..79ef074c18ca 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -156,20 +156,17 @@ nla_put_failure:
return -1;
}
-static void nft_objref_map_activate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_objref_map *priv = nft_expr_priv(expr);
-
- nf_tables_rebind_set(ctx, priv->set, &priv->binding);
-}
-
static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ if (phase == NFT_TRANS_PREPARE)
+ return;
+
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding,
+ phase == NFT_TRANS_COMMIT);
}
static void nft_objref_map_destroy(const struct nft_ctx *ctx,
@@ -186,7 +183,6 @@ static const struct nft_expr_ops nft_objref_map_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
.eval = nft_objref_map_eval,
.init = nft_objref_map_init,
- .activate = nft_objref_map_activate,
.deactivate = nft_objref_map_deactivate,
.destroy = nft_objref_map_destroy,
.dump = nft_objref_map_dump,
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 3a15f219e4e7..ea28588c5eed 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -15,6 +15,7 @@
struct nft_tunnel {
enum nft_tunnel_keys key:8;
enum nft_registers dreg:8;
+ enum nft_tunnel_mode mode:8;
};
static void nft_tunnel_get_eval(const struct nft_expr *expr,
@@ -29,14 +30,32 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_TUNNEL_PATH:
- nft_reg_store8(dest, !!tun_info);
+ if (!tun_info) {
+ nft_reg_store8(dest, false);
+ return;
+ }
+ if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+ (priv->mode == NFT_TUNNEL_MODE_RX &&
+ !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+ (priv->mode == NFT_TUNNEL_MODE_TX &&
+ (tun_info->mode & IP_TUNNEL_INFO_TX)))
+ nft_reg_store8(dest, true);
+ else
+ nft_reg_store8(dest, false);
break;
case NFT_TUNNEL_ID:
if (!tun_info) {
regs->verdict.code = NFT_BREAK;
return;
}
- *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+ if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+ (priv->mode == NFT_TUNNEL_MODE_RX &&
+ !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+ (priv->mode == NFT_TUNNEL_MODE_TX &&
+ (tun_info->mode & IP_TUNNEL_INFO_TX)))
+ *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+ else
+ regs->verdict.code = NFT_BREAK;
break;
default:
WARN_ON(1);
@@ -47,6 +66,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr,
static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
[NFTA_TUNNEL_KEY] = { .type = NLA_U32 },
[NFTA_TUNNEL_DREG] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_MODE] = { .type = NLA_U32 },
};
static int nft_tunnel_get_init(const struct nft_ctx *ctx,
@@ -74,6 +94,14 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx,
priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
+ if (tb[NFTA_TUNNEL_MODE]) {
+ priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
+ if (priv->mode > NFT_TUNNEL_MODE_MAX)
+ return -EOPNOTSUPP;
+ } else {
+ priv->mode = NFT_TUNNEL_MODE_NONE;
+ }
+
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, len);
}
@@ -87,6 +115,8 @@ static int nft_tunnel_get_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 55af9f247993..06dc55590441 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial);
int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict, unsigned short family)
{
- const struct nf_ipv6_ops *v6ops;
+ const struct nf_ipv6_ops *v6ops __maybe_unused;
int ret = 0;
switch (family) {
@@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
ret = nf_ip_route(net, dst, fl, strict);
break;
case AF_INET6:
- v6ops = rcu_dereference(nf_ipv6_ops);
- if (v6ops)
- ret = v6ops->route(net, dst, fl, strict);
+ ret = nf_ip6_route(net, dst, fl, strict);
break;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index aecadd471e1d..13e1ac333fa4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1899,7 +1899,7 @@ static int __init xt_init(void)
seqcount_init(&per_cpu(xt_recseq, i));
}
- xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
+ xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
if (!xt)
return -ENOMEM;
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 89e281b3bfc2..29987ff03621 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype");
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
const struct in6_addr *addr, u16 mask)
{
- const struct nf_ipv6_ops *v6ops;
struct flowi6 flow;
struct rt6_info *rt;
u32 ret = 0;
@@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- v6ops = nf_get_ipv6_ops();
- if (v6ops) {
- if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
- if (v6ops->chk_addr(net, addr, dev, true))
- ret = XT_ADDRTYPE_LOCAL;
- }
- route_err = v6ops->route(net, (struct dst_entry **)&rt,
- flowi6_to_flowi(&flow), false);
- } else {
- route_err = 1;
+ if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+ if (nf_ipv6_chk_addr(net, addr, dev, true))
+ ret = XT_ADDRTYPE_LOCAL;
}
+ route_err = nf_ip6_route(net, (struct dst_entry **)&rt,
+ flowi6_to_flowi(&flow), false);
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index f44de4bc2100..1664d2ec8b2f 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -337,7 +337,6 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
unsigned int nstamp_mask;
unsigned int i;
int ret = -EINVAL;
- size_t sz;
net_get_random_once(&hash_rnd, sizeof(hash_rnd));
@@ -387,8 +386,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
goto out;
}
- sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size;
- t = kvzalloc(sz, GFP_KERNEL);
+ t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL);
if (t == NULL) {
ret = -ENOMEM;
goto out;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3b1a78906bc0..1cd1d83a4be0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4292,7 +4292,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
if (unlikely(rb->frames_per_block == 0))
goto out;
- if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+ if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
goto out;
if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
req->tp_frame_nr))
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 65571a6273c3..d6cc97fbbbb0 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -254,7 +254,40 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
- return -ENOIOCTLCMD;
+ struct rds_sock *rs = rds_sk_to_rs(sock->sk);
+ rds_tos_t utos, tos = 0;
+
+ switch (cmd) {
+ case SIOCRDSSETTOS:
+ if (get_user(utos, (rds_tos_t __user *)arg))
+ return -EFAULT;
+
+ if (rs->rs_transport &&
+ rs->rs_transport->get_tos_map)
+ tos = rs->rs_transport->get_tos_map(utos);
+ else
+ return -ENOIOCTLCMD;
+
+ spin_lock_bh(&rds_sock_lock);
+ if (rs->rs_tos || rs->rs_conn) {
+ spin_unlock_bh(&rds_sock_lock);
+ return -EINVAL;
+ }
+ rs->rs_tos = tos;
+ spin_unlock_bh(&rds_sock_lock);
+ break;
+ case SIOCRDSGETTOS:
+ spin_lock_bh(&rds_sock_lock);
+ tos = rs->rs_tos;
+ spin_unlock_bh(&rds_sock_lock);
+ if (put_user(tos, (rds_tos_t __user *)arg))
+ return -EFAULT;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return 0;
}
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
@@ -650,6 +683,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
+ rs->rs_tos = 0;
+ rs->rs_conn = NULL;
spin_lock_bh(&rds_sock_lock);
list_add_tail(&rs->rs_item, &rds_sock_list);
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 762d2c6788a3..17c9d9f0c848 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
__rds_create_bind_key(key, addr, port, scope_id);
rcu_read_lock();
rs = rhashtable_lookup(&bind_hash_table, key, ht_parms);
- if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
- rds_sock_addref(rs);
- else
+ if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) ||
+ !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt)))
rs = NULL;
+
rcu_read_unlock();
rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 3bd2f4a5a30d..7ea134f9a825 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- int dev_if)
+ u8 tos, int dev_if)
{
struct rds_connection *conn, *ret = NULL;
@@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
ipv6_addr_equal(&conn->c_laddr, laddr) &&
conn->c_trans == trans &&
+ conn->c_tos == tos &&
net == rds_conn_net(conn) &&
conn->c_dev_if == dev_if) {
ret = conn;
@@ -139,6 +140,7 @@ static void __rds_conn_path_init(struct rds_connection *conn,
atomic_set(&cp->cp_state, RDS_CONN_DOWN);
cp->cp_send_gen = 0;
cp->cp_reconnect_jiffies = 0;
+ cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION;
INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker);
@@ -159,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- gfp_t gfp,
+ gfp_t gfp, u8 tos,
int is_outgoing,
int dev_if)
{
@@ -171,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
rcu_read_lock();
- conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
+ conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
if (conn &&
conn->c_loopback &&
conn->c_trans != &rds_loop_transport &&
@@ -205,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
conn->c_faddr = *faddr;
conn->c_dev_if = dev_if;
+ conn->c_tos = tos;
#if IS_ENABLED(CONFIG_IPV6)
/* If the local address is link local, set c_bound_if to be the
@@ -297,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_connection *found;
found = rds_conn_lookup(net, head, laddr, faddr, trans,
- dev_if);
+ tos, dev_if);
if (found) {
struct rds_conn_path *cp;
int i;
@@ -332,10 +335,10 @@ out:
struct rds_connection *rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
- struct rds_transport *trans, gfp_t gfp,
- int dev_if)
+ struct rds_transport *trans, u8 tos,
+ gfp_t gfp, int dev_if)
{
- return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create);
@@ -343,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- gfp_t gfp, int dev_if)
+ u8 tos, gfp_t gfp, int dev_if)
{
- return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 9d7b7586f240..2da9b75bad16 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
iinfo->src_addr = conn->c_laddr.s6_addr32[3];
iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
+ iinfo->tos = conn->c_tos;
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
@@ -514,6 +515,15 @@ void rds_ib_exit(void)
rds_ib_mr_exit();
}
+static u8 rds_ib_get_tos_map(u8 tos)
+{
+ /* 1:1 user to transport map for RDMA transport.
+ * In future, if custom map is desired, hook can export
+ * user configurable map.
+ */
+ return tos;
+}
+
struct rds_transport rds_ib_transport = {
.laddr_check = rds_ib_laddr_check,
.xmit_path_complete = rds_ib_xmit_path_complete,
@@ -536,6 +546,7 @@ struct rds_transport rds_ib_transport = {
.sync_mr = rds_ib_sync_mr,
.free_mr = rds_ib_free_mr,
.flush_mrs = rds_ib_flush_mrs,
+ .get_tos_map = rds_ib_get_tos_map,
.t_owner = THIS_MODULE,
.t_name = "infiniband",
.t_unloading = rds_ib_is_unloading,
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 71ff356ee702..752f92235a38 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -67,7 +67,9 @@ struct rds_ib_conn_priv_cmn {
u8 ricpc_protocol_major;
u8 ricpc_protocol_minor;
__be16 ricpc_protocol_minor_mask; /* bitmask */
- __be32 ricpc_reserved1;
+ u8 ricpc_dp_toss;
+ u8 ripc_reserved1;
+ __be16 ripc_reserved2;
__be64 ricpc_ack_seq;
__be32 ricpc_credit; /* non-zero enables flow ctl */
};
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index bfbb31f0c7fd..66c6eb56072b 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -133,23 +133,24 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
rds_ib_set_flow_control(conn, be32_to_cpu(credit));
}
- if (conn->c_version < RDS_PROTOCOL(3, 1)) {
- pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
- &conn->c_laddr, &conn->c_faddr,
- RDS_PROTOCOL_MAJOR(conn->c_version),
- RDS_PROTOCOL_MINOR(conn->c_version));
- set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
- rds_conn_destroy(conn);
- return;
- } else {
- pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n",
- ic->i_active_side ? "Active" : "Passive",
- &conn->c_laddr, &conn->c_faddr,
- RDS_PROTOCOL_MAJOR(conn->c_version),
- RDS_PROTOCOL_MINOR(conn->c_version),
- ic->i_flowctl ? ", flow control" : "");
+ if (conn->c_version < RDS_PROTOCOL_VERSION) {
+ if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
+ pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
+ &conn->c_laddr, &conn->c_faddr,
+ RDS_PROTOCOL_MAJOR(conn->c_version),
+ RDS_PROTOCOL_MINOR(conn->c_version));
+ rds_conn_destroy(conn);
+ return;
+ }
}
+ pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n",
+ ic->i_active_side ? "Active" : "Passive",
+ &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+ RDS_PROTOCOL_MAJOR(conn->c_version),
+ RDS_PROTOCOL_MINOR(conn->c_version),
+ ic->i_flowctl ? ", flow control" : "");
+
atomic_set(&ic->i_cq_quiesce, 0);
/* Init rings and fill recv. this needs to wait until protocol
@@ -184,6 +185,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
NULL);
}
+ conn->c_proposed_version = conn->c_version;
rds_connect_complete(conn);
}
@@ -220,6 +222,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
dp->ricp_v6.dp_ack_seq =
cpu_to_be64(rds_ib_piggyb_ack(ic));
+ dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos;
conn_param->private_data = &dp->ricp_v6;
conn_param->private_data_len = sizeof(dp->ricp_v6);
@@ -234,6 +237,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
dp->ricp_v4.dp_ack_seq =
cpu_to_be64(rds_ib_piggyb_ack(ic));
+ dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos;
conn_param->private_data = &dp->ricp_v4;
conn_param->private_data_len = sizeof(dp->ricp_v4);
@@ -389,10 +393,9 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
break;
default:
- rdsdebug("Fatal QP Event %u (%s) "
- "- connection %pI6c->%pI6c, reconnecting\n",
- event->event, ib_event_msg(event->event),
- &conn->c_laddr, &conn->c_faddr);
+ rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n",
+ event->event, ib_event_msg(event->event),
+ &conn->c_laddr, &conn->c_faddr);
rds_conn_drop(conn);
break;
}
@@ -660,13 +663,16 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
/* Even if len is crap *now* I still want to check it. -ASG */
if (event->param.conn.private_data_len < data_len || major == 0)
- return RDS_PROTOCOL_3_0;
+ return RDS_PROTOCOL_4_0;
common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
- if (major == 3 && common) {
- version = RDS_PROTOCOL_3_0;
+ if (major == 4 && common) {
+ version = RDS_PROTOCOL_4_0;
while ((common >>= 1) != 0)
version++;
+ } else if (RDS_PROTOCOL_COMPAT_VERSION ==
+ RDS_PROTOCOL(major, minor)) {
+ version = RDS_PROTOCOL_COMPAT_VERSION;
} else {
if (isv6)
printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
@@ -729,8 +735,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
/* Check whether the remote protocol version matches ours. */
version = rds_ib_protocol_compatible(event, isv6);
- if (!version)
+ if (!version) {
+ err = RDS_RDMA_REJ_INCOMPAT;
goto out;
+ }
dp = event->param.conn.private_data;
if (isv6) {
@@ -771,15 +779,16 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
daddr6 = &d_mapped_addr;
}
- rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid "
- "0x%llx\n", saddr6, daddr6,
- RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
+ rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
+ saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
+ RDS_PROTOCOL_MINOR(version),
(unsigned long long)be64_to_cpu(lguid),
- (unsigned long long)be64_to_cpu(fguid));
+ (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);
/* RDS/IB is not currently netns aware, thus init_net */
conn = rds_conn_create(&init_net, daddr6, saddr6,
- &rds_ib_transport, GFP_KERNEL, ifindex);
+ &rds_ib_transport, dp_cmn->ricpc_dp_toss,
+ GFP_KERNEL, ifindex);
if (IS_ERR(conn)) {
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
conn = NULL;
@@ -846,7 +855,7 @@ out:
if (conn)
mutex_unlock(&conn->c_cm_lock);
if (err)
- rdma_reject(cm_id, NULL, 0);
+ rdma_reject(cm_id, &err, sizeof(int));
return destroy;
}
@@ -861,7 +870,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
/* If the peer doesn't do protocol negotiation, we must
* default to RDSv3.0 */
- rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
+ rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */
ret = rds_ib_setup_qp(conn);
@@ -870,7 +879,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
goto out;
}
- rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
+ rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
+ conn->c_proposed_version,
UINT_MAX, UINT_MAX, isv6);
ret = rdma_connect(cm_id, &conn_param);
if (ret)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 2f16146e4ec9..d395eec98959 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -986,9 +986,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
} else {
/* We expect errors as the qp is drained during shutdown */
if (rds_conn_up(conn) || rds_conn_connecting(conn))
- rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
+ rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
- wc->status,
+ conn->c_tos, wc->status,
ib_wc_status_msg(wc->status));
}
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 4e0c36acf866..09c46f2e97fa 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -305,8 +305,9 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
/* We expect errors as the qp is drained during shutdown */
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
- rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
- &conn->c_laddr, &conn->c_faddr, wc->status,
+ rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
+ &conn->c_laddr, &conn->c_faddr,
+ conn->c_tos, wc->status,
ib_wc_status_msg(wc->status));
}
}
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 6b0f57c83a2a..46bce8389066 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -51,6 +51,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
struct rds_connection *conn = cm_id->context;
struct rds_transport *trans;
int ret = 0;
+ int *err;
+ u8 len;
rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
event->event, rdma_event_msg(event->event));
@@ -81,6 +83,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ADDR_RESOLVED:
+ rdma_set_service_type(cm_id, conn->c_tos);
/* XXX do we need to clean up if this fails? */
ret = rdma_resolve_route(cm_id,
RDS_RDMA_RESOLVE_TIMEOUT_MS);
@@ -106,8 +109,19 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_REJECTED:
+ if (!conn)
+ break;
+ err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
+ if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) {
+ pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
+ &conn->c_laddr, &conn->c_faddr);
+ conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
+ conn->c_tos = 0;
+ rds_conn_drop(conn);
+ }
rdsdebug("Connection rejected: %s\n",
rdma_reject_msg(cm_id, event->status));
+ break;
/* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index 200d3134aaae..bfafd4a6d827 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -11,6 +11,12 @@
#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000
+/* Below reject reason is for legacy interoperability issue with non-linux
+ * RDS endpoints where older version incompatibility is conveyed via value 1.
+ * For future version(s), proper encoded reject reason should be be used.
+ */
+#define RDS_RDMA_REJ_INCOMPAT 1
+
int rds_rdma_conn_connect(struct rds_connection *conn);
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 4ffe100ff5e6..0d8f67cadd74 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -19,10 +19,13 @@
*/
#define RDS_PROTOCOL_3_0 0x0300
#define RDS_PROTOCOL_3_1 0x0301
+#define RDS_PROTOCOL_4_0 0x0400
+#define RDS_PROTOCOL_4_1 0x0401
#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1
#define RDS_PROTOCOL_MAJOR(v) ((v) >> 8)
#define RDS_PROTOCOL_MINOR(v) ((v) & 255)
#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
+#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1
/* The following ports, 16385, 18634, 18635, are registered with IANA as
* the ports to be used for RDS over TCP and UDP. Currently, only RDS over
@@ -151,9 +154,13 @@ struct rds_connection {
struct rds_cong_map *c_fcong;
/* Protocol version */
+ unsigned int c_proposed_version;
unsigned int c_version;
possible_net_t c_net;
+ /* TOS */
+ u8 c_tos;
+
struct list_head c_map_item;
unsigned long c_map_queued;
@@ -567,6 +574,7 @@ struct rds_transport {
void (*free_mr)(void *trans_private, int invalidate);
void (*flush_mrs)(void);
bool (*t_unloading)(struct rds_connection *conn);
+ u8 (*get_tos_map)(u8 tos);
};
/* Bind hash table key length. It is the sum of the size of a struct
@@ -648,6 +656,7 @@ struct rds_sock {
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
struct rds_msg_zcopy_queue rs_zcookie_queue;
+ u8 rs_tos;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -756,13 +765,14 @@ void rds_conn_exit(void);
struct rds_connection *rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
- struct rds_transport *trans, gfp_t gfp,
+ struct rds_transport *trans,
+ u8 tos, gfp_t gfp,
int dev_if);
struct rds_connection *rds_conn_create_outgoing(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- gfp_t gfp, int dev_if);
+ u8 tos, gfp_t gfp, int dev_if);
void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 6bb6b16ca270..853de4876088 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -782,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc,
minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
minfo.len = be32_to_cpu(inc->i_hdr.h_len);
+ minfo.tos = inc->i_conn->c_tos;
if (flip) {
minfo.laddr = daddr;
diff --git a/net/rds/send.c b/net/rds/send.c
index fd8b687d5c05..166dd578c1cc 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1277,12 +1277,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* rds_conn_create has a spinlock that runs with IRQ off.
* Caching the conn in the socket helps a lot. */
- if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
+ if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) &&
+ rs->rs_tos == rs->rs_conn->c_tos) {
conn = rs->rs_conn;
- else {
+ } else {
conn = rds_conn_create_outgoing(sock_net(sock->sk),
&rs->rs_bound_addr, &daddr,
- rs->rs_transport,
+ rs->rs_transport, rs->rs_tos,
sock->sk->sk_allocation,
scope_id);
if (IS_ERR(conn)) {
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index c16f0a362c32..fd2694174607 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
tsinfo.last_expected_una = tc->t_last_expected_una;
tsinfo.last_seen_una = tc->t_last_seen_una;
+ tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
}
@@ -452,6 +453,12 @@ static void rds_tcp_destroy_conns(void)
static void rds_tcp_exit(void);
+static u8 rds_tcp_get_tos_map(u8 tos)
+{
+ /* all user tos mapped to default 0 for TCP transport */
+ return 0;
+}
+
struct rds_transport rds_tcp_transport = {
.laddr_check = rds_tcp_laddr_check,
.xmit_path_prepare = rds_tcp_xmit_path_prepare,
@@ -466,6 +473,7 @@ struct rds_transport rds_tcp_transport = {
.inc_free = rds_tcp_inc_free,
.stats_info_copy = rds_tcp_stats_info_copy,
.exit = rds_tcp_exit,
+ .get_tos_map = rds_tcp_get_tos_map,
.t_owner = THIS_MODULE,
.t_name = "tcp",
.t_type = RDS_TRANS_TCP,
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index c12203f646da..810a3a49e947 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock)
conn = rds_conn_create(sock_net(sock->sk),
my_addr, peer_addr,
- &rds_tcp_transport, GFP_KERNEL, dev_if);
+ &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
if (IS_ERR(conn)) {
ret = PTR_ERR(conn);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e64f9e4c3cda..32dc50f0a303 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -93,6 +93,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
}
rcu_read_unlock();
+ cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION;
}
EXPORT_SYMBOL_GPL(rds_connect_path_complete);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index d00a0ef39a56..c96f63ffe31e 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -689,8 +689,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
rose->source_call = user->call;
ax25_uid_put(user);
} else {
- if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE))
+ if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
+ dev_put(dev);
return -EACCES;
+ }
rose->source_call = *source;
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index eaf19ebaa964..3f7bb11f3290 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -596,6 +596,7 @@ error_requeue_call:
}
error_no_call:
release_sock(&rx->sk);
+error_trace:
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
return ret;
@@ -604,7 +605,7 @@ wait_interrupted:
wait_error:
finish_wait(sk_sleep(&rx->sk), &wait);
call = NULL;
- goto error_no_call;
+ goto error_trace;
}
/**
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d4b8355737d8..aecf1bf233c8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act,
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
- if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+ if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
unregister_pernet_subsys(ops);
return -EEXIST;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c7633843e223..aa5c38d11a30 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
- .type = TCA_ACT_BPF,
+ .id = TCA_ID_BPF,
.owner = THIS_MODULE,
.act = tcf_bpf_act,
.dump = tcf_bpf_dump,
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 8475913f2070..5d24993cccfe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
- .type = TCA_ACT_CONNMARK,
+ .id = TCA_ID_CONNMARK,
.owner = THIS_MODULE,
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3dc25b7806d7..945fb34ae721 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -660,7 +660,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
- .type = TCA_ACT_CSUM,
+ .id = TCA_ID_CSUM,
.owner = THIS_MODULE,
.act = tcf_csum_act,
.dump = tcf_csum_dump,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b61c20ebb314..93da0004e9f4 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
- .type = TCA_ACT_GACT,
+ .id = TCA_ID_GACT,
.owner = THIS_MODULE,
.act = tcf_gact_act,
.stats_update = tcf_gact_stats_update,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 30b63fa23ee2..9b1f2b3990ee 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_ife_ops = {
.kind = "ife",
- .type = TCA_ACT_IFE,
+ .id = TCA_ID_IFE,
.owner = THIS_MODULE,
.act = tcf_ife_act,
.dump = tcf_ife_dump,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8af6c11d2482..1bad190710ad 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -338,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
- .type = TCA_ACT_IPT,
+ .id = TCA_ID_IPT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
@@ -387,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
- .type = TCA_ACT_XT,
+ .id = TCA_ID_XT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c8cf4d10c435..6692fd054617 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev)
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
- .type = TCA_ACT_MIRRED,
+ .id = TCA_ID_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred_act,
.stats_update = tcf_stats_update,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c5c1e23add77..543eab9193f1 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
- .type = TCA_ACT_NAT,
+ .id = TCA_ID_NAT,
.owner = THIS_MODULE,
.act = tcf_nat_act,
.dump = tcf_nat_dump,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 2b372a06b432..a80373878df7 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -406,7 +406,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_t t;
int s;
- s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
+ s = struct_size(opt, keys, p->tcfp_nkeys);
/* netlink spinlocks held above us - must use ATOMIC */
opt = kzalloc(s, GFP_ATOMIC);
@@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
- .type = TCA_ACT_PEDIT,
+ .id = TCA_ID_PEDIT,
.owner = THIS_MODULE,
.act = tcf_pedit_act,
.dump = tcf_pedit_dump,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ec8ec55e0fe8..8271a6263824 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -366,7 +366,7 @@ MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
- .type = TCA_ID_POLICE,
+ .id = TCA_ID_POLICE,
.owner = THIS_MODULE,
.act = tcf_police_act,
.dump = tcf_police_dump,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1a0c682fd734..203e399e5c85 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_sample_ops = {
.kind = "sample",
- .type = TCA_ACT_SAMPLE,
+ .id = TCA_ID_SAMPLE,
.owner = THIS_MODULE,
.act = tcf_sample_act,
.dump = tcf_sample_dump,
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 902957beceb3..d54cb608dbaf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -19,8 +19,6 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#define TCA_ACT_SIMP 22
-
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
@@ -197,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
- .type = TCA_ACT_SIMP,
+ .id = TCA_ID_SIMP,
.owner = THIS_MODULE,
.act = tcf_simp_act,
.dump = tcf_simp_dump,
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 64dba3708fce..39f8a67ea940 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -305,7 +305,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
- .type = TCA_ACT_SKBEDIT,
+ .id = TCA_ID_SKBEDIT,
.owner = THIS_MODULE,
.act = tcf_skbedit_act,
.dump = tcf_skbedit_dump,
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 59710a183bd3..7bac1d78e7a3 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_skbmod_ops = {
.kind = "skbmod",
- .type = TCA_ACT_SKBMOD,
+ .id = TCA_ACT_SKBMOD,
.owner = THIS_MODULE,
.act = tcf_skbmod_act,
.dump = tcf_skbmod_dump,
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 8b43fe0130f7..9104b8e36482 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -563,7 +563,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
- .type = TCA_ACT_TUNNEL_KEY,
+ .id = TCA_ID_TUNNEL_KEY,
.owner = THIS_MODULE,
.act = tunnel_key_act,
.dump = tunnel_key_dump,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 93fdaf707313..ac0061599225 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
- .type = TCA_ACT_VLAN,
+ .id = TCA_ID_VLAN,
.owner = THIS_MODULE,
.act = tcf_vlan_act,
.dump = tcf_vlan_dump,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e2b5cb2eb34e..28592e9f803f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,6 +31,13 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -61,7 +68,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
}
static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
const struct tcf_proto_ops *ops;
@@ -69,9 +77,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
if (ops)
return ops;
#ifdef CONFIG_MODULES
- rtnl_unlock();
+ if (rtnl_held)
+ rtnl_unlock();
request_module("cls_%s", kind);
- rtnl_lock();
+ if (rtnl_held)
+ rtnl_lock();
ops = __tcf_proto_lookup_ops(kind);
/* We dropped the RTNL semaphore in order to perform
* the module load. So, even if we succeeded in loading
@@ -152,8 +162,26 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
return TC_H_MAJ(first);
}
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+ const struct tcf_proto_ops *ops;
+ bool ret;
+
+ ops = tcf_proto_lookup_ops(kind, false, NULL);
+ /* On error return false to take rtnl lock. Proto lookup/create
+ * functions will perform lookup again and properly handle errors.
+ */
+ if (IS_ERR(ops))
+ return false;
+
+ ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+ module_put(ops->owner);
+ return ret;
+}
+
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
u32 prio, struct tcf_chain *chain,
+ bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
@@ -163,7 +191,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
if (!tp)
return ERR_PTR(-ENOBUFS);
- tp->ops = tcf_proto_lookup_ops(kind, extack);
+ tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
if (IS_ERR(tp->ops)) {
err = PTR_ERR(tp->ops);
goto errout;
@@ -172,6 +200,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->protocol = protocol;
tp->prio = prio;
tp->chain = chain;
+ spin_lock_init(&tp->lock);
+ refcount_set(&tp->refcnt, 1);
err = tp->ops->init(tp);
if (err) {
@@ -185,14 +215,75 @@ errout:
return ERR_PTR(err);
}
-static void tcf_proto_destroy(struct tcf_proto *tp,
+static void tcf_proto_get(struct tcf_proto *tp)
+{
+ refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- tp->ops->destroy(tp, extack);
+ tp->ops->destroy(tp, rtnl_held, extack);
+ tcf_chain_put(tp->chain);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
}
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ if (refcount_dec_and_test(&tp->refcnt))
+ tcf_proto_destroy(tp, rtnl_held, extack);
+}
+
+static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
+{
+ return -1;
+}
+
+static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
+{
+ struct tcf_walker walker = { .fn = walker_noop, };
+
+ if (tp->ops->walk) {
+ tp->ops->walk(tp, &walker, rtnl_held);
+ return !walker.stop;
+ }
+ return true;
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
+{
+ spin_lock(&tp->lock);
+ if (tcf_proto_is_empty(tp, rtnl_held))
+ tp->deleting = true;
+ spin_unlock(&tp->lock);
+ return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+ spin_lock(&tp->lock);
+ tp->deleting = true;
+ spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+ bool deleting;
+
+ spin_lock(&tp->lock);
+ deleting = tp->deleting;
+ spin_unlock(&tp->lock);
+
+ return deleting;
+}
+
+#define ASSERT_BLOCK_LOCKED(block) \
+ lockdep_assert_held(&(block)->lock)
+
struct tcf_filter_chain_list_item {
struct list_head list;
tcf_chain_head_change_t *chain_head_change;
@@ -204,10 +295,13 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
{
struct tcf_chain *chain;
+ ASSERT_BLOCK_LOCKED(block);
+
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
return NULL;
list_add_tail(&chain->list, &block->chain_list);
+ mutex_init(&chain->filter_chain_lock);
chain->block = block;
chain->index = chain_index;
chain->refcnt = 1;
@@ -231,29 +325,59 @@ static void tcf_chain0_head_change(struct tcf_chain *chain,
if (chain->index)
return;
+
+ mutex_lock(&block->lock);
list_for_each_entry(item, &block->chain0.filter_chain_list, list)
tcf_chain_head_change_item(item, tp_head);
+ mutex_unlock(&block->lock);
}
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
{
struct tcf_block *block = chain->block;
+ ASSERT_BLOCK_LOCKED(block);
+
list_del(&chain->list);
if (!chain->index)
block->chain0.chain = NULL;
+
+ if (list_empty(&block->chain_list) &&
+ refcount_read(&block->refcnt) == 0)
+ return true;
+
+ return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+ mutex_destroy(&block->lock);
+ kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+ struct tcf_block *block = chain->block;
+
+ mutex_destroy(&chain->filter_chain_lock);
kfree(chain);
- if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
- kfree_rcu(block, rcu);
+ if (free_block)
+ tcf_block_destroy(block);
}
static void tcf_chain_hold(struct tcf_chain *chain)
{
+ ASSERT_BLOCK_LOCKED(chain->block);
+
++chain->refcnt;
}
static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
+ ASSERT_BLOCK_LOCKED(chain->block);
+
/* In case all the references are action references, this
* chain should not be shown to the user.
*/
@@ -265,6 +389,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
{
struct tcf_chain *chain;
+ ASSERT_BLOCK_LOCKED(block);
+
list_for_each_entry(chain, &block->chain_list, list) {
if (chain->index == chain_index)
return chain;
@@ -279,31 +405,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
u32 chain_index, bool create,
bool by_act)
{
- struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+ struct tcf_chain *chain = NULL;
+ bool is_first_reference;
+ mutex_lock(&block->lock);
+ chain = tcf_chain_lookup(block, chain_index);
if (chain) {
tcf_chain_hold(chain);
} else {
if (!create)
- return NULL;
+ goto errout;
chain = tcf_chain_create(block, chain_index);
if (!chain)
- return NULL;
+ goto errout;
}
if (by_act)
++chain->action_refcnt;
+ is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+ mutex_unlock(&block->lock);
/* Send notification only in case we got the first
* non-action reference. Until then, the chain acts only as
* a placeholder for actions pointing to it and user ought
* not know about them.
*/
- if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+ if (is_first_reference && !by_act)
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
RTM_NEWCHAIN, false);
return chain;
+
+errout:
+ mutex_unlock(&block->lock);
+ return chain;
}
static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -318,51 +453,94 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
}
EXPORT_SYMBOL(tcf_chain_get_by_act);
-static void tc_chain_tmplt_del(struct tcf_chain *chain);
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv);
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct tcf_block *block, struct sk_buff *oskb,
+ u32 seq, u16 flags, bool unicast);
-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+ bool explicitly_created)
{
+ struct tcf_block *block = chain->block;
+ const struct tcf_proto_ops *tmplt_ops;
+ bool is_last, free_block = false;
+ unsigned int refcnt;
+ void *tmplt_priv;
+ u32 chain_index;
+
+ mutex_lock(&block->lock);
+ if (explicitly_created) {
+ if (!chain->explicitly_created) {
+ mutex_unlock(&block->lock);
+ return;
+ }
+ chain->explicitly_created = false;
+ }
+
if (by_act)
chain->action_refcnt--;
- chain->refcnt--;
+
+ /* tc_chain_notify_delete can't be called while holding block lock.
+ * However, when block is unlocked chain can be changed concurrently, so
+ * save these to temporary variables.
+ */
+ refcnt = --chain->refcnt;
+ is_last = refcnt - chain->action_refcnt == 0;
+ tmplt_ops = chain->tmplt_ops;
+ tmplt_priv = chain->tmplt_priv;
+ chain_index = chain->index;
+
+ if (refcnt == 0)
+ free_block = tcf_chain_detach(chain);
+ mutex_unlock(&block->lock);
/* The last dropped non-action reference will trigger notification. */
- if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
- tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+ if (is_last && !by_act) {
+ tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
+ block, NULL, 0, 0, false);
+ /* Last reference to chain, no need to lock. */
+ chain->flushing = false;
+ }
- if (chain->refcnt == 0) {
- tc_chain_tmplt_del(chain);
- tcf_chain_destroy(chain);
+ if (refcnt == 0) {
+ tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
+ tcf_chain_destroy(chain, free_block);
}
}
static void tcf_chain_put(struct tcf_chain *chain)
{
- __tcf_chain_put(chain, false);
+ __tcf_chain_put(chain, false, false);
}
void tcf_chain_put_by_act(struct tcf_chain *chain)
{
- __tcf_chain_put(chain, true);
+ __tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);
static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
- if (chain->explicitly_created)
- tcf_chain_put(chain);
+ __tcf_chain_put(chain, false, true);
}
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
- struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+ struct tcf_proto *tp, *tp_next;
+ mutex_lock(&chain->filter_chain_lock);
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
+ RCU_INIT_POINTER(chain->filter_chain, NULL);
tcf_chain0_head_change(chain, NULL);
+ chain->flushing = true;
+ mutex_unlock(&chain->filter_chain_lock);
+
while (tp) {
- RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp, NULL);
- tp = rtnl_dereference(chain->filter_chain);
- tcf_chain_put(chain);
+ tp_next = rcu_dereference_protected(tp->next, 1);
+ tcf_proto_put(tp, rtnl_held, NULL);
+ tp = tp_next;
}
}
@@ -684,8 +862,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
struct tcf_block_ext_info *ei,
struct netlink_ext_ack *extack)
{
- struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
+ struct tcf_chain *chain0;
item = kmalloc(sizeof(*item), GFP_KERNEL);
if (!item) {
@@ -694,9 +872,32 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
}
item->chain_head_change = ei->chain_head_change;
item->chain_head_change_priv = ei->chain_head_change_priv;
- if (chain0 && chain0->filter_chain)
- tcf_chain_head_change_item(item, chain0->filter_chain);
- list_add(&item->list, &block->chain0.filter_chain_list);
+
+ mutex_lock(&block->lock);
+ chain0 = block->chain0.chain;
+ if (chain0)
+ tcf_chain_hold(chain0);
+ else
+ list_add(&item->list, &block->chain0.filter_chain_list);
+ mutex_unlock(&block->lock);
+
+ if (chain0) {
+ struct tcf_proto *tp_head;
+
+ mutex_lock(&chain0->filter_chain_lock);
+
+ tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+ if (tp_head)
+ tcf_chain_head_change_item(item, tp_head);
+
+ mutex_lock(&block->lock);
+ list_add(&item->list, &block->chain0.filter_chain_list);
+ mutex_unlock(&block->lock);
+
+ mutex_unlock(&chain0->filter_chain_lock);
+ tcf_chain_put(chain0);
+ }
+
return 0;
}
@@ -704,20 +905,23 @@ static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
struct tcf_block_ext_info *ei)
{
- struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
+ mutex_lock(&block->lock);
list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
(item->chain_head_change == ei->chain_head_change &&
item->chain_head_change_priv == ei->chain_head_change_priv)) {
- if (chain0)
+ if (block->chain0.chain)
tcf_chain_head_change_item(item, NULL);
list_del(&item->list);
+ mutex_unlock(&block->lock);
+
kfree(item);
return;
}
}
+ mutex_unlock(&block->lock);
WARN_ON(1);
}
@@ -764,6 +968,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
return ERR_PTR(-ENOMEM);
}
+ mutex_init(&block->lock);
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
@@ -799,157 +1004,241 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
return block;
}
-static void tcf_block_flush_all_chains(struct tcf_block *block)
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
- struct tcf_chain *chain;
+ mutex_lock(&block->lock);
+ if (chain)
+ chain = list_is_last(&chain->list, &block->chain_list) ?
+ NULL : list_next_entry(chain, list);
+ else
+ chain = list_first_entry_or_null(&block->chain_list,
+ struct tcf_chain, list);
- /* Hold a refcnt for all chains, so that they don't disappear
- * while we are iterating.
- */
- list_for_each_entry(chain, &block->chain_list, list)
+ /* skip all action-only chains */
+ while (chain && tcf_chain_held_by_acts_only(chain))
+ chain = list_is_last(&chain->list, &block->chain_list) ?
+ NULL : list_next_entry(chain, list);
+
+ if (chain)
tcf_chain_hold(chain);
+ mutex_unlock(&block->lock);
- list_for_each_entry(chain, &block->chain_list, list)
- tcf_chain_flush(chain);
+ return chain;
}
-static void tcf_block_put_all_chains(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all chains on
+ * block. It properly obtains block->lock and takes reference to chain before
+ * returning it. Users of this function must be tolerant to concurrent chain
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
- struct tcf_chain *chain, *tmp;
+ struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
- /* At this point, all the chains should have refcnt >= 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
- tcf_chain_put_explicitly_created(chain);
+ if (chain)
tcf_chain_put(chain);
- }
+
+ return chain_next;
}
+EXPORT_SYMBOL(tcf_get_next_chain);
-static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
- struct tcf_block_ext_info *ei)
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
- if (refcount_dec_and_test(&block->refcnt)) {
- /* Flushing/putting all chains will cause the block to be
- * deallocated when last chain is freed. However, if chain_list
- * is empty, block has to be manually deallocated. After block
- * reference counter reached 0, it is no longer possible to
- * increment it or add new chains to block.
- */
- bool free_block = list_empty(&block->chain_list);
+ u32 prio = 0;
- if (tcf_block_shared(block))
- tcf_block_remove(block, block->net);
- if (!free_block)
- tcf_block_flush_all_chains(block);
+ ASSERT_RTNL();
+ mutex_lock(&chain->filter_chain_lock);
- if (q)
- tcf_block_offload_unbind(block, q, ei);
+ if (!tp) {
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
+ } else if (tcf_proto_is_deleting(tp)) {
+ /* 'deleting' flag is set and chain->filter_chain_lock was
+ * unlocked, which means next pointer could be invalid. Restart
+ * search.
+ */
+ prio = tp->prio + 1;
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
- if (free_block)
- kfree_rcu(block, rcu);
- else
- tcf_block_put_all_chains(block);
- } else if (q) {
- tcf_block_offload_unbind(block, q, ei);
+ for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+ if (!tp->deleting && tp->prio >= prio)
+ break;
+ } else {
+ tp = tcf_chain_dereference(tp->next, chain);
}
+
+ if (tp)
+ tcf_proto_get(tp);
+
+ mutex_unlock(&chain->filter_chain_lock);
+
+ return tp;
}
-static void tcf_block_refcnt_put(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all tp's on
+ * chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+ bool rtnl_held)
{
- __tcf_block_put(block, NULL, NULL);
+ struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+ if (tp)
+ tcf_proto_put(tp, rtnl_held, NULL);
+
+ return tp_next;
}
+EXPORT_SYMBOL(tcf_get_next_proto);
-/* Find tcf block.
- * Set q, parent, cl when appropriate.
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
+{
+ struct tcf_chain *chain;
+
+ /* Last reference to block. At this point chains cannot be added or
+ * removed concurrently.
+ */
+ for (chain = tcf_get_next_chain(block, NULL);
+ chain;
+ chain = tcf_get_next_chain(block, chain)) {
+ tcf_chain_put_explicitly_created(chain);
+ tcf_chain_flush(chain, rtnl_held);
+ }
+}
+
+/* Lookup Qdisc and increments its reference counter.
+ * Set parent, if necessary.
*/
-static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
- u32 *parent, unsigned long *cl,
- int ifindex, u32 block_index,
- struct netlink_ext_ack *extack)
+static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+ u32 *parent, int ifindex, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
- struct tcf_block *block;
+ const struct Qdisc_class_ops *cops;
+ struct net_device *dev;
int err = 0;
- if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
- block = tcf_block_refcnt_get(net, block_index);
- if (!block) {
- NL_SET_ERR_MSG(extack, "Block of given index was not found");
- return ERR_PTR(-EINVAL);
- }
- } else {
- const struct Qdisc_class_ops *cops;
- struct net_device *dev;
-
- rcu_read_lock();
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ return 0;
- /* Find link */
- dev = dev_get_by_index_rcu(net, ifindex);
- if (!dev) {
- rcu_read_unlock();
- return ERR_PTR(-ENODEV);
- }
+ rcu_read_lock();
- /* Find qdisc */
- if (!*parent) {
- *q = dev->qdisc;
- *parent = (*q)->handle;
- } else {
- *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
- if (!*q) {
- NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
- err = -EINVAL;
- goto errout_rcu;
- }
- }
+ /* Find link */
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
- *q = qdisc_refcount_inc_nz(*q);
+ /* Find qdisc */
+ if (!*parent) {
+ *q = dev->qdisc;
+ *parent = (*q)->handle;
+ } else {
+ *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
if (!*q) {
NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
err = -EINVAL;
goto errout_rcu;
}
+ }
- /* Is it classful? */
- cops = (*q)->ops->cl_ops;
- if (!cops) {
- NL_SET_ERR_MSG(extack, "Qdisc not classful");
- err = -EINVAL;
- goto errout_rcu;
- }
+ *q = qdisc_refcount_inc_nz(*q);
+ if (!*q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+ err = -EINVAL;
+ goto errout_rcu;
+ }
- if (!cops->tcf_block) {
- NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
- err = -EOPNOTSUPP;
- goto errout_rcu;
- }
+ /* Is it classful? */
+ cops = (*q)->ops->cl_ops;
+ if (!cops) {
+ NL_SET_ERR_MSG(extack, "Qdisc not classful");
+ err = -EINVAL;
+ goto errout_qdisc;
+ }
- /* At this point we know that qdisc is not noop_qdisc,
- * which means that qdisc holds a reference to net_device
- * and we hold a reference to qdisc, so it is safe to release
- * rcu read lock.
- */
- rcu_read_unlock();
+ if (!cops->tcf_block) {
+ NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+ err = -EOPNOTSUPP;
+ goto errout_qdisc;
+ }
- /* Do we search for filter, attached to class? */
- if (TC_H_MIN(*parent)) {
- *cl = cops->find(*q, *parent);
- if (*cl == 0) {
- NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
- err = -ENOENT;
- goto errout_qdisc;
- }
+errout_rcu:
+ /* At this point we know that qdisc is not noop_qdisc,
+ * which means that qdisc holds a reference to net_device
+ * and we hold a reference to qdisc, so it is safe to release
+ * rcu read lock.
+ */
+ rcu_read_unlock();
+ return err;
+
+errout_qdisc:
+ rcu_read_unlock();
+
+ if (rtnl_held)
+ qdisc_put(*q);
+ else
+ qdisc_put_unlocked(*q);
+ *q = NULL;
+
+ return err;
+}
+
+static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
+ int ifindex, struct netlink_ext_ack *extack)
+{
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ return 0;
+
+ /* Do we search for filter, attached to class? */
+ if (TC_H_MIN(parent)) {
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+ *cl = cops->find(q, parent);
+ if (*cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+ return -ENOENT;
}
+ }
+
+ return 0;
+}
- /* And the last stroke */
- block = cops->tcf_block(*q, *cl, extack);
+static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
+ unsigned long cl, int ifindex,
+ u32 block_index,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_block *block;
+
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+ block = tcf_block_refcnt_get(net, block_index);
if (!block) {
- err = -EINVAL;
- goto errout_qdisc;
+ NL_SET_ERR_MSG(extack, "Block of given index was not found");
+ return ERR_PTR(-EINVAL);
}
+ } else {
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+ block = cops->tcf_block(q, cl, extack);
+ if (!block)
+ return ERR_PTR(-EINVAL);
+
if (tcf_block_shared(block)) {
NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
- err = -EOPNOTSUPP;
- goto errout_qdisc;
+ return ERR_PTR(-EOPNOTSUPP);
}
/* Always take reference to block in order to support execution
@@ -962,24 +1251,91 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
}
return block;
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei, bool rtnl_held)
+{
+ if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
+ /* Flushing/putting all chains will cause the block to be
+ * deallocated when last chain is freed. However, if chain_list
+ * is empty, block has to be manually deallocated. After block
+ * reference counter reached 0, it is no longer possible to
+ * increment it or add new chains to block.
+ */
+ bool free_block = list_empty(&block->chain_list);
+
+ mutex_unlock(&block->lock);
+ if (tcf_block_shared(block))
+ tcf_block_remove(block, block->net);
+
+ if (q)
+ tcf_block_offload_unbind(block, q, ei);
+
+ if (free_block)
+ tcf_block_destroy(block);
+ else
+ tcf_block_flush_all_chains(block, rtnl_held);
+ } else if (q) {
+ tcf_block_offload_unbind(block, q, ei);
+ }
+}
+
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
+{
+ __tcf_block_put(block, NULL, NULL, rtnl_held);
+}
+
+/* Find tcf block.
+ * Set q, parent, cl when appropriate.
+ */
+
+static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
+ u32 *parent, unsigned long *cl,
+ int ifindex, u32 block_index,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_block *block;
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
+ if (err)
+ goto errout;
+
+ err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
+ if (err)
+ goto errout_qdisc;
+
+ block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
+ if (IS_ERR(block)) {
+ err = PTR_ERR(block);
+ goto errout_qdisc;
+ }
+
+ return block;
-errout_rcu:
- rcu_read_unlock();
errout_qdisc:
- if (*q) {
+ if (*q)
qdisc_put(*q);
- *q = NULL;
- }
+errout:
+ *q = NULL;
return ERR_PTR(err);
}
-static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+ bool rtnl_held)
{
if (!IS_ERR_OR_NULL(block))
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, rtnl_held);
- if (q)
- qdisc_put(q);
+ if (q) {
+ if (rtnl_held)
+ qdisc_put(q);
+ else
+ qdisc_put_unlocked(q);
+ }
}
struct tcf_block_owner_item {
@@ -1087,7 +1443,7 @@ err_chain0_head_change_cb_add:
tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
@@ -1124,7 +1480,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
tcf_chain0_head_change_cb_del(block, ei);
tcf_block_owner_del(block, q, ei->binder_type);
- __tcf_block_put(block, q, ei);
+ __tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);
@@ -1181,13 +1537,19 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
void *cb_priv, bool add, bool offload_in_use,
struct netlink_ext_ack *extack)
{
- struct tcf_chain *chain;
- struct tcf_proto *tp;
+ struct tcf_chain *chain, *chain_prev;
+ struct tcf_proto *tp, *tp_prev;
int err;
- list_for_each_entry(chain, &block->chain_list, list) {
- for (tp = rtnl_dereference(chain->filter_chain); tp;
- tp = rtnl_dereference(tp->next)) {
+ for (chain = __tcf_get_next_chain(block, NULL);
+ chain;
+ chain_prev = chain,
+ chain = __tcf_get_next_chain(block, chain),
+ tcf_chain_put(chain_prev)) {
+ for (tp = __tcf_get_next_proto(chain, NULL); tp;
+ tp_prev = tp,
+ tp = __tcf_get_next_proto(chain, tp),
+ tcf_proto_put(tp_prev, true, NULL)) {
if (tp->ops->reoffload) {
err = tp->ops->reoffload(tp, add, cb, cb_priv,
extack);
@@ -1204,6 +1566,8 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
return 0;
err_playback_remove:
+ tcf_proto_put(tp, true, NULL);
+ tcf_chain_put(chain);
tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
extack);
return err;
@@ -1329,32 +1693,116 @@ struct tcf_chain_info {
struct tcf_proto __rcu *next;
};
-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info)
{
- return rtnl_dereference(*chain_info->pprev);
+ return tcf_chain_dereference(*chain_info->pprev, chain);
}
-static void tcf_chain_tp_insert(struct tcf_chain *chain,
- struct tcf_chain_info *chain_info,
- struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ struct tcf_proto *tp)
{
+ if (chain->flushing)
+ return -EAGAIN;
+
if (*chain_info->pprev == chain->filter_chain)
tcf_chain0_head_change(chain, tp);
- RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+ tcf_proto_get(tp);
+ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
- tcf_chain_hold(chain);
+
+ return 0;
}
static void tcf_chain_tp_remove(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
struct tcf_proto *tp)
{
- struct tcf_proto *next = rtnl_dereference(chain_info->next);
+ struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
+ tcf_proto_mark_delete(tp);
if (tp == chain->filter_chain)
tcf_chain0_head_change(chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next);
- tcf_chain_put(chain);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ u32 protocol, u32 prio,
+ bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+ struct tcf_proto *tp_new,
+ u32 protocol, u32 prio,
+ bool rtnl_held)
+{
+ struct tcf_chain_info chain_info;
+ struct tcf_proto *tp;
+ int err = 0;
+
+ mutex_lock(&chain->filter_chain_lock);
+
+ tp = tcf_chain_tp_find(chain, &chain_info,
+ protocol, prio, false);
+ if (!tp)
+ err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ if (tp) {
+ tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tp_new = tp;
+ } else if (err) {
+ tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tp_new = ERR_PTR(err);
+ }
+
+ return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+ struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_chain_info chain_info;
+ struct tcf_proto *tp_iter;
+ struct tcf_proto **pprev;
+ struct tcf_proto *next;
+
+ mutex_lock(&chain->filter_chain_lock);
+
+ /* Atomically find and remove tp from chain. */
+ for (pprev = &chain->filter_chain;
+ (tp_iter = tcf_chain_dereference(*pprev, chain));
+ pprev = &tp_iter->next) {
+ if (tp_iter == tp) {
+ chain_info.pprev = pprev;
+ chain_info.next = tp_iter->next;
+ WARN_ON(tp_iter->deleting);
+ break;
+ }
+ }
+ /* Verify that tp still exists and no new filters were inserted
+ * concurrently.
+ * Mark tp for deletion if it is empty.
+ */
+ if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+ mutex_unlock(&chain->filter_chain_lock);
+ return;
+ }
+
+ next = tcf_chain_dereference(chain_info.next, chain);
+ if (tp == chain->filter_chain)
+ tcf_chain0_head_change(chain, next);
+ RCU_INIT_POINTER(*chain_info.pprev, next);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ tcf_proto_put(tp, rtnl_held, extack);
}
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1367,7 +1815,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
/* Check the chain for existence of proto-tcf with this priority */
for (pprev = &chain->filter_chain;
- (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+ (tp = tcf_chain_dereference(*pprev, chain));
+ pprev = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (prio_allocate ||
@@ -1380,14 +1829,20 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
}
}
chain_info->pprev = pprev;
- chain_info->next = tp ? tp->next : NULL;
+ if (tp) {
+ chain_info->next = tp->next;
+ tcf_proto_get(tp);
+ } else {
+ chain_info->next = NULL;
+ }
return tp;
}
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ bool rtnl_held)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1415,7 +1870,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
if (!fh) {
tcm->tcm_handle = 0;
} else {
- if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+ if (tp->ops->dump &&
+ tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
goto nla_put_failure;
}
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1430,7 +1886,8 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
- u32 parent, void *fh, int event, bool unicast)
+ u32 parent, void *fh, int event, bool unicast,
+ bool rtnl_held)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1440,7 +1897,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
return -ENOBUFS;
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
+ n->nlmsg_seq, n->nlmsg_flags, event,
+ rtnl_held) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1456,7 +1914,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
u32 parent, void *fh, bool unicast, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1467,13 +1925,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
return -ENOBUFS;
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
+ rtnl_held) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
}
- err = tp->ops->delete(tp, fh, last, extack);
+ err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
if (err) {
kfree_skb(skb);
return err;
@@ -1492,14 +1951,21 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct tcf_block *block, struct Qdisc *q,
u32 parent, struct nlmsghdr *n,
- struct tcf_chain *chain, int event)
+ struct tcf_chain *chain, int event,
+ bool rtnl_held)
{
struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next))
+ for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
+ tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
tfilter_notify(net, oskb, n, tp, block,
- q, parent, NULL, event, false);
+ q, parent, NULL, event, false, rtnl_held);
+}
+
+static void tfilter_put(struct tcf_proto *tp, void *fh)
+{
+ if (tp->ops->put && fh)
+ tp->ops->put(tp, fh);
}
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1522,6 +1988,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
void *fh;
int err;
int tp_created;
+ bool rtnl_held = false;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1538,7 +2005,9 @@ replay:
prio = TC_H_MAJ(t->tcm_info);
prio_allocate = false;
parent = t->tcm_parent;
+ tp = NULL;
cl = 0;
+ block = NULL;
if (prio == 0) {
/* If no priority is provided by the user,
@@ -1555,8 +2024,27 @@ replay:
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if rtnl_held was set to true on previous iteration,
+ * block is shared (no qdisc found), qdisc is not unlocked, classifier
+ * type is not specified, classifier is not unlocked.
+ */
+ if (rtnl_held ||
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1575,40 +2063,62 @@ replay:
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, prio_allocate);
if (IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
- goto errout;
+ goto errout_locked;
}
if (tp == NULL) {
+ struct tcf_proto *tp_new = NULL;
+
+ if (chain->flushing) {
+ err = -EAGAIN;
+ goto errout_locked;
+ }
+
/* Proto-tcf does not exist, create new one */
if (tca[TCA_KIND] == NULL || !protocol) {
NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
err = -EINVAL;
- goto errout;
+ goto errout_locked;
}
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
- goto errout;
+ goto errout_locked;
}
if (prio_allocate)
- prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
+ prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
+ &chain_info));
+
+ mutex_unlock(&chain->filter_chain_lock);
+ tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
+ protocol, prio, chain, rtnl_held,
+ extack);
+ if (IS_ERR(tp_new)) {
+ err = PTR_ERR(tp_new);
+ goto errout_tp;
+ }
- tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, chain, extack);
+ tp_created = 1;
+ tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
+ rtnl_held);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
- goto errout;
+ goto errout_tp;
}
- tp_created = 1;
- } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ } else {
+ mutex_unlock(&chain->filter_chain_lock);
+ }
+
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
goto errout;
@@ -1623,6 +2133,7 @@ replay:
goto errout;
}
} else if (n->nlmsg_flags & NLM_F_EXCL) {
+ tfilter_put(tp, fh);
NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
@@ -1636,25 +2147,41 @@ replay:
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
- extack);
+ rtnl_held, extack);
if (err == 0) {
- if (tp_created)
- tcf_chain_tp_insert(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_NEWTFILTER, false);
- } else {
- if (tp_created)
- tcf_proto_destroy(tp, NULL);
+ RTM_NEWTFILTER, false, rtnl_held);
+ tfilter_put(tp, fh);
}
errout:
- if (chain)
- tcf_chain_put(chain);
- tcf_block_release(q, block);
- if (err == -EAGAIN)
+ if (err && tp_created)
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
+errout_tp:
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
+ if (!tp_created)
+ tcf_chain_put(chain);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
+ if (err == -EAGAIN) {
+ /* Take rtnl lock in case EAGAIN is caused by concurrent flush
+ * of target chain.
+ */
+ rtnl_held = true;
/* Replay the request. */
goto replay;
+ }
return err;
+
+errout_locked:
+ mutex_unlock(&chain->filter_chain_lock);
+ goto errout;
}
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1670,11 +2197,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q = NULL;
struct tcf_chain_info chain_info;
struct tcf_chain *chain = NULL;
- struct tcf_block *block;
+ struct tcf_block *block = NULL;
struct tcf_proto *tp = NULL;
unsigned long cl = 0;
void *fh = NULL;
int err;
+ bool rtnl_held = false;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1695,8 +2223,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
+ * found), qdisc is not unlocked, classifier type is not specified,
+ * classifier is not unlocked.
+ */
+ if (!prio ||
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1724,56 +2271,69 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (prio == 0) {
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
- tcf_chain_flush(chain);
+ chain, RTM_DELTFILTER, rtnl_held);
+ tcf_chain_flush(chain, rtnl_held);
err = 0;
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, false);
if (!tp || IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = tp ? PTR_ERR(tp) : -ENOENT;
- goto errout;
+ goto errout_locked;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
+ goto errout_locked;
+ } else if (t->tcm_handle == 0) {
+ tcf_chain_tp_remove(chain, &chain_info, tp);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ tcf_proto_put(tp, rtnl_held, NULL);
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
+ RTM_DELTFILTER, false, rtnl_held);
+ err = 0;
goto errout;
}
+ mutex_unlock(&chain->filter_chain_lock);
fh = tp->ops->get(tp, t->tcm_handle);
if (!fh) {
- if (t->tcm_handle == 0) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_DELTFILTER, false);
- tcf_proto_destroy(tp, extack);
- err = 0;
- } else {
- NL_SET_ERR_MSG(extack, "Specified filter handle not found");
- err = -ENOENT;
- }
+ NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+ err = -ENOENT;
} else {
bool last;
err = tfilter_del_notify(net, skb, n, tp, block,
q, parent, fh, false, &last,
- extack);
+ rtnl_held, extack);
+
if (err)
goto errout;
- if (last) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp, extack);
- }
+ if (last)
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
}
errout:
- if (chain)
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
tcf_chain_put(chain);
- tcf_block_release(q, block);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
return err;
+
+errout_locked:
+ mutex_unlock(&chain->filter_chain_lock);
+ goto errout;
}
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1789,11 +2349,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q = NULL;
struct tcf_chain_info chain_info;
struct tcf_chain *chain = NULL;
- struct tcf_block *block;
+ struct tcf_block *block = NULL;
struct tcf_proto *tp = NULL;
unsigned long cl = 0;
void *fh = NULL;
int err;
+ bool rtnl_held = false;
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
if (err < 0)
@@ -1811,8 +2372,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
+ * unlocked, classifier type is not specified, classifier is not
+ * unlocked.
+ */
+ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1831,8 +2410,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, false);
+ mutex_unlock(&chain->filter_chain_lock);
if (!tp || IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = tp ? PTR_ERR(tp) : -ENOENT;
@@ -1850,15 +2431,23 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = -ENOENT;
} else {
err = tfilter_notify(net, skb, n, tp, block, q, parent,
- fh, RTM_NEWTFILTER, true);
+ fh, RTM_NEWTFILTER, true, rtnl_held);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
}
+ tfilter_put(tp, fh);
errout:
- if (chain)
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
tcf_chain_put(chain);
- tcf_block_release(q, block);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
return err;
}
@@ -1879,7 +2468,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER);
+ RTM_NEWTFILTER, true);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -1889,11 +2478,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
struct net *net = sock_net(skb->sk);
struct tcf_block *block = chain->block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_proto *tp, *tp_prev;
struct tcf_dump_args arg;
- struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+ for (tp = __tcf_get_next_proto(chain, NULL);
+ tp;
+ tp_prev = tp,
+ tp = __tcf_get_next_proto(chain, tp),
+ tcf_proto_put(tp_prev, true, NULL),
+ (*p_index)++) {
if (*p_index < index_start)
continue;
if (TC_H_MAJ(tcm->tcm_info) &&
@@ -1909,9 +2502,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER) <= 0)
- return false;
-
+ RTM_NEWTFILTER, true) <= 0)
+ goto errout;
cb->args[1] = 1;
}
if (!tp->ops->walk)
@@ -1926,23 +2518,27 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
arg.w.cookie = cb->args[2];
- tp->ops->walk(tp, &arg.w);
+ tp->ops->walk(tp, &arg.w, true);
cb->args[2] = arg.w.cookie;
cb->args[1] = arg.w.count + 1;
if (arg.w.stop)
- return false;
+ goto errout;
}
return true;
+
+errout:
+ tcf_proto_put(tp, true, NULL);
+ return false;
}
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct tcf_chain *chain, *chain_prev;
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
struct Qdisc *q = NULL;
struct tcf_block *block;
- struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
long index_start;
long index;
@@ -2006,19 +2602,24 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
index_start = cb->args[0];
index = 0;
- list_for_each_entry(chain, &block->chain_list, list) {
+ for (chain = __tcf_get_next_chain(block, NULL);
+ chain;
+ chain_prev = chain,
+ chain = __tcf_get_next_chain(block, chain),
+ tcf_chain_put(chain_prev)) {
if (tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
index_start, &index)) {
+ tcf_chain_put(chain);
err = -EMSGSIZE;
break;
}
}
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
cb->args[0] = index;
out:
@@ -2028,8 +2629,10 @@ out:
return skb->len;
}
-static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
- struct sk_buff *skb, struct tcf_block *block,
+static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct net *net, struct sk_buff *skb,
+ struct tcf_block *block,
u32 portid, u32 seq, u16 flags, int event)
{
unsigned char *b = skb_tail_pointer(skb);
@@ -2038,8 +2641,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
struct tcmsg *tcm;
void *priv;
- ops = chain->tmplt_ops;
- priv = chain->tmplt_priv;
+ ops = tmplt_ops;
+ priv = tmplt_priv;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
@@ -2057,7 +2660,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
tcm->tcm_block_index = block->index;
}
- if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+ if (nla_put_u32(skb, TCA_CHAIN, chain_index))
goto nla_put_failure;
if (ops) {
@@ -2088,7 +2691,8 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tc_chain_fill_node(chain, net, skb, block, portid,
+ if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+ chain->index, net, skb, block, portid,
seq, flags, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
@@ -2100,6 +2704,31 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct tcf_block *block, struct sk_buff *oskb,
+ u32 seq, u16 flags, bool unicast)
+{
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct net *net = block->net;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
+ block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+}
+
static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
struct nlattr **tca,
struct netlink_ext_ack *extack)
@@ -2111,7 +2740,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
if (!tca[TCA_KIND])
return 0;
- ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+ ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
@@ -2129,16 +2758,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
return 0;
}
-static void tc_chain_tmplt_del(struct tcf_chain *chain)
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv)
{
- const struct tcf_proto_ops *ops = chain->tmplt_ops;
-
/* If template ops are set, no work to do for us. */
- if (!ops)
+ if (!tmplt_ops)
return;
- ops->tmplt_destroy(chain->tmplt_priv);
- module_put(ops->owner);
+ tmplt_ops->tmplt_destroy(tmplt_priv);
+ module_put(tmplt_ops->owner);
}
/* Add/delete/get a chain */
@@ -2181,6 +2809,8 @@ replay:
err = -EINVAL;
goto errout_block;
}
+
+ mutex_lock(&block->lock);
chain = tcf_chain_lookup(block, chain_index);
if (n->nlmsg_type == RTM_NEWCHAIN) {
if (chain) {
@@ -2192,54 +2822,61 @@ replay:
} else {
NL_SET_ERR_MSG(extack, "Filter chain already exists");
err = -EEXIST;
- goto errout_block;
+ goto errout_block_locked;
}
} else {
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
err = -ENOENT;
- goto errout_block;
+ goto errout_block_locked;
}
chain = tcf_chain_create(block, chain_index);
if (!chain) {
NL_SET_ERR_MSG(extack, "Failed to create filter chain");
err = -ENOMEM;
- goto errout_block;
+ goto errout_block_locked;
}
}
} else {
if (!chain || tcf_chain_held_by_acts_only(chain)) {
NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
err = -EINVAL;
- goto errout_block;
+ goto errout_block_locked;
}
tcf_chain_hold(chain);
}
+ if (n->nlmsg_type == RTM_NEWCHAIN) {
+ /* Modifying chain requires holding parent block lock. In case
+ * the chain was successfully added, take a reference to the
+ * chain. This ensures that an empty chain does not disappear at
+ * the end of this function.
+ */
+ tcf_chain_hold(chain);
+ chain->explicitly_created = true;
+ }
+ mutex_unlock(&block->lock);
+
switch (n->nlmsg_type) {
case RTM_NEWCHAIN:
err = tc_chain_tmplt_add(chain, net, tca, extack);
- if (err)
+ if (err) {
+ tcf_chain_put_explicitly_created(chain);
goto errout;
- /* In case the chain was successfully added, take a reference
- * to the chain. This ensures that an empty chain
- * does not disappear at the end of this function.
- */
- tcf_chain_hold(chain);
- chain->explicitly_created = true;
+ }
+
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
RTM_NEWCHAIN, false);
break;
case RTM_DELCHAIN:
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
+ chain, RTM_DELTFILTER, true);
/* Flush the chain first as the user requested chain removal. */
- tcf_chain_flush(chain);
+ tcf_chain_flush(chain, true);
/* In case the chain was successfully deleted, put a reference
* to the chain previously taken during addition.
*/
tcf_chain_put_explicitly_created(chain);
- chain->explicitly_created = false;
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
@@ -2256,21 +2893,25 @@ replay:
errout:
tcf_chain_put(chain);
errout_block:
- tcf_block_release(q, block);
+ tcf_block_release(q, block, true);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
return err;
+
+errout_block_locked:
+ mutex_unlock(&block->lock);
+ goto errout_block;
}
/* called with RTNL */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct tcf_chain *chain, *chain_prev;
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
struct Qdisc *q = NULL;
struct tcf_block *block;
- struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
long index_start;
long index;
@@ -2334,7 +2975,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
index_start = cb->args[0];
index = 0;
- list_for_each_entry(chain, &block->chain_list, list) {
+ for (chain = __tcf_get_next_chain(block, NULL);
+ chain;
+ chain_prev = chain,
+ chain = __tcf_get_next_chain(block, chain),
+ tcf_chain_put(chain_prev)) {
if ((tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index))
continue;
@@ -2342,19 +2987,20 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
index++;
continue;
}
- if (tcf_chain_held_by_acts_only(chain))
- continue;
- err = tc_chain_fill_node(chain, net, skb, block,
+ err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+ chain->index, net, skb, block,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWCHAIN);
- if (err <= 0)
+ if (err <= 0) {
+ tcf_chain_put(chain);
break;
+ }
index++;
}
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
cb->args[0] = index;
out:
@@ -2376,7 +3022,7 @@ EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -2386,7 +3032,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND, true, extack);
+ TCA_ACT_BIND, rtnl_held,
+ extack);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -2398,8 +3045,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- exts->actions, &attr_size, true,
- extack);
+ exts->actions, &attr_size,
+ rtnl_held, extack);
if (err < 0)
return err;
exts->nr_actions = err;
@@ -2515,6 +3162,114 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
}
EXPORT_SYMBOL(tc_setup_cb_call);
+int tc_setup_flow_action(struct flow_action *flow_action,
+ const struct tcf_exts *exts)
+{
+ const struct tc_action *act;
+ int i, j, k;
+
+ if (!exts)
+ return 0;
+
+ j = 0;
+ tcf_exts_for_each_action(i, act, exts) {
+ struct flow_action_entry *entry;
+
+ entry = &flow_action->entries[j];
+ if (is_tcf_gact_ok(act)) {
+ entry->id = FLOW_ACTION_ACCEPT;
+ } else if (is_tcf_gact_shot(act)) {
+ entry->id = FLOW_ACTION_DROP;
+ } else if (is_tcf_gact_trap(act)) {
+ entry->id = FLOW_ACTION_TRAP;
+ } else if (is_tcf_gact_goto_chain(act)) {
+ entry->id = FLOW_ACTION_GOTO;
+ entry->chain_index = tcf_gact_goto_chain_index(act);
+ } else if (is_tcf_mirred_egress_redirect(act)) {
+ entry->id = FLOW_ACTION_REDIRECT;
+ entry->dev = tcf_mirred_dev(act);
+ } else if (is_tcf_mirred_egress_mirror(act)) {
+ entry->id = FLOW_ACTION_MIRRED;
+ entry->dev = tcf_mirred_dev(act);
+ } else if (is_tcf_vlan(act)) {
+ switch (tcf_vlan_action(act)) {
+ case TCA_VLAN_ACT_PUSH:
+ entry->id = FLOW_ACTION_VLAN_PUSH;
+ entry->vlan.vid = tcf_vlan_push_vid(act);
+ entry->vlan.proto = tcf_vlan_push_proto(act);
+ entry->vlan.prio = tcf_vlan_push_prio(act);
+ break;
+ case TCA_VLAN_ACT_POP:
+ entry->id = FLOW_ACTION_VLAN_POP;
+ break;
+ case TCA_VLAN_ACT_MODIFY:
+ entry->id = FLOW_ACTION_VLAN_MANGLE;
+ entry->vlan.vid = tcf_vlan_push_vid(act);
+ entry->vlan.proto = tcf_vlan_push_proto(act);
+ entry->vlan.prio = tcf_vlan_push_prio(act);
+ break;
+ default:
+ goto err_out;
+ }
+ } else if (is_tcf_tunnel_set(act)) {
+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+ entry->tunnel = tcf_tunnel_info(act);
+ } else if (is_tcf_tunnel_release(act)) {
+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
+ entry->tunnel = tcf_tunnel_info(act);
+ } else if (is_tcf_pedit(act)) {
+ for (k = 0; k < tcf_pedit_nkeys(act); k++) {
+ switch (tcf_pedit_cmd(act, k)) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ entry->id = FLOW_ACTION_MANGLE;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ entry->id = FLOW_ACTION_ADD;
+ break;
+ default:
+ goto err_out;
+ }
+ entry->mangle.htype = tcf_pedit_htype(act, k);
+ entry->mangle.mask = tcf_pedit_mask(act, k);
+ entry->mangle.val = tcf_pedit_val(act, k);
+ entry->mangle.offset = tcf_pedit_offset(act, k);
+ entry = &flow_action->entries[++j];
+ }
+ } else if (is_tcf_csum(act)) {
+ entry->id = FLOW_ACTION_CSUM;
+ entry->csum_flags = tcf_csum_update_flags(act);
+ } else if (is_tcf_skbedit_mark(act)) {
+ entry->id = FLOW_ACTION_MARK;
+ entry->mark = tcf_skbedit_mark(act);
+ } else {
+ goto err_out;
+ }
+
+ if (!is_tcf_pedit(act))
+ j++;
+ }
+ return 0;
+err_out:
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tc_setup_flow_action);
+
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
+{
+ unsigned int num_acts = 0;
+ struct tc_action *act;
+ int i;
+
+ tcf_exts_for_each_action(i, act, exts) {
+ if (is_tcf_pedit(act))
+ num_acts += tcf_pedit_nkeys(act);
+ else
+ num_acts++;
+ }
+ return num_acts;
+}
+EXPORT_SYMBOL(tcf_exts_num_actions);
+
static __net_init int tcf_net_init(struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
@@ -2555,10 +3310,12 @@ static int __init tc_filter_init(void)
if (err)
goto err_rhash_setup_block_ht;
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
- tc_dump_tfilter, 0);
+ tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4a57fec6f306..2383f449d2bc 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -107,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work)
rtnl_unlock();
}
-static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void basic_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
@@ -126,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f = arg;
@@ -153,7 +154,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -173,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -247,7 +248,8 @@ errout:
return err;
}
-static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
@@ -274,7 +276,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
}
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tc_basic_pcnt gpf = {};
struct basic_filter *f = fh;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a95cb240a606..062350c6621c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
}
static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp,
+static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+ extack);
if (ret < 0)
return ret;
@@ -455,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *oldprog = *arg;
@@ -575,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
}
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *tm)
+ struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held)
{
struct cls_bpf_prog *prog = fh;
struct nlattr *nest;
@@ -635,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
prog->res.class = cl;
}
-static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3bc01bdde165..02b05066b635 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr,
+ void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
goto errout;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
goto errout;
@@ -130,7 +130,7 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp,
+static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -145,18 +145,21 @@ static void cls_cgroup_destroy(struct tcf_proto *tp,
}
static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
-static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
+ if (!head)
+ return;
if (arg->fn(tp, head, arg) < 0) {
arg->stop = 1;
return;
@@ -166,7 +169,7 @@ skip:
}
static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
struct nlattr *nest;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2bb043cd436b..204e2edae8d5 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *fold, *fnew;
@@ -445,7 +446,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
goto err2;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
goto err2;
@@ -566,7 +567,7 @@ err1:
}
static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f = arg;
@@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void flow_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
@@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle)
}
static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct flow_filter *f = fh;
struct nlattr *nest;
@@ -677,7 +679,8 @@ nla_put_failure:
return -1;
}
-static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f6aa57fbbbaf..640f83e7f93f 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -381,16 +381,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
bool skip_sw = tc_skip_sw(f->flags);
int err;
+ cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+ if (!cls_flower.rule)
+ return -ENOMEM;
+
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
- cls_flower.dissector = &f->mask->dissector;
- cls_flower.mask = &f->mask->key;
- cls_flower.key = &f->mkey;
- cls_flower.exts = &f->exts;
+ cls_flower.rule->match.dissector = &f->mask->dissector;
+ cls_flower.rule->match.mask = &f->mask->key;
+ cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
+ err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+ if (err) {
+ kfree(cls_flower.rule);
+ if (skip_sw) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+ return err;
+ }
+ return 0;
+ }
+
err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
+ kfree(cls_flower.rule);
+
if (err < 0) {
fl_hw_destroy_filter(tp, f, NULL);
return err;
@@ -413,10 +428,13 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
- cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+
+ tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
+ cls_flower.stats.pkts,
+ cls_flower.stats.lastused);
}
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
@@ -451,7 +469,8 @@ static void fl_destroy_sleepable(struct work_struct *work)
module_put(THIS_MODULE);
}
-static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct fl_flow_mask *mask, *next_mask;
@@ -1258,7 +1277,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true,
+ extack);
if (err < 0)
return err;
@@ -1285,7 +1305,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = *arg;
@@ -1371,7 +1392,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!tc_skip_hw(fnew->flags)) {
err = fl_hw_replace_filter(tp, fnew, extack);
if (err)
- goto errout_mask;
+ goto errout_mask_ht;
}
if (!tc_in_hw(fnew->flags))
@@ -1401,6 +1422,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
kfree(mask);
return 0;
+errout_mask_ht:
+ rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
+ fnew->mask->filter_ht_params);
+
errout_mask:
fl_mask_put(head, fnew->mask, false);
@@ -1418,7 +1443,7 @@ errout_mask_alloc:
}
static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = arg;
@@ -1430,7 +1455,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
return 0;
}
-static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f;
@@ -1463,18 +1489,36 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
if (tc_skip_hw(f->flags))
continue;
+ cls_flower.rule =
+ flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+ if (!cls_flower.rule)
+ return -ENOMEM;
+
tc_cls_common_offload_init(&cls_flower.common, tp,
f->flags, extack);
cls_flower.command = add ?
TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long)f;
- cls_flower.dissector = &mask->dissector;
- cls_flower.mask = &mask->key;
- cls_flower.key = &f->mkey;
- cls_flower.exts = &f->exts;
+ cls_flower.rule->match.dissector = &mask->dissector;
+ cls_flower.rule->match.mask = &mask->key;
+ cls_flower.rule->match.key = &f->mkey;
+
+ err = tc_setup_flow_action(&cls_flower.rule->action,
+ &f->exts);
+ if (err) {
+ kfree(cls_flower.rule);
+ if (tc_skip_sw(f->flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+ return err;
+ }
+ continue;
+ }
+
cls_flower.classid = f->res.classid;
err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
+
if (err) {
if (add && tc_skip_sw(f->flags))
return err;
@@ -1489,25 +1533,30 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
return 0;
}
-static void fl_hw_create_tmplt(struct tcf_chain *chain,
- struct fl_flow_tmplt *tmplt)
+static int fl_hw_create_tmplt(struct tcf_chain *chain,
+ struct fl_flow_tmplt *tmplt)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = chain->block;
- struct tcf_exts dummy_exts = { 0, };
+
+ cls_flower.rule = flow_rule_alloc(0);
+ if (!cls_flower.rule)
+ return -ENOMEM;
cls_flower.common.chain_index = chain->index;
cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
cls_flower.cookie = (unsigned long) tmplt;
- cls_flower.dissector = &tmplt->dissector;
- cls_flower.mask = &tmplt->mask;
- cls_flower.key = &tmplt->dummy_key;
- cls_flower.exts = &dummy_exts;
+ cls_flower.rule->match.dissector = &tmplt->dissector;
+ cls_flower.rule->match.mask = &tmplt->mask;
+ cls_flower.rule->match.key = &tmplt->dummy_key;
/* We don't care if driver (any of them) fails to handle this
* call. It serves just as a hint for it.
*/
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+ kfree(cls_flower.rule);
+
+ return 0;
}
static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
@@ -1551,12 +1600,14 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
if (err)
goto errout_tmplt;
- kfree(tb);
fl_init_dissector(&tmplt->dissector, &tmplt->mask);
- fl_hw_create_tmplt(chain, tmplt);
+ err = fl_hw_create_tmplt(chain, tmplt);
+ if (err)
+ goto errout_tmplt;
+ kfree(tb);
return tmplt;
errout_tmplt:
@@ -2004,7 +2055,7 @@ nla_put_failure:
}
static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct cls_fl_filter *f = fh;
struct nlattr *nest;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 29eeeaf3ea44..4e34966f2ae2 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work)
rtnl_unlock();
}
-static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
@@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = arg;
@@ -217,7 +218,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
int err;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
return err;
@@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr, struct netlink_ext_ack *extack)
+ bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
@@ -354,15 +356,13 @@ errout:
return err;
}
-static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
int h;
- if (head == NULL)
- arg->stop = 1;
-
- if (arg->stop)
+ if (head == NULL || arg->stop)
return;
for (h = 0; h < HTSIZE; h++) {
@@ -384,7 +384,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = fh;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index a1b803fd372e..1f9d481b0fbb 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -109,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return 0;
}
-static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -145,7 +146,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
+ extack);
if (err < 0)
return err;
@@ -159,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -232,17 +235,21 @@ err_exts_init:
}
static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
-static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
+
+ if (!head)
+ return;
if (arg->fn(tp, head, arg) < 0)
arg->stop = 1;
skip:
@@ -279,7 +286,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
}
static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tc_matchall_pcnt gpf = {};
struct cls_mall_head *head = fh;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0404aa5fa7cb..444d15a75d98 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f)
tcf_queue_work(&f->rwork, route4_delete_filter_work);
}
-static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void route4_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
@@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = arg;
@@ -393,7 +394,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -560,15 +561,13 @@ errout:
return err;
}
-static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct route4_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
- if (head == NULL)
- arg->stop = 1;
-
- if (arg->stop)
+ if (head == NULL || arg->stop)
return;
for (h = 0; h <= 256; h++) {
@@ -597,7 +596,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct route4_filter *f = fh;
struct nlattr *nest;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index e9ccf7daea7d..4d3836178fa5 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
__rsvp_delete_filter(f);
}
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
@@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_filter *nfp, *f = arg;
@@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
struct rsvp_filter *f, *nfp;
@@ -502,7 +504,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+ extack);
if (err < 0)
goto errout2;
@@ -654,7 +657,8 @@ errout2:
return err;
}
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
@@ -688,7 +692,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct rsvp_filter *f = fh;
struct rsvp_session *s;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 9ccc93f257db..fbf3519a12d8 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -48,7 +48,7 @@ struct tcindex_data {
u32 hash; /* hash table size; 0 if undefined */
u32 alloc_hash; /* allocated size */
u32 fall_through; /* 0: only classify if explicit match */
- struct rcu_head rcu;
+ struct rcu_work rwork;
};
static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
@@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work)
}
static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = arg;
@@ -221,17 +221,11 @@ found:
return 0;
}
-static int tcindex_destroy_element(struct tcf_proto *tp,
- void *arg, struct tcf_walker *walker)
-{
- bool last;
-
- return tcindex_delete(tp, arg, &last, NULL);
-}
-
-static void __tcindex_destroy(struct rcu_head *head)
+static void tcindex_destroy_work(struct work_struct *work)
{
- struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+ struct tcindex_data *p = container_of(to_rcu_work(work),
+ struct tcindex_data,
+ rwork);
kfree(p->perfect);
kfree(p->h);
@@ -258,9 +252,11 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r)
return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
}
-static void __tcindex_partial_destroy(struct rcu_head *head)
+static void tcindex_partial_destroy_work(struct work_struct *work)
{
- struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+ struct tcindex_data *p = container_of(to_rcu_work(work),
+ struct tcindex_data,
+ rwork);
kfree(p->perfect);
kfree(p);
@@ -275,7 +271,7 @@ static void tcindex_free_perfect_hash(struct tcindex_data *cp)
kfree(cp->perfect);
}
-static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
+static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
{
int i, err = 0;
@@ -289,6 +285,9 @@ static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
+#ifdef CONFIG_NET_CLS_ACT
+ cp->perfect[i].exts.net = net;
+#endif
}
return 0;
@@ -305,16 +304,16 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
- struct tcindex_filter_result cr;
struct tcindex_data *cp = NULL, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
+ struct tcf_result cr = {};
int err, balloc = 0;
struct tcf_exts e;
err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
if (err < 0)
goto errout;
@@ -337,7 +336,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (p->perfect) {
int i;
- if (tcindex_alloc_perfect_hash(cp) < 0)
+ if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
for (i = 0; i < cp->hash; i++)
cp->perfect[i].res = p->perfect[i].res;
@@ -348,11 +347,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcindex_filter_result_init(&new_filter_result);
if (err < 0)
goto errout1;
- err = tcindex_filter_result_init(&cr);
- if (err < 0)
- goto errout1;
if (old_r)
- cr.res = r->res;
+ cr = r->res;
if (tb[TCA_TCINDEX_HASH])
cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -406,7 +402,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = -ENOMEM;
if (!cp->perfect && !cp->h) {
if (valid_perfect_hash(cp)) {
- if (tcindex_alloc_perfect_hash(cp) < 0)
+ if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout_alloc;
balloc = 1;
} else {
@@ -443,8 +439,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
if (tb[TCA_TCINDEX_CLASSID]) {
- cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
- tcf_bind_filter(tp, &cr.res, base);
+ cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
+ tcf_bind_filter(tp, &cr, base);
}
if (old_r && old_r != r) {
@@ -456,7 +452,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
oldp = p;
- r->res = cr.res;
+ r->res = cr;
tcf_exts_change(&r->exts, &e);
rcu_assign_pointer(tp->root, cp);
@@ -475,10 +471,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
; /* nothing */
rcu_assign_pointer(*fp, f);
+ } else {
+ tcf_exts_destroy(&new_filter_result.exts);
}
if (oldp)
- call_rcu(&oldp->rcu, __tcindex_partial_destroy);
+ tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
return 0;
errout_alloc:
@@ -487,7 +485,6 @@ errout_alloc:
else if (balloc == 2)
kfree(cp->h);
errout1:
- tcf_exts_destroy(&cr.exts);
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
@@ -499,7 +496,7 @@ static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -522,7 +519,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
tca[TCA_RATE], ovr, extack);
}
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
+ bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
@@ -558,24 +556,43 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp,
+static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcf_walker walker;
+ int i;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
- walker.count = 0;
- walker.skip = 0;
- walker.fn = tcindex_destroy_element;
- tcindex_walk(tp, &walker);
- call_rcu(&p->rcu, __tcindex_destroy);
+ if (p->perfect) {
+ for (i = 0; i < p->hash; i++) {
+ struct tcindex_filter_result *r = p->perfect + i;
+
+ tcf_unbind_filter(tp, &r->res);
+ if (tcf_exts_get_net(&r->exts))
+ tcf_queue_work(&r->rwork,
+ tcindex_destroy_rexts_work);
+ else
+ __tcindex_destroy_rexts(r);
+ }
+ }
+
+ for (i = 0; p->h && i < p->hash; i++) {
+ struct tcindex_filter *f, *next;
+ bool last;
+
+ for (f = rtnl_dereference(p->h[i]); f; f = next) {
+ next = rtnl_dereference(f->next);
+ tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
+ }
+ }
+
+ tcf_queue_work(&p->rwork, tcindex_destroy_work);
}
static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = fh;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index dcea21004604..27d29c04dcc9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
return -ENOENT;
}
-static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = arg;
struct tc_u_common *tp_c = tp->data;
@@ -726,7 +727,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
+ struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
@@ -1123,7 +1124,8 @@ erridr:
return err;
}
-static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
}
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tc_u_knode *n = fh;
struct tc_u_hnode *ht_up, *ht_down;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 03e26e8d0ec9..352b46f98440 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -526,11 +526,6 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
return stab;
}
-static void stab_kfree_rcu(struct rcu_head *head)
-{
- kfree(container_of(head, struct qdisc_size_table, rcu));
-}
-
void qdisc_put_stab(struct qdisc_size_table *tab)
{
if (!tab)
@@ -538,7 +533,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
if (--tab->refcnt == 0) {
list_del(&tab->list);
- call_rcu(&tab->rcu, stab_kfree_rcu);
+ kfree_rcu(tab, rcu);
}
}
EXPORT_SYMBOL(qdisc_put_stab);
@@ -1201,9 +1196,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
} else {
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
- err = -ENOMEM;
- if (handle == 0)
+ if (handle == 0) {
+ NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
+ err = -ENOSPC;
goto err_out3;
+ }
}
if (!netif_is_multiqueue(dev))
sch->flags |= TCQ_F_ONETXQUEUE;
@@ -1909,17 +1906,19 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
block = cops->tcf_block(q, cl, NULL);
if (!block)
return;
- list_for_each_entry(chain, &block->chain_list, list) {
+ for (chain = tcf_get_next_chain(block, NULL);
+ chain;
+ chain = tcf_get_next_chain(block, chain)) {
struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next)) {
+ for (tp = tcf_get_next_proto(chain, NULL, true);
+ tp; tp = tcf_get_next_proto(chain, tp, true)) {
struct tcf_bind_args arg = {};
arg.w.fn = tcf_node_bind;
arg.classid = clid;
arg.cl = new_cl;
- tp->ops->walk(tp, &arg.w);
+ tp->ops->walk(tp, &arg.w, true);
}
}
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 66ba2ce2320f..38e5add14fab 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -500,7 +500,7 @@ static void dev_watchdog_down(struct net_device *dev)
* netif_carrier_on - set carrier
* @dev: network device
*
- * Device has detected that carrier.
+ * Device has detected acquisition of carrier.
*/
void netif_carrier_on(struct net_device *dev)
{
@@ -1366,7 +1366,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head)
void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head)
{
- struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+ /* Protected with chain0->filter_chain_lock.
+ * Can't access chain directly because tp_head can be NULL.
+ */
+ struct mini_Qdisc *miniq_old =
+ rcu_dereference_protected(*miniqp->p_miniq, 1);
struct mini_Qdisc *miniq;
if (!tp_head) {
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 078f01a8d582..435847d98b51 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
+ nla_total_size(1) /* INET_DIAG_TOS */
+ nla_total_size(1) /* INET_DIAG_TCLASS */
+ nla_total_size(4) /* INET_DIAG_MARK */
+ + nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(addrlen * asoc->peer.transport_count)
+ nla_total_size(addrlen * addrcnt)
+ nla_total_size(sizeof(struct inet_diag_meminfo))
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 123e9f2dc226..edfcf16e704c 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
skb->ip_summed = CHECKSUM_NONE;
skb->csum_not_inet = 0;
+ gso_reset_checksum(skb, ~0);
return sctp_compute_cksum(skb, skb_transport_offset(skb));
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9644bdc8e85c..a78e55a1bb9c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_transport *transport = NULL;
struct sctp_sndrcvinfo _sinfo, *sinfo;
- struct sctp_association *asoc;
+ struct sctp_association *asoc, *tmp;
struct sctp_cmsgs cmsgs;
union sctp_addr *daddr;
bool new = false;
@@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
/* SCTP_SENDALL process */
if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
- list_for_each_entry(asoc, &ep->asocs, asocs) {
+ list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) {
err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
msg_len);
if (err == 0)
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 80e0ae5534ec..2936ed17bf9e 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count)
}
}
+static size_t fa_index(struct flex_array *fa, void *elem, size_t count)
+{
+ size_t index = 0;
+
+ while (count--) {
+ if (elem == flex_array_get(fa, index))
+ break;
+ index++;
+ }
+
+ return index;
+}
+
/* Migrates chunks from stream queues to new stream queues if needed,
* but not across associations. Also, removes those chunks to streams
* higher than the new max.
@@ -131,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
}
}
- for (i = outcnt; i < stream->outcnt; i++)
+ for (i = outcnt; i < stream->outcnt; i++) {
kfree(SCTP_SO(stream, i)->ext);
+ SCTP_SO(stream, i)->ext = NULL;
+ }
}
static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
@@ -147,6 +162,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
if (stream->out) {
fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt));
+ if (stream->out_curr) {
+ size_t index = fa_index(stream->out, stream->out_curr,
+ stream->outcnt);
+
+ BUG_ON(index == stream->outcnt);
+ stream->out_curr = flex_array_get(out, index);
+ }
fa_free(stream->out);
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 369870b0ef79..77ef53596d18 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -30,6 +30,10 @@
#include <net/smc.h>
#include <asm/ioctls.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include "smc_netns.h"
+
#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
@@ -42,8 +46,11 @@
#include "smc_rx.h"
#include "smc_close.h"
-static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
- * creation
+static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
+ * creation on server
+ */
+static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
+ * creation on client
*/
static void smc_tcp_listen_work(struct work_struct *);
@@ -145,32 +152,33 @@ static int smc_release(struct socket *sock)
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
- }
-
- sk->sk_prot->unhash(sk);
-
- if (smc->clcsock) {
- if (smc->use_fallback && sk->sk_state == SMC_LISTEN) {
+ } else {
+ if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+ sock_put(sk); /* passive closing */
+ if (sk->sk_state == SMC_LISTEN) {
/* wake up clcsock accept */
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
}
- mutex_lock(&smc->clcsock_release_lock);
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- mutex_unlock(&smc->clcsock_release_lock);
- }
- if (smc->use_fallback) {
- if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
- sock_put(sk); /* passive closing */
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
}
+ sk->sk_prot->unhash(sk);
+
+ if (sk->sk_state == SMC_CLOSED) {
+ if (smc->clcsock) {
+ mutex_lock(&smc->clcsock_release_lock);
+ sock_release(smc->clcsock);
+ smc->clcsock = NULL;
+ mutex_unlock(&smc->clcsock_release_lock);
+ }
+ if (!smc->use_fallback)
+ smc_conn_free(&smc->conn);
+ }
+
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
- smc_conn_free(&smc->conn);
release_sock(sk);
sock_put(sk); /* final sock_put */
@@ -476,7 +484,12 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
{
if (local_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(smc->conn.lgr);
- mutex_unlock(&smc_create_lgr_pending);
+ if (smc->conn.lgr->is_smcd)
+ /* there is only one lgr role for SMC-D; use server lock */
+ mutex_unlock(&smc_server_lgr_pending);
+ else
+ mutex_unlock(&smc_client_lgr_pending);
+
smc_conn_free(&smc->conn);
return reason_code;
}
@@ -561,7 +574,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
struct smc_link *link;
int reason_code = 0;
- mutex_lock(&smc_create_lgr_pending);
+ mutex_lock(&smc_client_lgr_pending);
local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
ibport, ntoh24(aclc->qpn), &aclc->lcl,
NULL, 0);
@@ -572,7 +585,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
else
reason_code = SMC_CLC_DECL_INTERR; /* other error */
- return smc_connect_abort(smc, reason_code, 0);
+ mutex_unlock(&smc_client_lgr_pending);
+ return reason_code;
}
link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
@@ -616,7 +630,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
return smc_connect_abort(smc, reason_code,
local_contact);
}
- mutex_unlock(&smc_create_lgr_pending);
+ mutex_unlock(&smc_client_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
if (smc->sk.sk_state == SMC_INIT)
@@ -633,11 +647,14 @@ static int smc_connect_ism(struct smc_sock *smc,
int local_contact = SMC_FIRST_CONTACT;
int rc = 0;
- mutex_lock(&smc_create_lgr_pending);
+ /* there is only one lgr role for SMC-D; use server lock */
+ mutex_lock(&smc_server_lgr_pending);
local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
NULL, ismdev, aclc->gid);
- if (local_contact < 0)
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
+ if (local_contact < 0) {
+ mutex_unlock(&smc_server_lgr_pending);
+ return SMC_CLC_DECL_MEM;
+ }
/* Create send and receive buffers */
if (smc_buf_create(smc, true))
@@ -651,7 +668,7 @@ static int smc_connect_ism(struct smc_sock *smc,
rc = smc_clc_send_confirm(smc);
if (rc)
return smc_connect_abort(smc, rc, local_contact);
- mutex_unlock(&smc_create_lgr_pending);
+ mutex_unlock(&smc_server_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
if (smc->sk.sk_state == SMC_INIT)
@@ -1250,7 +1267,7 @@ static void smc_listen_work(struct work_struct *work)
return;
}
- mutex_lock(&smc_create_lgr_pending);
+ mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
@@ -1272,7 +1289,7 @@ static void smc_listen_work(struct work_struct *work)
&local_contact) ||
smc_listen_rdma_reg(new_smc, local_contact))) {
/* SMC not supported, decline */
- mutex_unlock(&smc_create_lgr_pending);
+ mutex_unlock(&smc_server_lgr_pending);
smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
local_contact);
return;
@@ -1281,29 +1298,33 @@ static void smc_listen_work(struct work_struct *work)
/* send SMC Accept CLC message */
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc) {
- mutex_unlock(&smc_create_lgr_pending);
+ mutex_unlock(&smc_server_lgr_pending);
smc_listen_decline(new_smc, rc, local_contact);
return;
}
+ /* SMC-D does not need this lock any more */
+ if (ism_supported)
+ mutex_unlock(&smc_server_lgr_pending);
+
/* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
SMC_CLC_CONFIRM, CLC_WAIT_TIME);
if (reason_code) {
- mutex_unlock(&smc_create_lgr_pending);
+ if (!ism_supported)
+ mutex_unlock(&smc_server_lgr_pending);
smc_listen_decline(new_smc, reason_code, local_contact);
return;
}
/* finish worker */
if (!ism_supported) {
- if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) {
- mutex_unlock(&smc_create_lgr_pending);
+ rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+ mutex_unlock(&smc_server_lgr_pending);
+ if (rc)
return;
- }
}
smc_conn_save_peer_info(new_smc, &cclc);
- mutex_unlock(&smc_create_lgr_pending);
smc_listen_out_connected(new_smc);
}
@@ -1506,6 +1527,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
smc = smc_sk(sk);
lock_sock(sk);
+ if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
+ /* socket was connected before, no more data to read */
+ rc = 0;
+ goto out;
+ }
if ((sk->sk_state == SMC_INIT) ||
(sk->sk_state == SMC_LISTEN) ||
(sk->sk_state == SMC_CLOSED))
@@ -1841,7 +1867,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
smc = smc_sk(sk);
lock_sock(sk);
-
+ if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
+ /* socket was connected before, no more data to read */
+ rc = 0;
+ goto out;
+ }
if (sk->sk_state == SMC_INIT ||
sk->sk_state == SMC_LISTEN ||
sk->sk_state == SMC_CLOSED)
@@ -1940,10 +1970,33 @@ static const struct net_proto_family smc_sock_family_ops = {
.create = smc_create,
};
+unsigned int smc_net_id;
+
+static __net_init int smc_net_init(struct net *net)
+{
+ return smc_pnet_net_init(net);
+}
+
+static void __net_exit smc_net_exit(struct net *net)
+{
+ smc_pnet_net_exit(net);
+}
+
+static struct pernet_operations smc_net_ops = {
+ .init = smc_net_init,
+ .exit = smc_net_exit,
+ .id = &smc_net_id,
+ .size = sizeof(struct smc_net),
+};
+
static int __init smc_init(void)
{
int rc;
+ rc = register_pernet_subsys(&smc_net_ops);
+ if (rc)
+ return rc;
+
rc = smc_pnet_init();
if (rc)
return rc;
@@ -2009,6 +2062,7 @@ static void __exit smc_exit(void)
proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
+ unregister_pernet_subsys(&smc_net_ops);
}
module_init(smc_init);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index db83332ac1c8..d0b0f4c865b4 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -21,13 +21,6 @@
/********************************** send *************************************/
-struct smc_cdc_tx_pend {
- struct smc_connection *conn; /* socket connection */
- union smc_host_cursor cursor; /* tx sndbuf cursor sent */
- union smc_host_cursor p_cursor; /* rx RMBE cursor produced */
- u16 ctrl_seq; /* conn. tx sequence # */
-};
-
/* handler for send/transmission completion of a CDC msg */
static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
struct smc_link *link,
@@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
+ struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+ wr_rdma_buf,
(struct smc_wr_tx_pend_priv **)pend);
if (!conn->alert_token_local)
/* abnormal termination */
@@ -96,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend)
{
+ union smc_host_cursor cfed;
struct smc_link *link;
int rc;
@@ -105,12 +101,12 @@ int smc_cdc_msg_send(struct smc_connection *conn,
conn->tx_cdc_seq++;
conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
- smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf,
- &conn->local_tx_ctrl, conn);
+ smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);
rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
- if (!rc)
- smc_curs_copy(&conn->rx_curs_confirmed,
- &conn->local_tx_ctrl.cons, conn);
+ if (!rc) {
+ smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
+ conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+ }
return rc;
}
@@ -121,11 +117,14 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
if (rc)
return rc;
- return smc_cdc_msg_send(conn, wr_buf, pend);
+ spin_lock_bh(&conn->send_lock);
+ rc = smc_cdc_msg_send(conn, wr_buf, pend);
+ spin_unlock_bh(&conn->send_lock);
+ return rc;
}
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
@@ -195,6 +194,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
if (rc)
return rc;
smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
+ conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -271,26 +271,18 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smp_mb__after_atomic();
smc->sk.sk_data_ready(&smc->sk);
} else {
- if (conn->local_rx_ctrl.prod_flags.write_blocked ||
- conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
- conn->local_rx_ctrl.prod_flags.urg_data_pending) {
- if (conn->local_rx_ctrl.prod_flags.urg_data_pending)
- conn->urg_state = SMC_URG_NOTYET;
- /* force immediate tx of current consumer cursor, but
- * under send_lock to guarantee arrival in seqno-order
- */
- if (smc->sk.sk_state != SMC_INIT)
- smc_tx_sndbuf_nonempty(conn);
- }
+ if (conn->local_rx_ctrl.prod_flags.write_blocked)
+ smc->sk.sk_data_ready(&smc->sk);
+ if (conn->local_rx_ctrl.prod_flags.urg_data_pending)
+ conn->urg_state = SMC_URG_NOTYET;
}
- /* piggy backed tx info */
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn)) {
+ if ((diff_cons && smc_tx_prepared_sends(conn)) ||
+ conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
+ conn->local_rx_ctrl.prod_flags.urg_data_pending)
smc_tx_sndbuf_nonempty(conn);
- /* trigger socket release if connection closed */
- smc_close_wake_tx_prepared(smc);
- }
+
if (diff_cons && conn->urg_tx_pend &&
atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {
/* urg data confirmed by peer, indicate we're ready for more */
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index b5bfe38c7f9b..861dc24c588c 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
#endif
}
-/* calculate cursor difference between old and new, where old <= new */
+/* calculate cursor difference between old and new, where old <= new and
+ * difference cannot exceed size
+ */
static inline int smc_curs_diff(unsigned int size,
union smc_host_cursor *old,
union smc_host_cursor *new)
@@ -185,28 +187,51 @@ static inline int smc_curs_comp(unsigned int size,
return smc_curs_diff(size, old, new);
}
+/* calculate cursor difference between old and new, where old <= new and
+ * difference may exceed size
+ */
+static inline int smc_curs_diff_large(unsigned int size,
+ union smc_host_cursor *old,
+ union smc_host_cursor *new)
+{
+ if (old->wrap < new->wrap)
+ return min_t(int,
+ (size - old->count) + new->count +
+ (new->wrap - old->wrap - 1) * size,
+ size);
+
+ if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */
+ return min_t(int,
+ (size - old->count) + new->count +
+ (new->wrap + 0xffff - old->wrap) * size,
+ size);
+
+ return max_t(int, 0, (new->count - old->count));
+}
+
static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,
union smc_host_cursor *local,
+ union smc_host_cursor *save,
struct smc_connection *conn)
{
- union smc_host_cursor temp;
-
- smc_curs_copy(&temp, local, conn);
- peer->count = htonl(temp.count);
- peer->wrap = htons(temp.wrap);
+ smc_curs_copy(save, local, conn);
+ peer->count = htonl(save->count);
+ peer->wrap = htons(save->wrap);
/* peer->reserved = htons(0); must be ensured by caller */
}
static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer,
- struct smc_host_cdc_msg *local,
- struct smc_connection *conn)
+ struct smc_connection *conn,
+ union smc_host_cursor *save)
{
+ struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
+
peer->common.type = local->common.type;
peer->len = local->len;
peer->seqno = htons(local->seqno);
peer->token = htonl(local->token);
- smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn);
- smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn);
+ smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn);
+ smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn);
peer->prod_flags = local->prod_flags;
peer->conn_state_flags = local->conn_state_flags;
}
@@ -245,17 +270,18 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
}
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
- struct smcd_cdc_msg *peer)
+ struct smcd_cdc_msg *peer,
+ struct smc_connection *conn)
{
union smc_host_cursor temp;
temp.wrap = peer->prod.wrap;
temp.count = peer->prod.count;
- atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs));
+ smc_curs_copy(&local->prod, &temp, conn);
temp.wrap = peer->cons.wrap;
temp.count = peer->cons.count;
- atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs));
+ smc_curs_copy(&local->cons, &temp, conn);
local->prod_flags = peer->cons.prod_flags;
local->conn_state_flags = peer->cons.conn_state_flags;
}
@@ -265,15 +291,21 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_connection *conn)
{
if (conn->lgr->is_smcd)
- smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer);
+ smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer, conn);
else
smcr_cdc_msg_to_host(local, peer, conn);
}
-struct smc_cdc_tx_pend;
+struct smc_cdc_tx_pend {
+ struct smc_connection *conn; /* socket connection */
+ union smc_host_cursor cursor; /* tx sndbuf cursor sent */
+ union smc_host_cursor p_cursor; /* rx RMBE cursor produced */
+ u16 ctrl_seq; /* conn. tx sequence # */
+};
int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
+ struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend);
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 776e9dfc915d..d53fd588d1f5 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
vec.iov_len = sizeof(struct smc_clc_msg_decline);
len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
sizeof(struct smc_clc_msg_decline));
- if (len < sizeof(struct smc_clc_msg_decline))
+ if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
len = -EPROTO;
return len > 0 ? 0 : len;
}
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index ea2b87f29469..2ad37e998509 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work)
switch (sk->sk_state) {
case SMC_INIT:
- if (atomic_read(&conn->bytes_to_rcv) ||
- (rxflags->peer_done_writing &&
- !smc_cdc_rxed_any_close(conn))) {
- sk->sk_state = SMC_APPCLOSEWAIT1;
- } else {
- sk->sk_state = SMC_CLOSED;
- sock_put(sk); /* passive closing */
- }
+ sk->sk_state = SMC_APPCLOSEWAIT1;
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
@@ -405,8 +398,13 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
smc_conn_free(conn);
+ if (smc->clcsock) {
+ sock_release(smc->clcsock);
+ smc->clcsock = NULL;
+ }
+ }
}
release_sock(sk);
sock_put(sk); /* sock_hold done by schedulers of close_work */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 35c1cdc93e1c..53a17cfa61af 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -118,7 +118,6 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn)
rb_erase(&conn->alert_node, &lgr->conns_all);
lgr->conns_num--;
conn->alert_token_local = 0;
- conn->lgr = NULL;
sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}
@@ -128,6 +127,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
+ if (!lgr)
+ return;
write_lock_bh(&lgr->conns_lock);
if (conn->alert_token_local) {
__smc_lgr_unregister_conn(conn);
@@ -159,8 +160,6 @@ static void smc_lgr_free_work(struct work_struct *work)
bool conns;
spin_lock_bh(&smc_lgr_list.lock);
- if (list_empty(&lgr->list))
- goto free;
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
@@ -168,8 +167,8 @@ static void smc_lgr_free_work(struct work_struct *work)
spin_unlock_bh(&smc_lgr_list.lock);
return;
}
- list_del_init(&lgr->list); /* remove from smc_lgr_list */
-free:
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list); /* remove from smc_lgr_list */
spin_unlock_bh(&smc_lgr_list.lock);
if (!lgr->is_smcd && !lgr->terminating) {
@@ -300,13 +299,13 @@ static void smc_buf_unuse(struct smc_connection *conn,
conn->sndbuf_desc->used = 0;
if (conn->rmb_desc) {
if (!conn->rmb_desc->regerr) {
- conn->rmb_desc->used = 0;
if (!lgr->is_smcd) {
/* unregister rmb with peer */
smc_llc_do_delete_rkey(
&lgr->lnk[SMC_SINGLE_LINK],
conn->rmb_desc);
}
+ conn->rmb_desc->used = 0;
} else {
/* buf registration failed, reuse not possible */
write_lock_bh(&lgr->rmbs_lock);
@@ -331,8 +330,9 @@ void smc_conn_free(struct smc_connection *conn)
} else {
smc_cdc_tx_dismiss_slots(conn);
}
- smc_lgr_unregister_conn(conn); /* unsets conn->lgr */
+ smc_lgr_unregister_conn(conn);
smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ conn->lgr = NULL;
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
@@ -462,6 +462,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
sock_hold(&smc->sk); /* sock_put in close work */
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
__smc_lgr_unregister_conn(conn);
+ conn->lgr = NULL;
write_unlock_bh(&lgr->conns_lock);
if (!schedule_work(&conn->close_work))
sock_put(&smc->sk);
@@ -628,6 +629,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
local_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ if (delayed_work_pending(&lgr->free_work))
+ cancel_delayed_work(&lgr->free_work);
write_unlock_bh(&lgr->conns_lock);
break;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index b00287989a3d..8806d2afa6ed 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -52,6 +52,24 @@ enum smc_wr_reg_state {
FAILED /* ib_wr_reg_mr response: failure */
};
+struct smc_rdma_sge { /* sges for RDMA writes */
+ struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE];
+};
+
+#define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per
+ * message send
+ */
+
+struct smc_rdma_sges { /* sges per message send */
+ struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES];
+};
+
+struct smc_rdma_wr { /* work requests per message
+ * send
+ */
+ struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES];
+};
+
struct smc_link {
struct smc_ib_device *smcibdev; /* ib-device */
u8 ibport; /* port - values 1 | 2 */
@@ -64,6 +82,8 @@ struct smc_link {
struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */
struct ib_send_wr *wr_tx_ibs; /* WR send meta data */
struct ib_sge *wr_tx_sges; /* WR send gather meta data */
+ struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
+ struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
/* above four vectors have wr_tx_cnt elements and use the same index */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index dbf64a93d68a..371b4cf31fcd 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -38,6 +38,7 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
+ r->diag_family = sk->sk_family;
if (!smc->clcsock)
return;
r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
@@ -45,14 +46,12 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
sock_diag_save_cookie(sk, r->id.idiag_cookie);
if (sk->sk_protocol == SMCPROTO_SMC) {
- r->diag_family = PF_INET;
memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
#if IS_ENABLED(CONFIG_IPV6)
} else if (sk->sk_protocol == SMCPROTO_SMC6) {
- r->diag_family = PF_INET6;
memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr,
sizeof(smc->clcsock->sk->sk_v6_rcv_saddr));
memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index e519ef29c0ff..53f429c04843 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -257,12 +257,20 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
smcibdev = container_of(handler, struct smc_ib_device, event_handler);
switch (ibevent->event) {
- case IB_EVENT_PORT_ERR:
case IB_EVENT_DEVICE_FATAL:
+ /* terminate all ports on device */
+ for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
+ break;
+ case IB_EVENT_PORT_ERR:
case IB_EVENT_PORT_ACTIVE:
+ case IB_EVENT_GID_CHANGE:
port_idx = ibevent->element.port_num - 1;
- set_bit(port_idx, &smcibdev->port_event_mask);
- schedule_work(&smcibdev->port_event_work);
+ if (port_idx < SMC_MAX_PORTS) {
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
+ }
break;
default:
break;
@@ -289,18 +297,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
- struct smc_ib_device *smcibdev =
- (struct smc_ib_device *)ibevent->device;
+ struct smc_link *lnk = (struct smc_link *)priv;
+ struct smc_ib_device *smcibdev = lnk->smcibdev;
u8 port_idx;
switch (ibevent->event) {
- case IB_EVENT_DEVICE_FATAL:
- case IB_EVENT_GID_CHANGE:
- case IB_EVENT_PORT_ERR:
+ case IB_EVENT_QP_FATAL:
case IB_EVENT_QP_ACCESS_ERR:
- port_idx = ibevent->element.port_num - 1;
- set_bit(port_idx, &smcibdev->port_event_mask);
- schedule_work(&smcibdev->port_event_work);
+ port_idx = ibevent->element.qp->port - 1;
+ if (port_idx < SMC_MAX_PORTS) {
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
+ }
break;
default:
break;
@@ -556,7 +564,6 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
spin_unlock(&smc_ib_devices.lock);
- smc_pnet_remove_by_ibdev(smcibdev);
smc_ib_cleanup_per_ibdev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
kfree(smcibdev);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index bac7fd65a4c0..da60ab9e8d70 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -42,6 +42,8 @@ struct smc_ib_device { /* ib-device infos for smc */
/* mac address per port*/
u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
/* pnetid per port */
+ bool pnetid_by_user[SMC_MAX_PORTS];
+ /* pnetid defined by user? */
u8 initialized : 1; /* ib dev CQ, evthdl done */
struct work_struct port_event_work;
unsigned long port_event_mask;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index a6d3623d06f4..4fd60c522802 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link,
{
int rc;
- rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
+ rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
+ pend);
if (rc < 0)
return rc;
BUILD_BUG_ON_MSG(
diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h
new file mode 100644
index 000000000000..e7a8fc4ae02f
--- /dev/null
+++ b/net/smc/smc_netns.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Shared Memory Communications
+ *
+ * Network namespace definitions.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef SMC_NETNS_H
+#define SMC_NETNS_H
+
+#include "smc_pnet.h"
+
+extern unsigned int smc_net_id;
+
+/* per-network namespace private data */
+struct smc_net {
+ struct smc_pnettable pnettable;
+};
+#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 7cb3e4f07c10..25dfbe343e26 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -20,14 +20,21 @@
#include <rdma/ib_verbs.h>
+#include <net/netns/generic.h>
+#include "smc_netns.h"
+
#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_ism.h"
+#define SMC_ASCII_BLANK 32
+
+static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
+
static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
[SMC_PNETID_NAME] = {
.type = NLA_NUL_STRING,
- .len = SMC_MAX_PNETID_LEN - 1
+ .len = SMC_MAX_PNETID_LEN
},
[SMC_PNETID_ETHNAME] = {
.type = NLA_NUL_STRING,
@@ -43,118 +50,127 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
static struct genl_family smc_pnet_nl_family;
/**
- * struct smc_pnettable - SMC PNET table anchor
- * @lock: Lock for list action
- * @pnetlist: List of PNETIDs
- */
-static struct smc_pnettable {
- rwlock_t lock;
- struct list_head pnetlist;
-} smc_pnettable = {
- .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
- .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
-};
-
-/**
- * struct smc_pnetentry - pnet identifier name entry
+ * struct smc_user_pnetentry - pnet identifier name entry for/from user
* @list: List node.
* @pnet_name: Pnet identifier name
* @ndev: pointer to network device.
* @smcibdev: Pointer to IB device.
+ * @ib_port: Port of IB device.
+ * @smcd_dev: Pointer to smcd device.
*/
-struct smc_pnetentry {
+struct smc_user_pnetentry {
struct list_head list;
char pnet_name[SMC_MAX_PNETID_LEN + 1];
struct net_device *ndev;
struct smc_ib_device *smcibdev;
u8 ib_port;
+ struct smcd_dev *smcd_dev;
};
-/* Check if two RDMA device entries are identical. Use device name and port
- * number for comparison.
- */
-static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
- u8 ibport)
-{
- return pnetelem->ib_port == ibport &&
- !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
- sizeof(pnetelem->smcibdev->ibdev->name));
-}
+/* pnet entry stored in pnet table */
+struct smc_pnetentry {
+ struct list_head list;
+ char pnet_name[SMC_MAX_PNETID_LEN + 1];
+ struct net_device *ndev;
+};
-/* Find a pnetid in the pnet table.
- */
-static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
+/* Check if two given pnetids match */
+static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
{
- struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
+ int i;
- read_lock(&smc_pnettable.lock);
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
- if (!strncmp(pnetelem->pnet_name, pnet_name,
- sizeof(pnetelem->pnet_name))) {
- found_pnetelem = pnetelem;
+ for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
+ if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) &&
+ (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK))
break;
- }
+ if (pnetid1[i] != pnetid2[i])
+ return false;
}
- read_unlock(&smc_pnettable.lock);
- return found_pnetelem;
+ return true;
}
/* Remove a pnetid from the pnet table.
*/
-static int smc_pnet_remove_by_pnetid(char *pnet_name)
+static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
{
struct smc_pnetentry *pnetelem, *tmp_pe;
+ struct smc_pnettable *pnettable;
+ struct smc_ib_device *ibdev;
+ struct smcd_dev *smcd_dev;
+ struct smc_net *sn;
int rc = -ENOENT;
+ int ibport;
+
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
- write_lock(&smc_pnettable.lock);
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
+ /* remove netdevices */
+ write_lock(&pnettable->lock);
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
- if (!strncmp(pnetelem->pnet_name, pnet_name,
- sizeof(pnetelem->pnet_name))) {
+ if (!pnet_name ||
+ smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
list_del(&pnetelem->list);
dev_put(pnetelem->ndev);
kfree(pnetelem);
rc = 0;
- break;
}
}
- write_unlock(&smc_pnettable.lock);
- return rc;
-}
+ write_unlock(&pnettable->lock);
-/* Remove a pnet entry mentioning a given network device from the pnet table.
- */
-static int smc_pnet_remove_by_ndev(struct net_device *ndev)
-{
- struct smc_pnetentry *pnetelem, *tmp_pe;
- int rc = -ENOENT;
+ /* if this is not the initial namespace, stop here */
+ if (net != &init_net)
+ return rc;
- write_lock(&smc_pnettable.lock);
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
- list) {
- if (pnetelem->ndev == ndev) {
- list_del(&pnetelem->list);
- dev_put(pnetelem->ndev);
- kfree(pnetelem);
+ /* remove ib devices */
+ spin_lock(&smc_ib_devices.lock);
+ list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+ for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
+ if (ibdev->pnetid_by_user[ibport] &&
+ (!pnet_name ||
+ smc_pnet_match(pnet_name,
+ ibdev->pnetid[ibport]))) {
+ memset(ibdev->pnetid[ibport], 0,
+ SMC_MAX_PNETID_LEN);
+ ibdev->pnetid_by_user[ibport] = false;
+ rc = 0;
+ }
+ }
+ }
+ spin_unlock(&smc_ib_devices.lock);
+ /* remove smcd devices */
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+ if (smcd_dev->pnetid_by_user &&
+ (!pnet_name ||
+ smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+ memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
+ smcd_dev->pnetid_by_user = false;
rc = 0;
- break;
}
}
- write_unlock(&smc_pnettable.lock);
+ spin_unlock(&smcd_dev_list.lock);
return rc;
}
-/* Remove a pnet entry mentioning a given ib device from the pnet table.
+/* Remove a pnet entry mentioning a given network device from the pnet table.
*/
-int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
+static int smc_pnet_remove_by_ndev(struct net_device *ndev)
{
struct smc_pnetentry *pnetelem, *tmp_pe;
+ struct smc_pnettable *pnettable;
+ struct net *net = dev_net(ndev);
+ struct smc_net *sn;
int rc = -ENOENT;
- write_lock(&smc_pnettable.lock);
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
- list) {
- if (pnetelem->smcibdev == ibdev) {
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ write_lock(&pnettable->lock);
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
+ if (pnetelem->ndev == ndev) {
list_del(&pnetelem->list);
dev_put(pnetelem->ndev);
kfree(pnetelem);
@@ -162,35 +178,84 @@ int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
break;
}
}
- write_unlock(&smc_pnettable.lock);
+ write_unlock(&pnettable->lock);
return rc;
}
/* Append a pnetid to the end of the pnet table if not already on this list.
*/
-static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
+static int smc_pnet_enter(struct smc_pnettable *pnettable,
+ struct smc_user_pnetentry *new_pnetelem)
{
+ u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+ struct smc_pnetentry *tmp_pnetelem;
struct smc_pnetentry *pnetelem;
- int rc = -EEXIST;
-
- write_lock(&smc_pnettable.lock);
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
- if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
- sizeof(new_pnetelem->pnet_name)) ||
- !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
- sizeof(new_pnetelem->ndev->name)) ||
- smc_pnet_same_ibname(pnetelem,
- new_pnetelem->smcibdev->ibdev->name,
- new_pnetelem->ib_port)) {
- dev_put(pnetelem->ndev);
- goto found;
+ bool new_smcddev = false;
+ struct net_device *ndev;
+ bool new_netdev = true;
+ bool new_ibdev = false;
+
+ if (new_pnetelem->smcibdev) {
+ struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
+ int ib_port = new_pnetelem->ib_port;
+
+ spin_lock(&smc_ib_devices.lock);
+ if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
+ memcpy(ib_dev->pnetid[ib_port - 1],
+ new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
+ ib_dev->pnetid_by_user[ib_port - 1] = true;
+ new_ibdev = true;
}
+ spin_unlock(&smc_ib_devices.lock);
}
- list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
- rc = 0;
-found:
- write_unlock(&smc_pnettable.lock);
- return rc;
+ if (new_pnetelem->smcd_dev) {
+ struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
+
+ spin_lock(&smcd_dev_list.lock);
+ if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
+ memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
+ SMC_MAX_PNETID_LEN);
+ smcd_dev->pnetid_by_user = true;
+ new_smcddev = true;
+ }
+ spin_unlock(&smcd_dev_list.lock);
+ }
+
+ if (!new_pnetelem->ndev)
+ return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+
+ /* check if (base) netdev already has a pnetid. If there is one, we do
+ * not want to add a pnet table entry
+ */
+ ndev = pnet_find_base_ndev(new_pnetelem->ndev);
+ if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+ ndev_pnetid))
+ return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+
+ /* add a new netdev entry to the pnet table if there isn't one */
+ tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
+ if (!tmp_pnetelem)
+ return -ENOMEM;
+ memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
+ SMC_MAX_PNETID_LEN);
+ tmp_pnetelem->ndev = new_pnetelem->ndev;
+
+ write_lock(&pnettable->lock);
+ list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+ if (pnetelem->ndev == new_pnetelem->ndev)
+ new_netdev = false;
+ }
+ if (new_netdev) {
+ dev_hold(tmp_pnetelem->ndev);
+ list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
+ write_unlock(&pnettable->lock);
+ } else {
+ write_unlock(&pnettable->lock);
+ kfree(tmp_pnetelem);
+ }
+
+ return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
}
/* The limit for pnetid is 16 characters.
@@ -228,7 +293,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
spin_lock(&smc_ib_devices.lock);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (!strncmp(ibdev->ibdev->name, ib_name,
- sizeof(ibdev->ibdev->name))) {
+ sizeof(ibdev->ibdev->name)) ||
+ !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
+ IB_DEVICE_NAME_MAX - 1)) {
goto out;
}
}
@@ -238,10 +305,28 @@ out:
return ibdev;
}
+/* Find an smcd device by a given name. The device might not exist. */
+static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
+{
+ struct smcd_dev *smcd_dev;
+
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+ if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
+ IB_DEVICE_NAME_MAX - 1))
+ goto out;
+ }
+ smcd_dev = NULL;
+out:
+ spin_unlock(&smcd_dev_list.lock);
+ return smcd_dev;
+}
+
/* Parse the supplied netlink attributes and fill a pnetentry structure.
* For ethernet and infiniband device names verify that the devices exist.
*/
-static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
+static int smc_pnet_fill_entry(struct net *net,
+ struct smc_user_pnetentry *pnetelem,
struct nlattr *tb[])
{
char *string, *ibname;
@@ -258,30 +343,34 @@ static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
goto error;
rc = -EINVAL;
- if (!tb[SMC_PNETID_ETHNAME])
- goto error;
- rc = -ENOENT;
- string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
- pnetelem->ndev = dev_get_by_name(net, string);
- if (!pnetelem->ndev)
- goto error;
+ if (tb[SMC_PNETID_ETHNAME]) {
+ string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+ pnetelem->ndev = dev_get_by_name(net, string);
+ if (!pnetelem->ndev)
+ goto error;
+ }
- rc = -EINVAL;
- if (!tb[SMC_PNETID_IBNAME])
- goto error;
- rc = -ENOENT;
- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
- ibname = strim(ibname);
- pnetelem->smcibdev = smc_pnet_find_ib(ibname);
- if (!pnetelem->smcibdev)
- goto error;
+ /* if this is not the initial namespace, stop here */
+ if (net != &init_net)
+ return 0;
rc = -EINVAL;
- if (!tb[SMC_PNETID_IBPORT])
- goto error;
- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
- if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
- goto error;
+ if (tb[SMC_PNETID_IBNAME]) {
+ ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+ ibname = strim(ibname);
+ pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+ pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
+ if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
+ goto error;
+ if (pnetelem->smcibdev) {
+ if (!tb[SMC_PNETID_IBPORT])
+ goto error;
+ pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+ if (pnetelem->ib_port < 1 ||
+ pnetelem->ib_port > SMC_MAX_PORTS)
+ goto error;
+ }
+ }
return 0;
@@ -292,79 +381,65 @@ error:
}
/* Convert an smc_pnetentry to a netlink attribute sequence */
-static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
+static int smc_pnet_set_nla(struct sk_buff *msg,
+ struct smc_user_pnetentry *pnetelem)
{
- if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
- nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
- nla_put_string(msg, SMC_PNETID_IBNAME,
- pnetelem->smcibdev->ibdev->name) ||
- nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
+ if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
return -1;
- return 0;
-}
-
-/* Retrieve one PNETID entry */
-static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
-{
- struct smc_pnetentry *pnetelem;
- struct sk_buff *msg;
- void *hdr;
- int rc;
-
- if (!info->attrs[SMC_PNETID_NAME])
- return -EINVAL;
- pnetelem = smc_pnet_find_pnetid(
- (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
- if (!pnetelem)
- return -ENOENT;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
- &smc_pnet_nl_family, 0, SMC_PNETID_GET);
- if (!hdr) {
- rc = -EMSGSIZE;
- goto err_out;
+ if (pnetelem->ndev) {
+ if (nla_put_string(msg, SMC_PNETID_ETHNAME,
+ pnetelem->ndev->name))
+ return -1;
+ } else {
+ if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
+ return -1;
}
-
- if (smc_pnet_set_nla(msg, pnetelem)) {
- rc = -ENOBUFS;
- goto err_out;
+ if (pnetelem->smcibdev) {
+ if (nla_put_string(msg, SMC_PNETID_IBNAME,
+ dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
+ nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
+ return -1;
+ } else if (pnetelem->smcd_dev) {
+ if (nla_put_string(msg, SMC_PNETID_IBNAME,
+ dev_name(&pnetelem->smcd_dev->dev)) ||
+ nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
+ return -1;
+ } else {
+ if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
+ nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
+ return -1;
}
- genlmsg_end(msg, hdr);
- return genlmsg_reply(msg, info);
-
-err_out:
- nlmsg_free(msg);
- return rc;
+ return 0;
}
static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- struct smc_pnetentry *pnetelem;
+ struct smc_user_pnetentry pnetelem;
+ struct smc_pnettable *pnettable;
+ struct smc_net *sn;
int rc;
- pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
- if (!pnetelem)
- return -ENOMEM;
- rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
if (!rc)
- rc = smc_pnet_enter(pnetelem);
- if (rc) {
- kfree(pnetelem);
- return rc;
- }
+ rc = smc_pnet_enter(pnettable, &pnetelem);
+ if (pnetelem.ndev)
+ dev_put(pnetelem.ndev);
return rc;
}
static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = genl_info_net(info);
+
if (!info->attrs[SMC_PNETID_NAME])
return -EINVAL;
- return smc_pnet_remove_by_pnetid(
+ return smc_pnet_remove_by_pnetid(net,
(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
}
@@ -376,7 +451,7 @@ static int smc_pnet_dump_start(struct netlink_callback *cb)
static int smc_pnet_dumpinfo(struct sk_buff *skb,
u32 portid, u32 seq, u32 flags,
- struct smc_pnetentry *pnetelem)
+ struct smc_user_pnetentry *pnetelem)
{
void *hdr;
@@ -392,42 +467,143 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb,
return 0;
}
-static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
+ u32 seq, u8 *pnetid, int start_idx)
{
+ struct smc_user_pnetentry tmp_entry;
+ struct smc_pnettable *pnettable;
struct smc_pnetentry *pnetelem;
+ struct smc_ib_device *ibdev;
+ struct smcd_dev *smcd_dev;
+ struct smc_net *sn;
int idx = 0;
+ int ibport;
- read_lock(&smc_pnettable.lock);
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
- if (idx++ < cb->args[0])
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ /* dump netdevices */
+ read_lock(&pnettable->lock);
+ list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+ if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
+ continue;
+ if (idx++ < start_idx)
continue;
- if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- pnetelem)) {
+ memset(&tmp_entry, 0, sizeof(tmp_entry));
+ memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
+ SMC_MAX_PNETID_LEN);
+ tmp_entry.ndev = pnetelem->ndev;
+ if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
+ &tmp_entry)) {
--idx;
break;
}
}
+ read_unlock(&pnettable->lock);
+
+ /* if this is not the initial namespace, stop here */
+ if (net != &init_net)
+ return idx;
+
+ /* dump ib devices */
+ spin_lock(&smc_ib_devices.lock);
+ list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+ for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
+ if (ibdev->pnetid_by_user[ibport]) {
+ if (pnetid &&
+ !smc_pnet_match(ibdev->pnetid[ibport],
+ pnetid))
+ continue;
+ if (idx++ < start_idx)
+ continue;
+ memset(&tmp_entry, 0, sizeof(tmp_entry));
+ memcpy(&tmp_entry.pnet_name,
+ ibdev->pnetid[ibport],
+ SMC_MAX_PNETID_LEN);
+ tmp_entry.smcibdev = ibdev;
+ tmp_entry.ib_port = ibport + 1;
+ if (smc_pnet_dumpinfo(skb, portid, seq,
+ NLM_F_MULTI,
+ &tmp_entry)) {
+ --idx;
+ break;
+ }
+ }
+ }
+ }
+ spin_unlock(&smc_ib_devices.lock);
+
+ /* dump smcd devices */
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+ if (smcd_dev->pnetid_by_user) {
+ if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
+ continue;
+ if (idx++ < start_idx)
+ continue;
+ memset(&tmp_entry, 0, sizeof(tmp_entry));
+ memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
+ SMC_MAX_PNETID_LEN);
+ tmp_entry.smcd_dev = smcd_dev;
+ if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
+ &tmp_entry)) {
+ --idx;
+ break;
+ }
+ }
+ }
+ spin_unlock(&smcd_dev_list.lock);
+
+ return idx;
+}
+
+static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int idx;
+
+ idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NULL, cb->args[0]);
+
cb->args[0] = idx;
- read_unlock(&smc_pnettable.lock);
return skb->len;
}
+/* Retrieve one PNETID entry */
+static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (!info->attrs[SMC_PNETID_NAME])
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq,
+ nla_data(info->attrs[SMC_PNETID_NAME]), 0);
+
+ /* finish multi part message and send it */
+ hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
+ NLM_F_MULTI);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+ }
+ return genlmsg_reply(msg, info);
+}
+
/* Remove and delete all pnetids from pnet table.
*/
static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
{
- struct smc_pnetentry *pnetelem, *tmp_pe;
+ struct net *net = genl_info_net(info);
- write_lock(&smc_pnettable.lock);
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
- list) {
- list_del(&pnetelem->list);
- dev_put(pnetelem->ndev);
- kfree(pnetelem);
- }
- write_unlock(&smc_pnettable.lock);
- return 0;
+ return smc_pnet_remove_by_pnetid(net, NULL);
}
/* SMC_PNETID generic netlink operation definition */
@@ -491,6 +667,18 @@ static struct notifier_block smc_netdev_notifier = {
.notifier_call = smc_pnet_netdev_event
};
+/* init network namespace */
+int smc_pnet_net_init(struct net *net)
+{
+ struct smc_net *sn = net_generic(net, smc_net_id);
+ struct smc_pnettable *pnettable = &sn->pnettable;
+
+ INIT_LIST_HEAD(&pnettable->pnetlist);
+ rwlock_init(&pnettable->lock);
+
+ return 0;
+}
+
int __init smc_pnet_init(void)
{
int rc;
@@ -504,9 +692,15 @@ int __init smc_pnet_init(void)
return rc;
}
+/* exit network namespace */
+void smc_pnet_net_exit(struct net *net)
+{
+ /* flush pnet table */
+ smc_pnet_remove_by_pnetid(net, NULL);
+}
+
void smc_pnet_exit(void)
{
- smc_pnet_flush(NULL, NULL);
unregister_netdevice_notifier(&smc_netdev_notifier);
genl_unregister_family(&smc_pnet_nl_family);
}
@@ -534,6 +728,32 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
return ndev;
}
+static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
+ u8 *pnetid)
+{
+ struct smc_pnettable *pnettable;
+ struct net *net = dev_net(ndev);
+ struct smc_pnetentry *pnetelem;
+ struct smc_net *sn;
+ int rc = -ENOENT;
+
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ read_lock(&pnettable->lock);
+ list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+ if (ndev == pnetelem->ndev) {
+ /* get pnetid of netdev device */
+ memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
+ rc = 0;
+ break;
+ }
+ }
+ read_unlock(&pnettable->lock);
+ return rc;
+}
+
/* Determine the corresponding IB device port based on the hardware PNETID.
* Searching stops at the first matching active IB device port with vlan_id
* configured.
@@ -549,7 +769,8 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
ndev = pnet_find_base_ndev(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
- ndev_pnetid))
+ ndev_pnetid) &&
+ smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
return; /* pnetid could not be determined */
spin_lock(&smc_ib_devices.lock);
@@ -557,8 +778,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
for (i = 1; i <= SMC_MAX_PORTS; i++) {
if (!rdma_is_port_valid(ibdev->ibdev, i))
continue;
- if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
- SMC_MAX_PNETID_LEN) &&
+ if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
!smc_ib_determine_gid(ibdev, i, vlan_id, gid,
NULL)) {
@@ -580,12 +800,13 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
ndev = pnet_find_base_ndev(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
- ndev_pnetid))
+ ndev_pnetid) &&
+ smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
return; /* pnetid could not be determined */
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
- if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
+ if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
*smcismdev = ismdev;
break;
}
@@ -593,31 +814,6 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
spin_unlock(&smcd_dev_list.lock);
}
-/* Lookup of coupled ib_device via SMC pnet table */
-static void smc_pnet_find_roce_by_table(struct net_device *netdev,
- struct smc_ib_device **smcibdev,
- u8 *ibport, unsigned short vlan_id,
- u8 gid[])
-{
- struct smc_pnetentry *pnetelem;
-
- read_lock(&smc_pnettable.lock);
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
- if (netdev == pnetelem->ndev) {
- if (smc_ib_port_active(pnetelem->smcibdev,
- pnetelem->ib_port) &&
- !smc_ib_determine_gid(pnetelem->smcibdev,
- pnetelem->ib_port, vlan_id,
- gid, NULL)) {
- *smcibdev = pnetelem->smcibdev;
- *ibport = pnetelem->ib_port;
- }
- break;
- }
- }
- read_unlock(&smc_pnettable.lock);
-}
-
/* PNET table analysis for a given sock:
* determine ib_device and port belonging to used internal TCP socket
* ethernet interface.
@@ -636,13 +832,7 @@ void smc_pnet_find_roce_resource(struct sock *sk,
if (!dst->dev)
goto out_rel;
- /* if possible, lookup via hardware-defined pnetid */
smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
- if (*smcibdev)
- goto out_rel;
-
- /* lookup via SMC PNET table */
- smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid);
out_rel:
dst_release(dst);
@@ -660,7 +850,6 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
if (!dst->dev)
goto out_rel;
- /* if possible, lookup via hardware-defined pnetid */
smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
out_rel:
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 8ff777636e32..5eac42fb45d0 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -19,6 +19,16 @@
struct smc_ib_device;
struct smcd_dev;
+/**
+ * struct smc_pnettable - SMC PNET table anchor
+ * @lock: Lock for list action
+ * @pnetlist: List of PNETIDs
+ */
+struct smc_pnettable {
+ rwlock_t lock;
+ struct list_head pnetlist;
+};
+
static inline int smc_pnetid_by_dev_port(struct device *dev,
unsigned short port, u8 *pnetid)
{
@@ -30,8 +40,9 @@ static inline int smc_pnetid_by_dev_port(struct device *dev,
}
int smc_pnet_init(void) __init;
+int smc_pnet_net_init(struct net *net);
void smc_pnet_exit(void);
-int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
+void smc_pnet_net_exit(struct net *net);
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport,
unsigned short vlan_id, u8 gid[]);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index d8366ed51757..f0de323d15d6 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,10 +24,11 @@
#include "smc.h"
#include "smc_wr.h"
#include "smc_cdc.h"
+#include "smc_close.h"
#include "smc_ism.h"
#include "smc_tx.h"
-#define SMC_TX_WORK_DELAY HZ
+#define SMC_TX_WORK_DELAY 0
#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
/***************************** sndbuf producer *******************************/
@@ -165,12 +166,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
conn->local_tx_ctrl.prod_flags.urg_data_pending = 1;
if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) {
+ if (send_done)
+ return send_done;
rc = smc_tx_wait(smc, msg->msg_flags);
- if (rc) {
- if (send_done)
- return send_done;
+ if (rc)
goto out_err;
- }
continue;
}
@@ -267,27 +267,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
/* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
- int num_sges, struct ib_sge sges[])
+ int num_sges, struct ib_rdma_wr *rdma_wr)
{
struct smc_link_group *lgr = conn->lgr;
- struct ib_rdma_wr rdma_wr;
struct smc_link *link;
int rc;
- memset(&rdma_wr, 0, sizeof(rdma_wr));
link = &lgr->lnk[SMC_SINGLE_LINK];
- rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link);
- rdma_wr.wr.sg_list = sges;
- rdma_wr.wr.num_sge = num_sges;
- rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
- rdma_wr.remote_addr =
+ rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
+ rdma_wr->wr.num_sge = num_sges;
+ rdma_wr->remote_addr =
lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
/* RMBE within RMB */
conn->tx_off +
/* offset within RMBE */
peer_rmbe_offset;
- rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
- rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
+ rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+ rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
if (rc) {
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
smc_lgr_terminate(lgr);
@@ -314,24 +310,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
/* SMC-R helper for smc_tx_rdma_writes() */
static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t src_off, size_t src_len,
- size_t dst_off, size_t dst_len)
+ size_t dst_off, size_t dst_len,
+ struct smc_rdma_wr *wr_rdma_buf)
{
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int src_len_sum = src_len, dst_len_sum = dst_len;
- struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
int sent_count = src_off;
int srcchunk, dstchunk;
int num_sges;
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+ struct ib_sge *sge =
+ wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
- sges[srcchunk].addr = dma_addr + src_off;
- sges[srcchunk].length = src_len;
- sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
+ sge[srcchunk].addr = dma_addr + src_off;
+ sge[srcchunk].length = src_len;
num_sges++;
src_off += src_len;
@@ -344,7 +341,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
- rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
+ rc = smc_tx_rdma_write(conn, dst_off, num_sges,
+ &wr_rdma_buf->wr_tx_rdma[dstchunk]);
if (rc)
return rc;
if (dst_len_sum == len)
@@ -403,7 +401,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
* usable snd_wnd as max transmit
*/
-static int smc_tx_rdma_writes(struct smc_connection *conn)
+static int smc_tx_rdma_writes(struct smc_connection *conn,
+ struct smc_rdma_wr *wr_rdma_buf)
{
size_t len, src_len, dst_off, dst_len; /* current chunk values */
union smc_host_cursor sent, prep, prod, cons;
@@ -464,7 +463,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
dst_off, dst_len);
else
rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
- dst_off, dst_len);
+ dst_off, dst_len, wr_rdma_buf);
if (rc)
return rc;
@@ -484,32 +483,31 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
*/
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
- struct smc_cdc_producer_flags *pflags;
+ struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
+ struct smc_rdma_wr *wr_rdma_buf;
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
int rc;
- spin_lock_bh(&conn->send_lock);
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
if (rc == -EBUSY) {
struct smc_sock *smc =
container_of(conn, struct smc_sock, conn);
- if (smc->sk.sk_err == ECONNABORTED) {
- rc = sock_error(&smc->sk);
- goto out_unlock;
- }
+ if (smc->sk.sk_err == ECONNABORTED)
+ return sock_error(&smc->sk);
rc = 0;
if (conn->alert_token_local) /* connection healthy */
mod_delayed_work(system_wq, &conn->tx_work,
SMC_TX_WORK_DELAY);
}
- goto out_unlock;
+ return rc;
}
- if (!conn->local_tx_ctrl.prod_flags.urg_data_present) {
- rc = smc_tx_rdma_writes(conn);
+ spin_lock_bh(&conn->send_lock);
+ if (!pflags->urg_data_present) {
+ rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
if (rc) {
smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
(struct smc_wr_tx_pend_priv *)pend);
@@ -518,7 +516,6 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
}
rc = smc_cdc_msg_send(conn, wr_buf, pend);
- pflags = &conn->local_tx_ctrl.prod_flags;
if (!rc && pflags->urg_data_present) {
pflags->urg_data_pending = 0;
pflags->urg_data_present = 0;
@@ -536,7 +533,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
spin_lock_bh(&conn->send_lock);
if (!pflags->urg_data_present)
- rc = smc_tx_rdma_writes(conn);
+ rc = smc_tx_rdma_writes(conn, NULL);
if (!rc)
rc = smcd_cdc_msg_send(conn);
@@ -557,6 +554,12 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
else
rc = smcr_tx_sndbuf_nonempty(conn);
+ if (!rc) {
+ /* trigger socket release if connection is closing */
+ struct smc_sock *smc = container_of(conn, struct smc_sock,
+ conn);
+ smc_close_wake_tx_prepared(smc);
+ }
return rc;
}
@@ -598,7 +601,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
if (to_confirm > conn->rmbe_update_limit) {
smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);
sender_free = conn->rmb_desc->len -
- smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
+ smc_curs_diff_large(conn->rmb_desc->len,
+ &cfed, &prod);
}
if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
@@ -612,9 +616,6 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
SMC_TX_WORK_DELAY);
return;
}
- smc_curs_copy(&conn->rx_curs_confirmed,
- &conn->local_tx_ctrl.cons, conn);
- conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
}
if (conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index c2694750a6a8..253aa75dc2b6 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
* @link: Pointer to smc_link used to later send the message.
* @handler: Send completion handler function pointer.
* @wr_buf: Out value returns pointer to message buffer.
+ * @wr_rdma_buf: Out value returns pointer to rdma work request.
* @wr_pend_priv: Out value returns pointer serving as handler context.
*
* Return: 0 on success, or -errno on error.
@@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
int smc_wr_tx_get_free_slot(struct smc_link *link,
smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf,
+ struct smc_rdma_wr **wr_rdma_buf,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
@@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
wr_ib = &link->wr_tx_ibs[idx];
wr_ib->wr_id = wr_id;
*wr_buf = &link->wr_tx_bufs[idx];
+ if (wr_rdma_buf)
+ *wr_rdma_buf = &link->wr_tx_rdmas[idx];
*wr_pend_priv = &wr_pend->priv;
return 0;
}
@@ -218,10 +222,10 @@ int smc_wr_tx_put_slot(struct smc_link *link,
u32 idx = pend->idx;
/* clear the full struct smc_wr_tx_pend including .priv */
- memset(&link->wr_tx_pends[pend->idx], 0,
- sizeof(link->wr_tx_pends[pend->idx]));
- memset(&link->wr_tx_bufs[pend->idx], 0,
- sizeof(link->wr_tx_bufs[pend->idx]));
+ memset(&link->wr_tx_pends[idx], 0,
+ sizeof(link->wr_tx_pends[idx]));
+ memset(&link->wr_tx_bufs[idx], 0,
+ sizeof(link->wr_tx_bufs[idx]));
test_and_clear_bit(idx, link->wr_tx_mask);
return 1;
}
@@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
+ lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
+ lnk->roce_pd->local_dma_lkey;
+ lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
+ lnk->roce_pd->local_dma_lkey;
+ lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
+ lnk->roce_pd->local_dma_lkey;
+ lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
+ lnk->roce_pd->local_dma_lkey;
lnk->wr_tx_ibs[i].next = NULL;
lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
lnk->wr_tx_ibs[i].num_sge = 1;
lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
lnk->wr_tx_ibs[i].send_flags =
IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
+ lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
+ lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
+ lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
+ lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
+ lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
}
for (i = 0; i < lnk->wr_rx_cnt; i++) {
lnk->wr_rx_sges[i].addr =
@@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
lnk->wr_tx_mask = NULL;
kfree(lnk->wr_tx_sges);
lnk->wr_tx_sges = NULL;
+ kfree(lnk->wr_tx_rdma_sges);
+ lnk->wr_tx_rdma_sges = NULL;
kfree(lnk->wr_rx_sges);
lnk->wr_rx_sges = NULL;
+ kfree(lnk->wr_tx_rdmas);
+ lnk->wr_tx_rdmas = NULL;
kfree(lnk->wr_rx_ibs);
lnk->wr_rx_ibs = NULL;
kfree(lnk->wr_tx_ibs);
@@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_rx_ibs)
goto no_mem_wr_tx_ibs;
+ link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
+ sizeof(link->wr_tx_rdmas[0]),
+ GFP_KERNEL);
+ if (!link->wr_tx_rdmas)
+ goto no_mem_wr_rx_ibs;
+ link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
+ sizeof(link->wr_tx_rdma_sges[0]),
+ GFP_KERNEL);
+ if (!link->wr_tx_rdma_sges)
+ goto no_mem_wr_tx_rdmas;
link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
GFP_KERNEL);
if (!link->wr_tx_sges)
- goto no_mem_wr_rx_ibs;
+ goto no_mem_wr_tx_rdma_sges;
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
sizeof(link->wr_rx_sges[0]),
GFP_KERNEL);
@@ -579,6 +611,10 @@ no_mem_wr_rx_sges:
kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
kfree(link->wr_tx_sges);
+no_mem_wr_tx_rdma_sges:
+ kfree(link->wr_tx_rdma_sges);
+no_mem_wr_tx_rdmas:
+ kfree(link->wr_tx_rdmas);
no_mem_wr_rx_ibs:
kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 1d85bb14fd6f..09bf32fd3959 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev);
int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf,
+ struct smc_rdma_wr **wrs,
struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
diff --git a/net/socket.c b/net/socket.c
index d51930689b98..643a1648fcc2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -963,8 +963,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
EXPORT_SYMBOL(dlci_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
- unsigned int cmd, unsigned long arg,
- unsigned int ifreq_size)
+ unsigned int cmd, unsigned long arg)
{
int err;
void __user *argp = (void __user *)arg;
@@ -990,11 +989,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
} else {
struct ifreq ifr;
bool need_copyout;
- if (copy_from_user(&ifr, argp, ifreq_size))
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
return -EFAULT;
err = dev_ioctl(net, cmd, &ifr, &need_copyout);
if (!err && need_copyout)
- if (copy_to_user(argp, &ifr, ifreq_size))
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
return -EFAULT;
}
return err;
@@ -1093,8 +1092,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
err = open_related_ns(&net->ns, get_net_ns);
break;
default:
- err = sock_do_ioctl(net, sock, cmd, arg,
- sizeof(struct ifreq));
+ err = sock_do_ioctl(net, sock, cmd, arg);
break;
}
return err;
@@ -2802,8 +2800,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock,
int err;
set_fs(KERNEL_DS);
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv,
- sizeof(struct compat_ifreq));
+ err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
set_fs(old_fs);
if (!err)
err = compat_put_timeval(&ktv, up);
@@ -2819,8 +2816,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
int err;
set_fs(KERNEL_DS);
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts,
- sizeof(struct compat_ifreq));
+ err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
set_fs(old_fs);
if (!err)
err = compat_put_timespec(&kts, up);
@@ -3016,6 +3012,54 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
return dev_ioctl(net, cmd, &ifreq, NULL);
}
+static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
+ unsigned int cmd,
+ struct compat_ifreq __user *uifr32)
+{
+ struct ifreq __user *uifr;
+ int err;
+
+ /* Handle the fact that while struct ifreq has the same *layout* on
+ * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+ * which are handled elsewhere, it still has different *size* due to
+ * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
+ * resulting in struct ifreq being 32 and 40 bytes respectively).
+ * As a result, if the struct happens to be at the end of a page and
+ * the next page isn't readable/writable, we get a fault. To prevent
+ * that, copy back and forth to the full size.
+ */
+
+ uifr = compat_alloc_user_space(sizeof(*uifr));
+ if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
+ return -EFAULT;
+
+ err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
+
+ if (!err) {
+ switch (cmd) {
+ case SIOCGIFFLAGS:
+ case SIOCGIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCGIFMEM:
+ case SIOCGIFHWADDR:
+ case SIOCGIFINDEX:
+ case SIOCGIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCGIFDSTADDR:
+ case SIOCGIFNETMASK:
+ case SIOCGIFPFLAGS:
+ case SIOCGIFTXQLEN:
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ case SIOCGIFNAME:
+ if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
+ err = -EFAULT;
+ break;
+ }
+ }
+ return err;
+}
+
static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
struct compat_ifreq __user *uifr32)
{
@@ -3131,8 +3175,7 @@ static int routing_ioctl(struct net *net, struct socket *sock,
}
set_fs(KERNEL_DS);
- ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r,
- sizeof(struct compat_ifreq));
+ ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
set_fs(old_fs);
out:
@@ -3232,21 +3275,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFTXQLEN:
case SIOCBRADDIF:
case SIOCBRDELIF:
+ case SIOCGIFNAME:
case SIOCSIFNAME:
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- case SIOCSARP:
- case SIOCGARP:
- case SIOCDARP:
- case SIOCATMARK:
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDCHANGEACTIVE:
- case SIOCGIFNAME:
- return sock_do_ioctl(net, sock, cmd, arg,
- sizeof(struct compat_ifreq));
+ return compat_ifreq_ioctl(net, sock, cmd, argp);
+
+ case SIOCSARP:
+ case SIOCGARP:
+ case SIOCDARP:
+ case SIOCATMARK:
+ return sock_do_ioctl(net, sock, cmd, arg);
}
return -ENOIOCTLCMD;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index fb6656295204..507105127095 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -44,7 +44,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
struct crypto_sync_skcipher *cipher;
- unsigned char plain[8];
+ unsigned char *plain;
s32 code;
dprintk("RPC: %s:\n", __func__);
@@ -52,6 +52,10 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
if (IS_ERR(cipher))
return PTR_ERR(cipher);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
+
plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
@@ -67,6 +71,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
code = krb5_encrypt(cipher, cksum, plain, buf, 8);
out:
+ kfree(plain);
crypto_free_sync_skcipher(cipher);
return code;
}
@@ -77,12 +82,17 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
u32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
- unsigned char plain[8];
+ unsigned char *plain;
+ s32 code;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_make_rc4_seq_num(kctx, direction, seqnum,
cksum, buf);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
+
plain[0] = (unsigned char) (seqnum & 0xff);
plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
@@ -93,7 +103,9 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
plain[6] = direction;
plain[7] = direction;
- return krb5_encrypt(key, cksum, plain, buf, 8);
+ code = krb5_encrypt(key, cksum, plain, buf, 8);
+ kfree(plain);
+ return code;
}
static s32
@@ -101,7 +113,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
unsigned char *buf, int *direction, s32 *seqnum)
{
struct crypto_sync_skcipher *cipher;
- unsigned char plain[8];
+ unsigned char *plain;
s32 code;
dprintk("RPC: %s:\n", __func__);
@@ -113,20 +125,28 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
if (code)
goto out;
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain) {
+ code = -ENOMEM;
+ goto out;
+ }
+
code = krb5_decrypt(cipher, cksum, buf, plain, 8);
if (code)
- goto out;
+ goto out_plain;
if ((plain[4] != plain[5]) || (plain[4] != plain[6])
|| (plain[4] != plain[7])) {
code = (s32)KG_BAD_SEQ;
- goto out;
+ goto out_plain;
}
*direction = plain[4];
*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
(plain[2] << 8) | (plain[3]));
+out_plain:
+ kfree(plain);
out:
crypto_free_sync_skcipher(cipher);
return code;
@@ -139,7 +159,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
int *direction, u32 *seqnum)
{
s32 code;
- unsigned char plain[8];
+ unsigned char *plain;
struct crypto_sync_skcipher *key = kctx->seq;
dprintk("RPC: krb5_get_seq_num:\n");
@@ -147,18 +167,25 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_get_rc4_seq_num(kctx, cksum, buf,
direction, seqnum);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
- return code;
+ goto out;
if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
- (plain[4] != plain[7]))
- return (s32)KG_BAD_SEQ;
+ (plain[4] != plain[7])) {
+ code = (s32)KG_BAD_SEQ;
+ goto out;
+ }
*direction = plain[4];
*seqnum = ((plain[0]) |
(plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
- return 0;
+out:
+ kfree(plain);
+ return code;
}
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 45a033329cd4..19bb356230ed 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -146,7 +146,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
/* no "debugfs" dentry? Don't bother with the symlink. */
- if (!xprt->debugfs) {
+ if (IS_ERR_OR_NULL(xprt->debugfs)) {
rcu_read_unlock();
return;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index cf51b8f9b15f..1f200119268c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
DMA_TO_DEVICE);
}
+/* If the xdr_buf has more elements than the device can
+ * transmit in a single RDMA Send, then the reply will
+ * have to be copied into a bounce buffer.
+ */
+static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+ struct xdr_buf *xdr,
+ __be32 *wr_lst)
+{
+ int elements;
+
+ /* xdr->head */
+ elements = 1;
+
+ /* xdr->pages */
+ if (!wr_lst) {
+ unsigned int remaining;
+ unsigned long pageoff;
+
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ ++elements;
+ remaining -= min_t(u32, PAGE_SIZE - pageoff,
+ remaining);
+ pageoff = 0;
+ }
+ }
+
+ /* xdr->tail */
+ if (xdr->tail[0].iov_len)
+ ++elements;
+
+ /* assume 1 SGE is needed for the transport header */
+ return elements >= rdma->sc_max_send_sges;
+}
+
+/* The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header
+ * buffer.
+ */
+static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
+ struct xdr_buf *xdr, __be32 *wr_lst)
+{
+ unsigned char *dst, *tailbase;
+ unsigned int taillen;
+
+ dst = ctxt->sc_xprt_buf;
+ dst += ctxt->sc_sges[0].length;
+
+ memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
+ dst += xdr->head[0].iov_len;
+
+ tailbase = xdr->tail[0].iov_base;
+ taillen = xdr->tail[0].iov_len;
+ if (wr_lst) {
+ u32 xdrpad;
+
+ xdrpad = xdr_padsize(xdr->page_len);
+ if (taillen && xdrpad) {
+ tailbase += xdrpad;
+ taillen -= xdrpad;
+ }
+ } else {
+ unsigned int len, remaining;
+ unsigned long pageoff;
+ struct page **ppages;
+
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+ memcpy(dst, page_address(*ppages), len);
+ remaining -= len;
+ dst += len;
+ pageoff = 0;
+ }
+ }
+
+ if (taillen)
+ memcpy(dst, tailbase, taillen);
+
+ ctxt->sc_sges[0].length += xdr->len;
+ ib_dma_sync_single_for_device(rdma->sc_pd->device,
+ ctxt->sc_sges[0].addr,
+ ctxt->sc_sges[0].length,
+ DMA_TO_DEVICE);
+
+ return 0;
+}
+
/* svc_rdma_map_reply_msg - Map the buffer holding RPC message
* @rdma: controlling transport
* @ctxt: send_ctxt for the Send WR
@@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
u32 xdr_pad;
int ret;
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+ return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt,
xdr->head[0].iov_base,
xdr->head[0].iov_len);
@@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
while (remaining) {
len = min_t(u32, PAGE_SIZE - page_off, remaining);
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
page_off, len);
if (ret < 0)
@@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
len = xdr->tail[0].iov_len;
tail:
if (len) {
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
if (ret < 0)
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 924c17d46903..57f86c63a463 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Transport header, head iovec, tail iovec */
newxprt->sc_max_send_sges = 3;
/* Add one SGE per page list entry */
- newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE;
- if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) {
- pr_err("svcrdma: too few Send SGEs available (%d needed)\n",
- newxprt->sc_max_send_sges);
- goto errout;
- }
+ newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
+ if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
+ newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4994e75945b8..21113bfd4eca 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -527,7 +527,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
- 1, IB_POLL_WORKQUEUE);
+ ia->ri_device->num_comp_vectors > 1 ? 1 : 0,
+ IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
goto out1;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index cd78253de31d..7e1357db33d7 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -592,26 +592,6 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
-bool switchdev_port_same_parent_id(struct net_device *a,
- struct net_device *b)
-{
- struct switchdev_attr a_attr = {
- .orig_dev = a,
- .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- };
- struct switchdev_attr b_attr = {
- .orig_dev = b,
- .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- };
-
- if (switchdev_port_attr_get(a, &a_attr) ||
- switchdev_port_attr_get(b, &b_attr))
- return false;
-
- return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
-
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ac306d17f8ad..341ecd796aa4 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1145,7 +1145,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
default:
pr_warn("Dropping received illegal msg type\n");
kfree_skb(skb);
- return false;
+ return true;
};
}
@@ -1425,6 +1425,10 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
l->rcv_unacked = 0;
} else {
/* RESET_MSG or ACTIVATE_MSG */
+ if (mtyp == ACTIVATE_MSG) {
+ msg_set_dest_session_valid(hdr, 1);
+ msg_set_dest_session(hdr, l->peer_session);
+ }
msg_set_max_pkt(hdr, l->advertised_mtu);
strcpy(data, l->if_name);
msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
@@ -1642,6 +1646,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
break;
}
+
+ /* If this endpoint was re-created while peer was ESTABLISHING
+ * it doesn't know current session number. Force re-synch.
+ */
+ if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) &&
+ l->session != msg_dest_session(hdr)) {
+ if (less(l->session, msg_dest_session(hdr)))
+ l->session = msg_dest_session(hdr) + 1;
+ break;
+ }
+
/* ACTIVATE_MSG serves as PEER_RESET if link is already down */
if (mtyp == RESET_MSG || !link_is_up(l))
rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index a0924956bb61..d7e4b8b93f9d 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -360,6 +360,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n)
msg_set_bits(m, 1, 0, 0xffff, n);
}
+/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch
+ * link peer session number
+ */
+static inline bool msg_dest_session_valid(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 16, 0x1);
+}
+
+static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid)
+{
+ msg_set_bits(m, 1, 16, 0x1, valid);
+}
+
+static inline u16 msg_dest_session(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 0, 0xffff);
+}
+
+static inline void msg_set_dest_session(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 1, 0, 0xffff, n);
+}
/*
* Word 2
diff --git a/net/tipc/node.c b/net/tipc/node.c
index db2a6c3e0be9..2dc4919ab23c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -830,15 +830,16 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_node_write_lock(n);
if (!tipc_link_is_establishing(l)) {
__tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
- if (delete) {
- kfree(l);
- le->link = NULL;
- n->link_cnt--;
- }
} else {
/* Defuse pending tipc_node_link_up() */
+ tipc_link_reset(l);
tipc_link_fsm_evt(l, LINK_RESET_EVT);
}
+ if (delete) {
+ kfree(l);
+ le->link = NULL;
+ n->link_cnt--;
+ }
trace_tipc_node_link_down(n, true, "node link down or deleted!");
tipc_node_write_unlock(n);
if (delete)
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 7ee9008b2187..a5c17c47d08a 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -247,6 +247,7 @@ static int tls_push_record(struct sock *sk,
int flags,
unsigned char record_type)
{
+ struct tls_prot_info *prot = &ctx->prot_info;
struct tcp_sock *tp = tcp_sk(sk);
struct page_frag dummy_tag_frag;
skb_frag_t *frag;
@@ -256,7 +257,7 @@ static int tls_push_record(struct sock *sk,
frag = &record->frags[0];
tls_fill_prepend(ctx,
skb_frag_address(frag),
- record->len - ctx->tx.prepend_size,
+ record->len - prot->prepend_size,
record_type,
ctx->crypto_send.info.version);
@@ -264,7 +265,7 @@ static int tls_push_record(struct sock *sk,
dummy_tag_frag.page = skb_frag_page(frag);
dummy_tag_frag.offset = 0;
- tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size);
+ tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
record->end_seq = tp->write_seq + record->len;
spin_lock_irq(&offload_ctx->lock);
list_add_tail(&record->list, &offload_ctx->records_list);
@@ -347,6 +348,7 @@ static int tls_push_data(struct sock *sk,
unsigned char record_type)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
@@ -376,10 +378,10 @@ static int tls_push_data(struct sock *sk,
* we need to leave room for an authentication tag.
*/
max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
- tls_ctx->tx.prepend_size;
+ prot->prepend_size;
do {
rc = tls_do_allocation(sk, ctx, pfrag,
- tls_ctx->tx.prepend_size);
+ prot->prepend_size);
if (rc) {
rc = sk_stream_wait_memory(sk, &timeo);
if (!rc)
@@ -397,7 +399,7 @@ handle_error:
size = orig_size;
destroy_record(record);
ctx->open_record = NULL;
- } else if (record->len > tls_ctx->tx.prepend_size) {
+ } else if (record->len > prot->prepend_size) {
goto last_record;
}
@@ -658,6 +660,8 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
{
u16 nonce_size, tag_size, iv_size, rec_seq_size;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_record_info *start_marker_record;
struct tls_offload_context_tx *offload_ctx;
struct tls_crypto_info *crypto_info;
@@ -703,10 +707,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
goto free_offload_ctx;
}
- ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size;
- ctx->tx.tag_size = tag_size;
- ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size;
- ctx->tx.iv_size = iv_size;
+ prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+ prot->tag_size = tag_size;
+ prot->overhead_size = prot->prepend_size + prot->tag_size;
+ prot->iv_size = iv_size;
ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
GFP_KERNEL);
if (!ctx->tx.iv) {
@@ -716,7 +720,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
- ctx->tx.rec_seq_size = rec_seq_size;
+ prot->rec_seq_size = rec_seq_size;
ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
if (!ctx->tx.rec_seq) {
rc = -ENOMEM;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index d1c2fd9a3f63..caff15b2f9b2 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -435,6 +435,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
unsigned int optlen, int tx)
{
struct tls_crypto_info *crypto_info;
+ struct tls_crypto_info *alt_crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
size_t optsize;
int rc = 0;
@@ -445,10 +446,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
goto out;
}
- if (tx)
+ if (tx) {
crypto_info = &ctx->crypto_send.info;
- else
+ alt_crypto_info = &ctx->crypto_recv.info;
+ } else {
crypto_info = &ctx->crypto_recv.info;
+ alt_crypto_info = &ctx->crypto_send.info;
+ }
/* Currently we don't support set crypto info more than one time */
if (TLS_CRYPTO_INFO_READY(crypto_info)) {
@@ -469,6 +473,15 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
goto err_crypto_info;
}
+ /* Ensure that TLS version and ciphers are same in both directions */
+ if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) {
+ if (alt_crypto_info->version != crypto_info->version ||
+ alt_crypto_info->cipher_type != crypto_info->cipher_type) {
+ rc = -EINVAL;
+ goto err_crypto_info;
+ }
+ }
+
switch (crypto_info->cipher_type) {
case TLS_CIPHER_AES_GCM_128:
case TLS_CIPHER_AES_GCM_256: {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 8051a9164139..71be8acfbc9b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -127,7 +127,7 @@ static int padding_length(struct tls_sw_context_rx *ctx,
int sub = 0;
/* Determine zero-padding length */
- if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION) {
+ if (tls_ctx->prot_info.version == TLS_1_3_VERSION) {
char content_type = 0;
int err;
int back = 17;
@@ -155,6 +155,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sgin = aead_req->src;
struct tls_sw_context_rx *ctx;
struct tls_context *tls_ctx;
+ struct tls_prot_info *prot;
struct scatterlist *sg;
struct sk_buff *skb;
unsigned int pages;
@@ -163,6 +164,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
skb = (struct sk_buff *)req->data;
tls_ctx = tls_get_ctx(skb->sk);
ctx = tls_sw_ctx_rx(tls_ctx);
+ prot = &tls_ctx->prot_info;
/* Propagate if there was an err */
if (err) {
@@ -171,8 +173,8 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
} else {
struct strp_msg *rxm = strp_msg(skb);
rxm->full_len -= padding_length(ctx, tls_ctx, skb);
- rxm->offset += tls_ctx->rx.prepend_size;
- rxm->full_len -= tls_ctx->rx.overhead_size;
+ rxm->offset += prot->prepend_size;
+ rxm->full_len -= prot->overhead_size;
}
/* After using skb->sk to propagate sk through crypto async callback
@@ -209,13 +211,14 @@ static int tls_do_decryption(struct sock *sk,
bool async)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
int ret;
aead_request_set_tfm(aead_req, ctx->aead_recv);
- aead_request_set_ad(aead_req, tls_ctx->rx.aad_size);
+ aead_request_set_ad(aead_req, prot->aad_size);
aead_request_set_crypt(aead_req, sgin, sgout,
- data_len + tls_ctx->rx.tag_size,
+ data_len + prot->tag_size,
(u8 *)iv_recv);
if (async) {
@@ -253,12 +256,13 @@ static int tls_do_decryption(struct sock *sk,
static void tls_trim_both_msgs(struct sock *sk, int target_size)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec = ctx->open_rec;
sk_msg_trim(sk, &rec->msg_plaintext, target_size);
if (target_size > 0)
- target_size += tls_ctx->tx.overhead_size;
+ target_size += prot->overhead_size;
sk_msg_trim(sk, &rec->msg_encrypted, target_size);
}
@@ -275,6 +279,7 @@ static int tls_alloc_encrypted_msg(struct sock *sk, int len)
static int tls_clone_plaintext_msg(struct sock *sk, int required)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec = ctx->open_rec;
struct sk_msg *msg_pl = &rec->msg_plaintext;
@@ -290,7 +295,7 @@ static int tls_clone_plaintext_msg(struct sock *sk, int required)
/* Skip initial bytes in msg_en's data to be able to use
* same offset of both plain and encrypted data.
*/
- skip = tls_ctx->tx.prepend_size + msg_pl->sg.size;
+ skip = prot->prepend_size + msg_pl->sg.size;
return sk_msg_clone(sk, msg_pl, msg_en, skip, len);
}
@@ -298,6 +303,7 @@ static int tls_clone_plaintext_msg(struct sock *sk, int required)
static struct tls_rec *tls_get_rec(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct sk_msg *msg_pl, *msg_en;
struct tls_rec *rec;
@@ -316,13 +322,11 @@ static struct tls_rec *tls_get_rec(struct sock *sk)
sk_msg_init(msg_en);
sg_init_table(rec->sg_aead_in, 2);
- sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
- tls_ctx->tx.aad_size);
+ sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, prot->aad_size);
sg_unmark_end(&rec->sg_aead_in[1]);
sg_init_table(rec->sg_aead_out, 2);
- sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
- tls_ctx->tx.aad_size);
+ sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size);
sg_unmark_end(&rec->sg_aead_out[1]);
return rec;
@@ -411,6 +415,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
struct aead_request *aead_req = (struct aead_request *)req;
struct sock *sk = req->data;
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct scatterlist *sge;
struct sk_msg *msg_en;
@@ -422,8 +427,8 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
msg_en = &rec->msg_encrypted;
sge = sk_msg_elem(msg_en, msg_en->sg.curr);
- sge->offset -= tls_ctx->tx.prepend_size;
- sge->length += tls_ctx->tx.prepend_size;
+ sge->offset -= prot->prepend_size;
+ sge->length += prot->prepend_size;
/* Check if error is previously set on socket */
if (err || sk->sk_err) {
@@ -470,22 +475,23 @@ static int tls_do_encryption(struct sock *sk,
struct aead_request *aead_req,
size_t data_len, u32 start)
{
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_rec *rec = ctx->open_rec;
struct sk_msg *msg_en = &rec->msg_encrypted;
struct scatterlist *sge = sk_msg_elem(msg_en, start);
int rc;
memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data));
- xor_iv_with_seq(tls_ctx->crypto_send.info.version, rec->iv_data,
+ xor_iv_with_seq(prot->version, rec->iv_data,
tls_ctx->tx.rec_seq);
- sge->offset += tls_ctx->tx.prepend_size;
- sge->length -= tls_ctx->tx.prepend_size;
+ sge->offset += prot->prepend_size;
+ sge->length -= prot->prepend_size;
msg_en->sg.curr = start;
aead_request_set_tfm(aead_req, ctx->aead_send);
- aead_request_set_ad(aead_req, tls_ctx->tx.aad_size);
+ aead_request_set_ad(aead_req, prot->aad_size);
aead_request_set_crypt(aead_req, rec->sg_aead_in,
rec->sg_aead_out,
data_len, rec->iv_data);
@@ -500,8 +506,8 @@ static int tls_do_encryption(struct sock *sk,
rc = crypto_aead_encrypt(aead_req);
if (!rc || rc != -EINPROGRESS) {
atomic_dec(&ctx->encrypt_pending);
- sge->offset -= tls_ctx->tx.prepend_size;
- sge->length += tls_ctx->tx.prepend_size;
+ sge->offset -= prot->prepend_size;
+ sge->length += prot->prepend_size;
}
if (!rc) {
@@ -513,8 +519,7 @@ static int tls_do_encryption(struct sock *sk,
/* Unhook the record from context if encryption is not failure */
ctx->open_rec = NULL;
- tls_advance_record_sn(sk, &tls_ctx->tx,
- tls_ctx->crypto_send.info.version);
+ tls_advance_record_sn(sk, &tls_ctx->tx, prot->version);
return rc;
}
@@ -640,6 +645,7 @@ static int tls_push_record(struct sock *sk, int flags,
unsigned char record_type)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec = ctx->open_rec, *tmp = NULL;
u32 i, split_point, uninitialized_var(orig_end);
@@ -658,12 +664,12 @@ static int tls_push_record(struct sock *sk, int flags,
split = split_point && split_point < msg_pl->sg.size;
if (split) {
rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
- split_point, tls_ctx->tx.overhead_size,
+ split_point, prot->overhead_size,
&orig_end);
if (rc < 0)
return rc;
sk_msg_trim(sk, msg_en, msg_pl->sg.size +
- tls_ctx->tx.overhead_size);
+ prot->overhead_size);
}
rec->tx_flags = flags;
@@ -673,7 +679,7 @@ static int tls_push_record(struct sock *sk, int flags,
sk_msg_iter_var_prev(i);
rec->content_type = record_type;
- if (tls_ctx->crypto_send.info.version == TLS_1_3_VERSION) {
+ if (prot->version == TLS_1_3_VERSION) {
/* Add content type to end of message. No padding added */
sg_set_buf(&rec->sg_content_type, &rec->content_type, 1);
sg_mark_end(&rec->sg_content_type);
@@ -694,22 +700,20 @@ static int tls_push_record(struct sock *sk, int flags,
i = msg_en->sg.start;
sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]);
- tls_make_aad(rec->aad_space, msg_pl->sg.size + tls_ctx->tx.tail_size,
- tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
- record_type,
- tls_ctx->crypto_send.info.version);
+ tls_make_aad(rec->aad_space, msg_pl->sg.size + prot->tail_size,
+ tls_ctx->tx.rec_seq, prot->rec_seq_size,
+ record_type, prot->version);
tls_fill_prepend(tls_ctx,
page_address(sg_page(&msg_en->sg.data[i])) +
msg_en->sg.data[i].offset,
- msg_pl->sg.size + tls_ctx->tx.tail_size,
- record_type,
- tls_ctx->crypto_send.info.version);
+ msg_pl->sg.size + prot->tail_size,
+ record_type, prot->version);
tls_ctx->pending_open_record_frags = false;
rc = tls_do_encryption(sk, tls_ctx, ctx, req,
- msg_pl->sg.size + tls_ctx->tx.tail_size, i);
+ msg_pl->sg.size + prot->tail_size, i);
if (rc < 0) {
if (rc != -EINPROGRESS) {
tls_err_abort(sk, EBADMSG);
@@ -723,8 +727,7 @@ static int tls_push_record(struct sock *sk, int flags,
} else if (split) {
msg_pl = &tmp->msg_plaintext;
msg_en = &tmp->msg_encrypted;
- sk_msg_trim(sk, msg_en, msg_pl->sg.size +
- tls_ctx->tx.overhead_size);
+ sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size);
tls_ctx->pending_open_record_frags = true;
ctx->open_rec = tmp;
}
@@ -859,6 +862,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
bool async_capable = ctx->async_capable;
unsigned char record_type = TLS_RECORD_TYPE_DATA;
@@ -925,7 +929,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
required_size = msg_pl->sg.size + try_to_copy +
- tls_ctx->tx.overhead_size;
+ prot->overhead_size;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -994,8 +998,8 @@ fallback_to_reg_send:
*/
try_to_copy -= required_size - msg_pl->sg.size;
full_record = true;
- sk_msg_trim(sk, msg_en, msg_pl->sg.size +
- tls_ctx->tx.overhead_size);
+ sk_msg_trim(sk, msg_en,
+ msg_pl->sg.size + prot->overhead_size);
}
if (try_to_copy) {
@@ -1081,6 +1085,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct sk_msg *msg_pl;
struct tls_rec *rec;
@@ -1130,8 +1135,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
full_record = true;
}
- required_size = msg_pl->sg.size + copy +
- tls_ctx->tx.overhead_size;
+ required_size = msg_pl->sg.size + copy + prot->overhead_size;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -1330,6 +1334,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct strp_msg *rxm = strp_msg(skb);
int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
struct aead_request *aead_req;
@@ -1337,16 +1342,16 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
u8 *aad, *iv, *mem = NULL;
struct scatterlist *sgin = NULL;
struct scatterlist *sgout = NULL;
- const int data_len = rxm->full_len - tls_ctx->rx.overhead_size +
- tls_ctx->rx.tail_size;
+ const int data_len = rxm->full_len - prot->overhead_size +
+ prot->tail_size;
if (*zc && (out_iov || out_sg)) {
if (out_iov)
n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
else
n_sgout = sg_nents(out_sg);
- n_sgin = skb_nsg(skb, rxm->offset + tls_ctx->rx.prepend_size,
- rxm->full_len - tls_ctx->rx.prepend_size);
+ n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
+ rxm->full_len - prot->prepend_size);
} else {
n_sgout = 0;
*zc = false;
@@ -1363,7 +1368,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
mem_size = aead_size + (nsg * sizeof(struct scatterlist));
- mem_size = mem_size + tls_ctx->rx.aad_size;
+ mem_size = mem_size + prot->aad_size;
mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
/* Allocate a single block of memory which contains
@@ -1379,37 +1384,35 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
sgin = (struct scatterlist *)(mem + aead_size);
sgout = sgin + n_sgin;
aad = (u8 *)(sgout + n_sgout);
- iv = aad + tls_ctx->rx.aad_size;
+ iv = aad + prot->aad_size;
/* Prepare IV */
err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- tls_ctx->rx.iv_size);
+ prot->iv_size);
if (err < 0) {
kfree(mem);
return err;
}
- if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION)
+ if (prot->version == TLS_1_3_VERSION)
memcpy(iv, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv));
else
memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
- xor_iv_with_seq(tls_ctx->crypto_recv.info.version, iv,
- tls_ctx->rx.rec_seq);
+ xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq);
/* Prepare AAD */
- tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size +
- tls_ctx->rx.tail_size,
- tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size,
- ctx->control,
- tls_ctx->crypto_recv.info.version);
+ tls_make_aad(aad, rxm->full_len - prot->overhead_size +
+ prot->tail_size,
+ tls_ctx->rx.rec_seq, prot->rec_seq_size,
+ ctx->control, prot->version);
/* Prepare sgin */
sg_init_table(sgin, n_sgin);
- sg_set_buf(&sgin[0], aad, tls_ctx->rx.aad_size);
+ sg_set_buf(&sgin[0], aad, prot->aad_size);
err = skb_to_sgvec(skb, &sgin[1],
- rxm->offset + tls_ctx->rx.prepend_size,
- rxm->full_len - tls_ctx->rx.prepend_size);
+ rxm->offset + prot->prepend_size,
+ rxm->full_len - prot->prepend_size);
if (err < 0) {
kfree(mem);
return err;
@@ -1418,7 +1421,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
if (n_sgout) {
if (out_iov) {
sg_init_table(sgout, n_sgout);
- sg_set_buf(&sgout[0], aad, tls_ctx->rx.aad_size);
+ sg_set_buf(&sgout[0], aad, prot->aad_size);
*chunk = 0;
err = tls_setup_from_iter(sk, out_iov, data_len,
@@ -1459,7 +1462,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- int version = tls_ctx->crypto_recv.info.version;
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ int version = prot->version;
struct strp_msg *rxm = strp_msg(skb);
int err = 0;
@@ -1480,8 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
rxm->full_len -= padding_length(ctx, tls_ctx, skb);
- rxm->offset += tls_ctx->rx.prepend_size;
- rxm->full_len -= tls_ctx->rx.overhead_size;
+ rxm->offset += prot->prepend_size;
+ rxm->full_len -= prot->overhead_size;
tls_advance_record_sn(sk, &tls_ctx->rx, version);
ctx->decrypted = true;
ctx->saved_data_ready(sk);
@@ -1605,6 +1609,7 @@ int tls_sw_recvmsg(struct sock *sk,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct sk_psock *psock;
unsigned char control = 0;
ssize_t decrypted = 0;
@@ -1645,10 +1650,10 @@ int tls_sw_recvmsg(struct sock *sk,
do {
bool retain_skb = false;
- bool async = false;
bool zc = false;
int to_decrypt;
int chunk = 0;
+ bool async;
skb = tls_wait_data(sk, psock, flags, timeo, &err);
if (!skb) {
@@ -1667,25 +1672,28 @@ int tls_sw_recvmsg(struct sock *sk,
rxm = strp_msg(skb);
- to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size;
+ to_decrypt = rxm->full_len - prot->overhead_size;
if (to_decrypt <= len && !is_kvec && !is_peek &&
ctx->control == TLS_RECORD_TYPE_DATA &&
- tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION)
+ prot->version != TLS_1_3_VERSION)
zc = true;
+ /* Do not use async mode if record is non-data */
+ if (ctx->control == TLS_RECORD_TYPE_DATA)
+ async = ctx->async_capable;
+ else
+ async = false;
+
err = decrypt_skb_update(sk, skb, &msg->msg_iter,
- &chunk, &zc, ctx->async_capable);
+ &chunk, &zc, async);
if (err < 0 && err != -EINPROGRESS) {
tls_err_abort(sk, EBADMSG);
goto recv_end;
}
- if (err == -EINPROGRESS) {
- async = true;
+ if (err == -EINPROGRESS)
num_async++;
- goto pick_next_record;
- }
if (!cmsg) {
int cerr;
@@ -1704,6 +1712,9 @@ int tls_sw_recvmsg(struct sock *sk,
goto recv_end;
}
+ if (async)
+ goto pick_next_record;
+
if (!zc) {
if (rxm->full_len > len) {
retain_skb = true;
@@ -1869,6 +1880,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
struct strp_msg *rxm = strp_msg(skb);
size_t cipher_overhead;
@@ -1876,17 +1888,17 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
int ret;
/* Verify that we have a full TLS header, or wait for more data */
- if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
+ if (rxm->offset + prot->prepend_size > skb->len)
return 0;
/* Sanity-check size of on-stack buffer. */
- if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) {
+ if (WARN_ON(prot->prepend_size > sizeof(header))) {
ret = -EINVAL;
goto read_failure;
}
/* Linearize header to local buffer */
- ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
+ ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size);
if (ret < 0)
goto read_failure;
@@ -1895,12 +1907,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
data_len = ((header[4] & 0xFF) | (header[3] << 8));
- cipher_overhead = tls_ctx->rx.tag_size;
- if (tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION)
- cipher_overhead += tls_ctx->rx.iv_size;
+ cipher_overhead = prot->tag_size;
+ if (prot->version != TLS_1_3_VERSION)
+ cipher_overhead += prot->iv_size;
if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead +
- tls_ctx->rx.tail_size) {
+ prot->tail_size) {
ret = -EMSGSIZE;
goto read_failure;
}
@@ -2060,6 +2072,8 @@ static void tx_work_handler(struct work_struct *work)
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_crypto_info *crypto_info;
struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
@@ -2165,18 +2179,20 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
if (crypto_info->version == TLS_1_3_VERSION) {
nonce_size = 0;
- cctx->aad_size = TLS_HEADER_SIZE;
- cctx->tail_size = 1;
+ prot->aad_size = TLS_HEADER_SIZE;
+ prot->tail_size = 1;
} else {
- cctx->aad_size = TLS_AAD_SPACE_SIZE;
- cctx->tail_size = 0;
+ prot->aad_size = TLS_AAD_SPACE_SIZE;
+ prot->tail_size = 0;
}
- cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
- cctx->tag_size = tag_size;
- cctx->overhead_size = cctx->prepend_size + cctx->tag_size +
- cctx->tail_size;
- cctx->iv_size = iv_size;
+ prot->version = crypto_info->version;
+ prot->cipher_type = crypto_info->cipher_type;
+ prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+ prot->tag_size = tag_size;
+ prot->overhead_size = prot->prepend_size +
+ prot->tag_size + prot->tail_size;
+ prot->iv_size = iv_size;
cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
GFP_KERNEL);
if (!cctx->iv) {
@@ -2186,7 +2202,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
/* Note: 128 & 256 bit salt are the same size */
memcpy(cctx->iv, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
- cctx->rec_seq_size = rec_seq_size;
+ prot->rec_seq_size = rec_seq_size;
cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
if (!cctx->rec_seq) {
rc = -ENOMEM;
@@ -2209,14 +2225,18 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
if (rc)
goto free_aead;
- rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
+ rc = crypto_aead_setauthsize(*aead, prot->tag_size);
if (rc)
goto free_aead;
if (sw_ctx_rx) {
tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
- sw_ctx_rx->async_capable =
- tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+
+ if (crypto_info->version == TLS_1_3_VERSION)
+ sw_ctx_rx->async_capable = false;
+ else
+ sw_ctx_rx->async_capable =
+ tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
/* Set up strparser */
memset(&cb, 0, sizeof(cb));
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5d3cce9e8744..15eb5d3d4750 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void)
{
struct virtio_vsock *vsock = virtio_vsock_get();
+ if (!vsock)
+ return VMADDR_CID_ANY;
+
return vsock->guest_cid;
}
@@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
virtio_vsock_update_guest_cid(vsock);
- ret = vsock_core_init(&virtio_transport.transport);
- if (ret < 0)
- goto out_vqs;
-
vsock->rx_buf_nr = 0;
vsock->rx_buf_max_nr = 0;
atomic_set(&vsock->queued_replies, 0);
@@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
mutex_unlock(&the_virtio_vsock_mutex);
return 0;
-out_vqs:
- vsock->vdev->config->del_vqs(vsock->vdev);
out:
kfree(vsock);
mutex_unlock(&the_virtio_vsock_mutex);
@@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
flush_work(&vsock->event_work);
flush_work(&vsock->send_pkt_work);
+ /* Reset all connected sockets when the device disappear */
+ vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+
vdev->config->reset(vdev);
mutex_lock(&vsock->rx_lock);
@@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
mutex_lock(&the_virtio_vsock_mutex);
the_virtio_vsock = NULL;
- vsock_core_exit();
mutex_unlock(&the_virtio_vsock_mutex);
vdev->config->del_vqs(vdev);
@@ -702,14 +701,28 @@ static int __init virtio_vsock_init(void)
virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
if (!virtio_vsock_workqueue)
return -ENOMEM;
+
ret = register_virtio_driver(&virtio_vsock_driver);
if (ret)
- destroy_workqueue(virtio_vsock_workqueue);
+ goto out_wq;
+
+ ret = vsock_core_init(&virtio_transport.transport);
+ if (ret)
+ goto out_vdr;
+
+ return 0;
+
+out_vdr:
+ unregister_virtio_driver(&virtio_vsock_driver);
+out_wq:
+ destroy_workqueue(virtio_vsock_workqueue);
return ret;
+
}
static void __exit virtio_vsock_exit(void)
{
+ vsock_core_exit();
unregister_virtio_driver(&virtio_vsock_driver);
destroy_workqueue(virtio_vsock_workqueue);
}
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c361ce782412..c3d5ab01fba7 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1651,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work)
static void vmci_transport_destruct(struct vsock_sock *vsk)
{
+ /* transport can be NULL if we hit a failure at init() time */
+ if (!vmci_trans(vsk))
+ return;
+
/* Ensure that the detach callback doesn't use the sk/vsk
* we are about to destruct.
*/
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 882d97bdc6bf..550ac9d827fe 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
cfg80211_sched_dfs_chan_update(rdev);
}
+ schedule_work(&cfg80211_disconnect_work);
+
return err;
}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 623dfe5e211c..b36ad8efb5e5 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1068,6 +1068,8 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
ASSERT_RTNL();
+ flush_work(&wdev->pmsr_free_wk);
+
nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
list_del_rcu(&wdev->list);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f257a2b32ba4..84d36ca7a7ab 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -456,6 +456,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev);
bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
u32 center_freq_khz, u32 bw_khz);
+extern struct work_struct cfg80211_disconnect_work;
+
/**
* cfg80211_chandef_dfs_usable - checks if chandef is DFS usable
* @wiphy: the wiphy to validate against
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c5df5211d29a..25a9e3b5c154 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -238,7 +238,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] =
NLA_POLICY_MAX(NLA_U8, 15),
[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] =
- NLA_POLICY_MAX(NLA_U8, 15),
+ NLA_POLICY_MAX(NLA_U8, 31),
[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 },
[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG },
[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG },
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 21139b82749f..5e2ab01d325c 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -256,8 +256,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
if (err)
goto out_err;
} else {
- memcpy(req->mac_addr, nla_data(info->attrs[NL80211_ATTR_MAC]),
- ETH_ALEN);
+ memcpy(req->mac_addr, wdev_address(wdev), ETH_ALEN);
eth_broadcast_addr(req->mac_addr_mask);
}
@@ -272,6 +271,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
req->n_peers = count;
req->cookie = cfg80211_assign_cookie(rdev);
+ req->nl_portid = info->snd_portid;
err = rdev_start_pmsr(rdev, wdev, req);
if (err)
@@ -530,14 +530,14 @@ free:
}
EXPORT_SYMBOL_GPL(cfg80211_pmsr_report);
-void cfg80211_pmsr_free_wk(struct work_struct *work)
+static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev)
{
- struct wireless_dev *wdev = container_of(work, struct wireless_dev,
- pmsr_free_wk);
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_pmsr_request *req, *tmp;
LIST_HEAD(free_list);
+ lockdep_assert_held(&wdev->mtx);
+
spin_lock_bh(&wdev->pmsr_lock);
list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) {
if (req->nl_portid)
@@ -547,14 +547,22 @@ void cfg80211_pmsr_free_wk(struct work_struct *work)
spin_unlock_bh(&wdev->pmsr_lock);
list_for_each_entry_safe(req, tmp, &free_list, list) {
- wdev_lock(wdev);
rdev_abort_pmsr(rdev, wdev, req);
- wdev_unlock(wdev);
kfree(req);
}
}
+void cfg80211_pmsr_free_wk(struct work_struct *work)
+{
+ struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+ pmsr_free_wk);
+
+ wdev_lock(wdev);
+ cfg80211_pmsr_process_abort(wdev);
+ wdev_unlock(wdev);
+}
+
void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
{
struct cfg80211_pmsr_request *req;
@@ -568,8 +576,8 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
spin_unlock_bh(&wdev->pmsr_lock);
if (found)
- schedule_work(&wdev->pmsr_free_wk);
- flush_work(&wdev->pmsr_free_wk);
+ cfg80211_pmsr_process_abort(wdev);
+
WARN_ON(!list_empty(&wdev->pmsr_list));
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index f741d8376a46..7d34cb884840 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work)
rtnl_unlock();
}
-static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
+DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
/*
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 61fa33d0019e..e4b8db5e81ec 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
* Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
*/
#include <linux/export.h>
#include <linux/bitops.h>
@@ -19,6 +19,7 @@
#include <linux/mpls.h>
#include <linux/gcd.h>
#include <linux/bitfield.h>
+#include <linux/nospec.h>
#include "core.h"
#include "rdev-ops.h"
@@ -715,20 +716,25 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
{
unsigned int dscp;
unsigned char vlan_priority;
+ unsigned int ret;
/* skb->priority values from 256->263 are magic values to
* directly indicate a specific 802.1d priority. This is used
* to allow 802.1d priority to be passed directly in from VLAN
* tags, etc.
*/
- if (skb->priority >= 256 && skb->priority <= 263)
- return skb->priority - 256;
+ if (skb->priority >= 256 && skb->priority <= 263) {
+ ret = skb->priority - 256;
+ goto out;
+ }
if (skb_vlan_tag_present(skb)) {
vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK)
>> VLAN_PRIO_SHIFT;
- if (vlan_priority > 0)
- return vlan_priority;
+ if (vlan_priority > 0) {
+ ret = vlan_priority;
+ goto out;
+ }
}
switch (skb->protocol) {
@@ -747,8 +753,9 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
if (!mpls)
return 0;
- return (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
+ ret = (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
>> MPLS_LS_TC_SHIFT;
+ goto out;
}
case htons(ETH_P_80221):
/* 802.21 is always network control traffic */
@@ -761,18 +768,24 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
unsigned int i, tmp_dscp = dscp >> 2;
for (i = 0; i < qos_map->num_des; i++) {
- if (tmp_dscp == qos_map->dscp_exception[i].dscp)
- return qos_map->dscp_exception[i].up;
+ if (tmp_dscp == qos_map->dscp_exception[i].dscp) {
+ ret = qos_map->dscp_exception[i].up;
+ goto out;
+ }
}
for (i = 0; i < 8; i++) {
if (tmp_dscp >= qos_map->up[i].low &&
- tmp_dscp <= qos_map->up[i].high)
- return i;
+ tmp_dscp <= qos_map->up[i].high) {
+ ret = i;
+ goto out;
+ }
}
}
- return dscp >> 5;
+ ret = dscp >> 5;
+out:
+ return array_index_nospec(ret, IEEE80211_NUM_TIDS);
}
EXPORT_SYMBOL(cfg80211_classify8021d);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5121729b8b63..ec3a828672ef 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb)
unsigned int lci = 1;
struct sock *sk;
- read_lock_bh(&x25_list_lock);
-
- while ((sk = __x25_find_socket(lci, nb)) != NULL) {
+ while ((sk = x25_find_socket(lci, nb)) != NULL) {
sock_put(sk);
if (++lci == 4096) {
lci = 0;
break;
}
+ cond_resched();
}
- read_unlock_bh(&x25_list_lock);
return lci;
}
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 5ab236c5c9a5..77520eacee8f 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -129,9 +129,10 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
return 0;
err_unreg_umem:
- xdp_clear_umem_at_qid(dev, queue_id);
if (!force_zc)
err = 0; /* fallback to copy mode */
+ if (err)
+ xdp_clear_umem_at_qid(dev, queue_id);
out_rtnl_unlock:
rtnl_unlock();
return err;
@@ -265,10 +266,10 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem)
if (!umem->pgs)
return -ENOMEM;
- down_write(&current->mm->mmap_sem);
- npgs = get_user_pages(umem->address, umem->npgs,
- gup_flags, &umem->pgs[0], NULL);
- up_write(&current->mm->mmap_sem);
+ down_read(&current->mm->mmap_sem);
+ npgs = get_user_pages_longterm(umem->address, umem->npgs,
+ gup_flags, &umem->pgs[0], NULL);
+ up_read(&current->mm->mmap_sem);
if (npgs != umem->npgs) {
if (npgs >= 0) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 949d3bbccb2f..41731c9bb26f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -669,6 +669,8 @@ static int xsk_mmap(struct file *file, struct socket *sock,
if (!umem)
return -EINVAL;
+ /* Matches the smp_wmb() in XDP_UMEM_REG */
+ smp_rmb();
if (offset == XDP_UMEM_PGOFF_FILL_RING)
q = READ_ONCE(umem->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
@@ -678,6 +680,8 @@ static int xsk_mmap(struct file *file, struct socket *sock,
if (!q)
return -EINVAL;
+ /* Matches the smp_wmb() in xsk_init_queue */
+ smp_rmb();
qpg = virt_to_head_page(q->ring);
if (size > (PAGE_SIZE << compound_order(qpg)))
return -EINVAL;