Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/Kconfig57
-rw-r--r--net/6lowpan/Makefile13
-rw-r--r--net/6lowpan/iphc.c200
-rw-r--r--net/6lowpan/nhc.c241
-rw-r--r--net/6lowpan/nhc.h146
-rw-r--r--net/6lowpan/nhc_dest.c28
-rw-r--r--net/6lowpan/nhc_fragment.c27
-rw-r--r--net/6lowpan/nhc_hop.c27
-rw-r--r--net/6lowpan/nhc_ipv6.c27
-rw-r--r--net/6lowpan/nhc_mobility.c27
-rw-r--r--net/6lowpan/nhc_routing.c27
-rw-r--r--net/6lowpan/nhc_udp.c157
-rw-r--r--net/802/fc.c21
-rw-r--r--net/802/fddi.c26
-rw-r--r--net/802/hippi.c28
-rw-r--r--net/8021q/vlan_dev.c37
-rw-r--r--net/Makefile2
-rw-r--r--net/appletalk/aarp.c6
-rw-r--r--net/appletalk/ddp.c7
-rw-r--r--net/atm/common.c7
-rw-r--r--net/atm/common.h7
-rw-r--r--net/atm/lec.c4
-rw-r--r--net/atm/signaling.c24
-rw-r--r--net/ax25/af_ax25.c7
-rw-r--r--net/ax25/ax25_ip.c30
-rw-r--r--net/batman-adv/gateway_client.c19
-rw-r--r--net/bluetooth/Kconfig8
-rw-r--r--net/bluetooth/Makefile3
-rw-r--r--net/bluetooth/a2mp.c44
-rw-r--r--net/bluetooth/a2mp.h8
-rw-r--r--net/bluetooth/af_bluetooth.c11
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/bluetooth/hci_conn.c20
-rw-r--r--net/bluetooth/hci_core.c60
-rw-r--r--net/bluetooth/hci_debugfs.h22
-rw-r--r--net/bluetooth/hci_event.c42
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/hci_sock.c55
-rw-r--r--net/bluetooth/l2cap_core.c40
-rw-r--r--net/bluetooth/l2cap_sock.c12
-rw-r--r--net/bluetooth/mgmt.c18
-rw-r--r--net/bluetooth/rfcomm/sock.c10
-rw-r--r--net/bluetooth/sco.c31
-rw-r--r--net/bluetooth/smp.c34
-rw-r--r--net/bridge/br_device.c5
-rw-r--r--net/bridge/br_forward.c7
-rw-r--r--net/bridge/br_input.c17
-rw-r--r--net/bridge/br_netfilter.c78
-rw-r--r--net/bridge/br_netlink.c95
-rw-r--r--net/bridge/br_nf_core.c1
-rw-r--r--net/bridge/br_private.h6
-rw-r--r--net/bridge/br_sysfs_if.c2
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c84
-rw-r--r--net/caif/caif_socket.c17
-rw-r--r--net/can/bcm.c9
-rw-r--r--net/can/raw.c13
-rw-r--r--net/compat.c10
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/filter.c116
-rw-r--r--net/core/neighbour.c88
-rw-r--r--net/core/skbuff.c54
-rw-r--r--net/core/sock.c15
-rw-r--r--net/dcb/dcbnl.c44
-rw-r--r--net/dccp/dccp.h8
-rw-r--r--net/dccp/probe.c3
-rw-r--r--net/dccp/proto.c7
-rw-r--r--net/decnet/af_decnet.c7
-rw-r--r--net/decnet/dn_neigh.c112
-rw-r--r--net/decnet/dn_route.c10
-rw-r--r--net/dsa/Kconfig1
-rw-r--r--net/dsa/dsa.c235
-rw-r--r--net/dsa/dsa_priv.h9
-rw-r--r--net/dsa/slave.c161
-rw-r--r--net/ethernet/eth.c38
-rw-r--r--net/ieee802154/6lowpan/core.c6
-rw-r--r--net/ieee802154/core.c1
-rw-r--r--net/ieee802154/socket.c21
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/arp.c112
-rw-r--r--net/ipv4/devinet.c31
-rw-r--r--net/ipv4/fib_frontend.c65
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_rules.c3
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c1590
-rw-r--r--net/ipv4/igmp.c65
-rw-r--r--net/ipv4/inet_diag.c55
-rw-r--r--net/ipv4/netfilter/Kconfig38
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c7
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c17
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c23
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c3
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv4/raw.c7
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c14
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_fastopen.c1
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_offload.c4
-rw-r--r--net/ipv4/tcp_output.c79
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/udp.c19
-rw-r--r--net/ipv4/udp_impl.h4
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/addrconf.c38
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ip6_tunnel.c6
-rw-r--r--net/ipv6/mcast.c60
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/netfilter/Kconfig18
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c35
-rw-r--r--net/ipv6/ping.c3
-rw-r--r--net/ipv6/raw.c10
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/tcpv6_offload.c4
-rw-r--r--net/ipv6/udp.c10
-rw-r--r--net/ipv6/udp_impl.h7
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/ipx/af_ipx.c7
-rw-r--r--net/irda/af_irda.c29
-rw-r--r--net/iucv/af_iucv.c8
-rw-r--r--net/key/af_key.c6
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/l2tp/l2tp_ppp.c7
-rw-r--r--net/llc/af_llc.c7
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/ibss.c2
-rw-r--r--net/mac80211/mesh.c2
-rw-r--r--net/mac80211/mlme.c10
-rw-r--r--net/mpls/Kconfig23
-rw-r--r--net/mpls/Makefile1
-rw-r--r--net/mpls/af_mpls.c1008
-rw-r--r--net/mpls/internal.h59
-rw-r--r--net/netfilter/Kconfig22
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c69
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c182
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c102
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c21
-rw-r--r--net/netfilter/nf_conntrack_amanda.c10
-rw-r--r--net/netfilter/nf_tables_api.c7
-rw-r--r--net/netfilter/nf_tables_core.c105
-rw-r--r--net/netfilter/nft_compat.c9
-rw-r--r--net/netfilter/nft_reject_inet.c6
-rw-r--r--net/netfilter/xt_set.c4
-rw-r--r--net/netfilter/xt_string.c3
-rw-r--r--net/netlink/af_netlink.c6
-rw-r--r--net/netrom/af_netrom.c7
-rw-r--r--net/netrom/nr_dev.c31
-rw-r--r--net/nfc/llcp_sock.c8
-rw-r--r--net/nfc/rawsock.c7
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/packet/af_packet.c46
-rw-r--r--net/phonet/datagram.c8
-rw-r--r--net/phonet/pep.c8
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/rds.h7
-rw-r--r--net/rds/recv.c4
-rw-r--r--net/rds/send.c3
-rw-r--r--net/rose/af_rose.c7
-rw-r--r--net/rose/rose_dev.c53
-rw-r--r--net/rxrpc/af_rxrpc.c7
-rw-r--r--net/rxrpc/ar-internal.h10
-rw-r--r--net/rxrpc/ar-output.c20
-rw-r--r--net/rxrpc/ar-recvmsg.c6
-rw-r--r--net/sched/cls_api.c14
-rw-r--r--net/sched/cls_basic.c6
-rw-r--r--net/sched/cls_bpf.c212
-rw-r--r--net/sched/cls_cgroup.c6
-rw-r--r--net/sched/cls_flow.c6
-rw-r--r--net/sched/cls_fw.c34
-rw-r--r--net/sched/cls_route.c26
-rw-r--r--net/sched/cls_rsvp.h12
-rw-r--r--net/sched/cls_tcindex.c6
-rw-r--r--net/sched/cls_u32.c25
-rw-r--r--net/sched/em_text.c3
-rw-r--r--net/sched/sch_api.c14
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/socket.c8
-rw-r--r--net/socket.c82
-rw-r--r--net/switchdev/switchdev.c183
-rw-r--r--net/tipc/Kconfig8
-rw-r--r--net/tipc/Makefile1
-rw-r--r--net/tipc/bearer.c15
-rw-r--r--net/tipc/bearer.h17
-rw-r--r--net/tipc/discover.c7
-rw-r--r--net/tipc/eth_media.c8
-rw-r--r--net/tipc/ib_media.c2
-rw-r--r--net/tipc/msg.h6
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/socket.c117
-rw-r--r--net/tipc/subscr.c23
-rw-r--r--net/tipc/udp_media.c442
-rw-r--r--net/unix/af_unix.c50
-rw-r--r--net/vmw_vsock/af_vsock.c20
-rw-r--r--net/vmw_vsock/vmci_transport.c3
-rw-r--r--net/wireless/ibss.c2
-rw-r--r--net/wireless/nl80211.c4
-rw-r--r--net/wireless/trace.h9
-rw-r--r--net/wireless/wext-sme.c2
-rw-r--r--net/x25/af_x25.c6
204 files changed, 5790 insertions, 2860 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig
index e4a02ef55102..7fa0f382e7d1 100644
--- a/net/6lowpan/Kconfig
+++ b/net/6lowpan/Kconfig
@@ -1,6 +1,61 @@
-config 6LOWPAN
+menuconfig 6LOWPAN
tristate "6LoWPAN Support"
depends on IPV6
---help---
This enables IPv6 over Low power Wireless Personal Area Network -
"6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks.
+
+menuconfig 6LOWPAN_NHC
+ tristate "Next Header Compression Support"
+ depends on 6LOWPAN
+ default y
+ ---help---
+ Support for next header compression.
+
+if 6LOWPAN_NHC
+
+config 6LOWPAN_NHC_DEST
+ tristate "Destination Options Header Support"
+ default y
+ ---help---
+ 6LoWPAN IPv6 Destination Options Header compression according to
+ RFC6282.
+
+config 6LOWPAN_NHC_FRAGMENT
+ tristate "Fragment Header Support"
+ default y
+ ---help---
+ 6LoWPAN IPv6 Fragment Header compression according to RFC6282.
+
+config 6LOWPAN_NHC_HOP
+ tristate "Hop-by-Hop Options Header Support"
+ default y
+ ---help---
+ 6LoWPAN IPv6 Hop-by-Hop Options Header compression according to
+ RFC6282.
+
+config 6LOWPAN_NHC_IPV6
+ tristate "IPv6 Header Support"
+ default y
+ ---help---
+ 6LoWPAN IPv6 Header compression according to RFC6282.
+
+config 6LOWPAN_NHC_MOBILITY
+ tristate "Mobility Header Support"
+ default y
+ ---help---
+ 6LoWPAN IPv6 Mobility Header compression according to RFC6282.
+
+config 6LOWPAN_NHC_ROUTING
+ tristate "Routing Header Support"
+ default y
+ ---help---
+ 6LoWPAN IPv6 Routing Header compression according to RFC6282.
+
+config 6LOWPAN_NHC_UDP
+ tristate "UDP Header Support"
+ default y
+ ---help---
+ 6LoWPAN IPv6 UDP Header compression according to RFC6282.
+
+endif
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index 415886bb456a..eb8baa72adc8 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,3 +1,12 @@
-obj-$(CONFIG_6LOWPAN) := 6lowpan.o
+obj-$(CONFIG_6LOWPAN) += 6lowpan.o
-6lowpan-y := iphc.o
+6lowpan-y := iphc.o nhc.o
+
+#rfc6282 nhcs
+obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o
+obj-$(CONFIG_6LOWPAN_NHC_FRAGMENT) += nhc_fragment.o
+obj-$(CONFIG_6LOWPAN_NHC_HOP) += nhc_hop.o
+obj-$(CONFIG_6LOWPAN_NHC_IPV6) += nhc_ipv6.o
+obj-$(CONFIG_6LOWPAN_NHC_MOBILITY) += nhc_mobility.o
+obj-$(CONFIG_6LOWPAN_NHC_ROUTING) += nhc_routing.o
+obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 32ffec6ef164..94a375c04f21 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -54,6 +54,8 @@
#include <net/ipv6.h>
#include <net/af_ieee802154.h>
+#include "nhc.h"
+
/* Uncompress address function for source and
* destination address(non-multicast).
*
@@ -224,77 +226,6 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
return 0;
}
-static int uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
-{
- bool fail;
- u8 tmp = 0, val = 0;
-
- fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp));
-
- if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) {
- pr_debug("UDP header uncompression\n");
- switch (tmp & LOWPAN_NHC_UDP_CS_P_11) {
- case LOWPAN_NHC_UDP_CS_P_00:
- fail |= lowpan_fetch_skb(skb, &uh->source,
- sizeof(uh->source));
- fail |= lowpan_fetch_skb(skb, &uh->dest,
- sizeof(uh->dest));
- break;
- case LOWPAN_NHC_UDP_CS_P_01:
- fail |= lowpan_fetch_skb(skb, &uh->source,
- sizeof(uh->source));
- fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
- uh->dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT);
- break;
- case LOWPAN_NHC_UDP_CS_P_10:
- fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
- uh->source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT);
- fail |= lowpan_fetch_skb(skb, &uh->dest,
- sizeof(uh->dest));
- break;
- case LOWPAN_NHC_UDP_CS_P_11:
- fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
- uh->source = htons(LOWPAN_NHC_UDP_4BIT_PORT +
- (val >> 4));
- uh->dest = htons(LOWPAN_NHC_UDP_4BIT_PORT +
- (val & 0x0f));
- break;
- default:
- pr_debug("ERROR: unknown UDP format\n");
- goto err;
- }
-
- pr_debug("uncompressed UDP ports: src = %d, dst = %d\n",
- ntohs(uh->source), ntohs(uh->dest));
-
- /* checksum */
- if (tmp & LOWPAN_NHC_UDP_CS_C) {
- pr_debug_ratelimited("checksum elided currently not supported\n");
- goto err;
- } else {
- fail |= lowpan_fetch_skb(skb, &uh->check,
- sizeof(uh->check));
- }
-
- /* UDP length needs to be infered from the lower layers
- * here, we obtain the hint from the remaining size of the
- * frame
- */
- uh->len = htons(skb->len + sizeof(struct udphdr));
- pr_debug("uncompressed UDP length: src = %d", ntohs(uh->len));
- } else {
- pr_debug("ERROR: unsupported NH format\n");
- goto err;
- }
-
- if (fail)
- goto err;
-
- return 0;
-err:
- return -EINVAL;
-}
-
/* TTL uncompression values */
static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 };
@@ -425,29 +356,11 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
return -EINVAL;
}
- /* UDP data uncompression */
+ /* Next header data uncompression */
if (iphc0 & LOWPAN_IPHC_NH_C) {
- struct udphdr uh;
- const int needed = sizeof(struct udphdr) + sizeof(hdr);
-
- if (uncompress_udp_header(skb, &uh))
- return -EINVAL;
-
- /* replace the compressed UDP head by the uncompressed UDP
- * header
- */
- err = skb_cow(skb, needed);
- if (unlikely(err))
+ err = lowpan_nhc_do_uncompression(skb, dev, &hdr);
+ if (err < 0)
return err;
-
- skb_push(skb, sizeof(struct udphdr));
- skb_reset_transport_header(skb);
- skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr));
-
- raw_dump_table(__func__, "raw UDP header dump",
- (u8 *)&uh, sizeof(uh));
-
- hdr.nexthdr = UIP_PROTO_UDP;
} else {
err = skb_cow(skb, sizeof(hdr));
if (unlikely(err))
@@ -500,71 +413,6 @@ static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift,
return rol8(val, shift);
}
-static void compress_udp_header(u8 **hc_ptr, struct sk_buff *skb)
-{
- struct udphdr *uh;
- u8 tmp;
-
- /* In the case of RAW sockets the transport header is not set by
- * the ip6 stack so we must set it ourselves
- */
- if (skb->transport_header == skb->network_header)
- skb_set_transport_header(skb, sizeof(struct ipv6hdr));
-
- uh = udp_hdr(skb);
-
- if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) ==
- LOWPAN_NHC_UDP_4BIT_PORT) &&
- ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) ==
- LOWPAN_NHC_UDP_4BIT_PORT)) {
- pr_debug("UDP header: both ports compression to 4 bits\n");
- /* compression value */
- tmp = LOWPAN_NHC_UDP_CS_P_11;
- lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
- /* source and destination port */
- tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT +
- ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4);
- lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
- } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) ==
- LOWPAN_NHC_UDP_8BIT_PORT) {
- pr_debug("UDP header: remove 8 bits of dest\n");
- /* compression value */
- tmp = LOWPAN_NHC_UDP_CS_P_01;
- lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
- /* source port */
- lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source));
- /* destination port */
- tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT;
- lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
- } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) ==
- LOWPAN_NHC_UDP_8BIT_PORT) {
- pr_debug("UDP header: remove 8 bits of source\n");
- /* compression value */
- tmp = LOWPAN_NHC_UDP_CS_P_10;
- lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
- /* source port */
- tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT;
- lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
- /* destination port */
- lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest));
- } else {
- pr_debug("UDP header: can't compress\n");
- /* compression value */
- tmp = LOWPAN_NHC_UDP_CS_P_00;
- lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
- /* source port */
- lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source));
- /* destination port */
- lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest));
- }
-
- /* checksum is always inline */
- lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check));
-
- /* skip the UDP header */
- skb_pull(skb, sizeof(struct udphdr));
-}
-
int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
unsigned short type, const void *_daddr,
const void *_saddr, unsigned int len)
@@ -572,7 +420,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
u8 tmp, iphc0, iphc1, *hc_ptr;
struct ipv6hdr *hdr;
u8 head[100] = {};
- int addr_type;
+ int ret, addr_type;
if (type != ETH_P_IPV6)
return -EINVAL;
@@ -649,13 +497,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
/* NOTE: payload length is always compressed */
- /* Next Header is compress if UDP */
- if (hdr->nexthdr == UIP_PROTO_UDP)
- iphc0 |= LOWPAN_IPHC_NH_C;
-
- if ((iphc0 & LOWPAN_IPHC_NH_C) == 0)
- lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr,
- sizeof(hdr->nexthdr));
+ /* Check if we provide the nhc format for nexthdr and compression
+ * functionality. If not nexthdr is handled inline and not compressed.
+ */
+ ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr, &iphc0);
+ if (ret < 0)
+ return ret;
/* Hop limit
* if 1: compress, encoding is 01
@@ -741,9 +588,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
}
}
- /* UDP header compression */
- if (hdr->nexthdr == UIP_PROTO_UDP)
- compress_udp_header(&hc_ptr, skb);
+ /* next header compression */
+ if (iphc0 & LOWPAN_IPHC_NH_C) {
+ ret = lowpan_nhc_do_compression(skb, hdr, &hc_ptr);
+ if (ret < 0)
+ return ret;
+ }
head[0] = iphc0;
head[1] = iphc1;
@@ -761,4 +611,18 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(lowpan_header_compress);
+static int __init lowpan_module_init(void)
+{
+ request_module_nowait("nhc_dest");
+ request_module_nowait("nhc_fragment");
+ request_module_nowait("nhc_hop");
+ request_module_nowait("nhc_ipv6");
+ request_module_nowait("nhc_mobility");
+ request_module_nowait("nhc_routing");
+ request_module_nowait("nhc_udp");
+
+ return 0;
+}
+module_init(lowpan_module_init);
+
MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c
new file mode 100644
index 000000000000..fd20fc51a7c4
--- /dev/null
+++ b/net/6lowpan/nhc.c
@@ -0,0 +1,241 @@
+/*
+ * 6LoWPAN next header compression
+ *
+ *
+ * Authors:
+ * Alexander Aring <aar@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+
+#include <net/ipv6.h>
+
+#include "nhc.h"
+
+static struct rb_root rb_root = RB_ROOT;
+static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX];
+static DEFINE_SPINLOCK(lowpan_nhc_lock);
+
+static int lowpan_nhc_insert(struct lowpan_nhc *nhc)
+{
+ struct rb_node **new = &rb_root.rb_node, *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct lowpan_nhc *this = container_of(*new, struct lowpan_nhc,
+ node);
+ int result, len_dif, len;
+
+ len_dif = nhc->idlen - this->idlen;
+
+ if (nhc->idlen < this->idlen)
+ len = nhc->idlen;
+ else
+ len = this->idlen;
+
+ result = memcmp(nhc->id, this->id, len);
+ if (!result)
+ result = len_dif;
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&nhc->node, parent, new);
+ rb_insert_color(&nhc->node, &rb_root);
+
+ return 0;
+}
+
+static void lowpan_nhc_remove(struct lowpan_nhc *nhc)
+{
+ rb_erase(&nhc->node, &rb_root);
+}
+
+static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb)
+{
+ struct rb_node *node = rb_root.rb_node;
+ const u8 *nhcid_skb_ptr = skb->data;
+
+ while (node) {
+ struct lowpan_nhc *nhc = container_of(node, struct lowpan_nhc,
+ node);
+ u8 nhcid_skb_ptr_masked[LOWPAN_NHC_MAX_ID_LEN];
+ int result, i;
+
+ if (nhcid_skb_ptr + nhc->idlen > skb->data + skb->len)
+ return NULL;
+
+ /* copy and mask afterwards the nhid value from skb */
+ memcpy(nhcid_skb_ptr_masked, nhcid_skb_ptr, nhc->idlen);
+ for (i = 0; i < nhc->idlen; i++)
+ nhcid_skb_ptr_masked[i] &= nhc->idmask[i];
+
+ result = memcmp(nhcid_skb_ptr_masked, nhc->id, nhc->idlen);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return nhc;
+ }
+
+ return NULL;
+}
+
+int lowpan_nhc_check_compression(struct sk_buff *skb,
+ const struct ipv6hdr *hdr, u8 **hc_ptr,
+ u8 *iphc0)
+{
+ struct lowpan_nhc *nhc;
+
+ spin_lock_bh(&lowpan_nhc_lock);
+
+ nhc = lowpan_nexthdr_nhcs[hdr->nexthdr];
+ if (nhc && nhc->compress)
+ *iphc0 |= LOWPAN_IPHC_NH_C;
+ else
+ lowpan_push_hc_data(hc_ptr, &hdr->nexthdr,
+ sizeof(hdr->nexthdr));
+
+ spin_unlock_bh(&lowpan_nhc_lock);
+
+ return 0;
+}
+
+int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr,
+ u8 **hc_ptr)
+{
+ int ret;
+ struct lowpan_nhc *nhc;
+
+ spin_lock_bh(&lowpan_nhc_lock);
+
+ nhc = lowpan_nexthdr_nhcs[hdr->nexthdr];
+ /* check if the nhc module was removed in unlocked part.
+ * TODO: this is a workaround we should prevent unloading
+ * of nhc modules while unlocked part, this will always drop
+ * the lowpan packet but it's very unlikely.
+ *
+ * Solution isn't easy because we need to decide at
+ * lowpan_nhc_check_compression if we do a compression or not.
+ * Because the inline data which is added to skb, we can't move this
+ * handling.
+ */
+ if (unlikely(!nhc || !nhc->compress)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* In the case of RAW sockets the transport header is not set by
+ * the ip6 stack so we must set it ourselves
+ */
+ if (skb->transport_header == skb->network_header)
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ ret = nhc->compress(skb, hc_ptr);
+ if (ret < 0)
+ goto out;
+
+ /* skip the transport header */
+ skb_pull(skb, nhc->nexthdrlen);
+
+out:
+ spin_unlock_bh(&lowpan_nhc_lock);
+
+ return ret;
+}
+
+int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev,
+ struct ipv6hdr *hdr)
+{
+ struct lowpan_nhc *nhc;
+ int ret;
+
+ spin_lock_bh(&lowpan_nhc_lock);
+
+ nhc = lowpan_nhc_by_nhcid(skb);
+ if (nhc) {
+ if (nhc->uncompress) {
+ ret = nhc->uncompress(skb, sizeof(struct ipv6hdr) +
+ nhc->nexthdrlen);
+ if (ret < 0) {
+ spin_unlock_bh(&lowpan_nhc_lock);
+ return ret;
+ }
+ } else {
+ spin_unlock_bh(&lowpan_nhc_lock);
+ netdev_warn(dev, "received nhc id for %s which is not implemented.\n",
+ nhc->name);
+ return -ENOTSUPP;
+ }
+ } else {
+ spin_unlock_bh(&lowpan_nhc_lock);
+ netdev_warn(dev, "received unknown nhc id which was not found.\n");
+ return -ENOENT;
+ }
+
+ hdr->nexthdr = nhc->nexthdr;
+ skb_reset_transport_header(skb);
+ raw_dump_table(__func__, "raw transport header dump",
+ skb_transport_header(skb), nhc->nexthdrlen);
+
+ spin_unlock_bh(&lowpan_nhc_lock);
+
+ return 0;
+}
+
+int lowpan_nhc_add(struct lowpan_nhc *nhc)
+{
+ int ret;
+
+ if (!nhc->idlen || !nhc->idsetup)
+ return -EINVAL;
+
+ WARN_ONCE(nhc->idlen > LOWPAN_NHC_MAX_ID_LEN,
+ "LOWPAN_NHC_MAX_ID_LEN should be updated to %zd.\n",
+ nhc->idlen);
+
+ nhc->idsetup(nhc);
+
+ spin_lock_bh(&lowpan_nhc_lock);
+
+ if (lowpan_nexthdr_nhcs[nhc->nexthdr]) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ ret = lowpan_nhc_insert(nhc);
+ if (ret < 0)
+ goto out;
+
+ lowpan_nexthdr_nhcs[nhc->nexthdr] = nhc;
+out:
+ spin_unlock_bh(&lowpan_nhc_lock);
+ return ret;
+}
+EXPORT_SYMBOL(lowpan_nhc_add);
+
+void lowpan_nhc_del(struct lowpan_nhc *nhc)
+{
+ spin_lock_bh(&lowpan_nhc_lock);
+
+ lowpan_nhc_remove(nhc);
+ lowpan_nexthdr_nhcs[nhc->nexthdr] = NULL;
+
+ spin_unlock_bh(&lowpan_nhc_lock);
+
+ synchronize_net();
+}
+EXPORT_SYMBOL(lowpan_nhc_del);
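
Aside (illustration only, not part of the patch): lowpan_nhc_by_nhcid() above matches the first idlen byte(s) of the skb against each registered NHC ID after AND-ing them with that NHC's idmask, so one registered entry covers a whole range of on-wire ID values (RFC 6282 encodes extension headers as 1110EEEN and UDP as 11110CPP). A minimal user-space sketch of that masked match, using the hop-by-hop ID/mask added later in this series (0xe0/0xfe) and the UDP pattern from RFC 6282 (0xf0/0xf8, i.e. the kernel's LOWPAN_NHC_UDP_ID/LOWPAN_NHC_UDP_MASK from include/net/6lowpan.h, which are not part of this diff):

#include <stdint.h>
#include <stdio.h>

struct nhc_id { const char *name; uint8_t id, mask; };

/* ID/mask pairs per RFC 6282: extension headers are 1110EEEN, UDP is 11110CPP */
static const struct nhc_id ids[] = {
	{ "hop-by-hop", 0xe0, 0xfe },	/* matches 0xe0 and 0xe1 */
	{ "udp",        0xf0, 0xf8 },	/* matches 0xf0 .. 0xf7  */
};

/* mirror the masked memcmp() in lowpan_nhc_by_nhcid() for a one-byte ID */
static const char *nhc_match(uint8_t first_skb_byte)
{
	for (unsigned int i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
		if ((first_skb_byte & ids[i].mask) == ids[i].id)
			return ids[i].name;
	return "unknown";
}

int main(void)
{
	printf("0xe1 -> %s\n", nhc_match(0xe1));	/* hop-by-hop, NH bit set    */
	printf("0xf3 -> %s\n", nhc_match(0xf3));	/* udp, port/checksum bits   */
	printf("0x41 -> %s\n", nhc_match(0x41));	/* no registered nhc matches */
	return 0;
}
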
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
new file mode 100644
index 000000000000..ed44938eb5de
--- /dev/null
+++ b/net/6lowpan/nhc.h
@@ -0,0 +1,146 @@
+#ifndef __6LOWPAN_NHC_H
+#define __6LOWPAN_NHC_H
+
+#include <linux/skbuff.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+
+#include <net/6lowpan.h>
+#include <net/ipv6.h>
+
+#define LOWPAN_NHC_MAX_ID_LEN 1
+
+/**
+ * LOWPAN_NHC - helper macro to generate nh id fields and lowpan_nhc struct
+ *
+ * @__nhc: variable name of the lowpan_nhc struct.
+ * @_name: const char * of common header compression name.
+ * @_nexthdr: ipv6 nexthdr field for the header compression.
+ * @_nexthdrlen: ipv6 nexthdr len for the reserved space.
+ * @_idsetup: callback to setup id and mask values.
+ * @_idlen: len for the next header id and mask, should be always the same.
+ * @_uncompress: callback for uncompression call.
+ * @_compress: callback for compression call.
+ */
+#define LOWPAN_NHC(__nhc, _name, _nexthdr, \
+ _hdrlen, _idsetup, _idlen, \
+ _uncompress, _compress) \
+static u8 __nhc##_val[_idlen]; \
+static u8 __nhc##_mask[_idlen]; \
+static struct lowpan_nhc __nhc = { \
+ .name = _name, \
+ .nexthdr = _nexthdr, \
+ .nexthdrlen = _hdrlen, \
+ .id = __nhc##_val, \
+ .idmask = __nhc##_mask, \
+ .idlen = _idlen, \
+ .idsetup = _idsetup, \
+ .uncompress = _uncompress, \
+ .compress = _compress, \
+}
+
+#define module_lowpan_nhc(__nhc) \
+static int __init __nhc##_init(void) \
+{ \
+ return lowpan_nhc_add(&(__nhc)); \
+} \
+module_init(__nhc##_init); \
+static void __exit __nhc##_exit(void) \
+{ \
+ lowpan_nhc_del(&(__nhc)); \
+} \
+module_exit(__nhc##_exit);
+
+/**
+ * struct lowpan_nhc - hold 6lowpan next hdr compression ifnformation
+ *
+ * @node: holder for the rbtree.
+ * @name: name of the specific next header compression
+ * @nexthdr: next header value of the protocol which should be compressed.
+ * @nexthdrlen: ipv6 nexthdr len for the reserved space.
+ * @id: array for nhc id. Note this need to be in network byteorder.
+ * @mask: array for nhc id mask. Note this need to be in network byteorder.
+ * @len: the length of the next header id and mask.
+ * @setup: callback to setup fill the next header id value and mask.
+ * @compress: callback to do the header compression.
+ * @uncompress: callback to do the header uncompression.
+ */
+struct lowpan_nhc {
+ struct rb_node node;
+ const char *name;
+ const u8 nexthdr;
+ const size_t nexthdrlen;
+ u8 *id;
+ u8 *idmask;
+ const size_t idlen;
+
+ void (*idsetup)(struct lowpan_nhc *nhc);
+ int (*uncompress)(struct sk_buff *skb, size_t needed);
+ int (*compress)(struct sk_buff *skb, u8 **hc_ptr);
+};
+
+/**
+ * lowpan_nhc_by_nexthdr - return the 6lowpan nhc by ipv6 nexthdr.
+ *
+ * @nexthdr: ipv6 nexthdr value.
+ */
+struct lowpan_nhc *lowpan_nhc_by_nexthdr(u8 nexthdr);
+
+/**
+ * lowpan_nhc_check_compression - checks if we support compression format. If
+ * we support the nhc by nexthdr field, the 6LoWPAN iphc NHC bit will be
+ * set. If we don't support nexthdr will be added as inline data to the
+ * 6LoWPAN header.
+ *
+ * @skb: skb of 6LoWPAN header to read nhc and replace header.
+ * @hdr: ipv6hdr to check the nexthdr value
+ * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of
+ * replaced header.
+ * @iphc0: iphc0 pointer to set the 6LoWPAN NHC bit
+ */
+int lowpan_nhc_check_compression(struct sk_buff *skb,
+ const struct ipv6hdr *hdr, u8 **hc_ptr,
+ u8 *iphc0);
+
+/**
+ * lowpan_nhc_do_compression - calling compress callback for nhc
+ *
+ * @skb: skb of 6LoWPAN header to read nhc and replace header.
+ * @hdr: ipv6hdr to set the nexthdr value
+ * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of
+ * replaced header.
+ */
+int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr,
+ u8 **hc_ptr);
+
+/**
+ * lowpan_nhc_do_uncompression - calling uncompress callback for nhc
+ *
+ * @nhc: 6LoWPAN nhc context, get by lowpan_nhc_by_ functions.
+ * @skb: skb of 6LoWPAN header, skb->data should be pointed to nhc id value.
+ * @dev: netdevice for print logging information.
+ * @hdr: ipv6hdr for setting nexthdr value.
+ */
+int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev,
+ struct ipv6hdr *hdr);
+
+/**
+ * lowpan_nhc_add - register a next header compression to framework
+ *
+ * @nhc: nhc which should be add.
+ */
+int lowpan_nhc_add(struct lowpan_nhc *nhc);
+
+/**
+ * lowpan_nhc_del - delete a next header compression from framework
+ *
+ * @nhc: nhc which should be delete.
+ */
+void lowpan_nhc_del(struct lowpan_nhc *nhc);
+
+/**
+ * lowpan_nhc_init - adding all default nhcs
+ */
+void lowpan_nhc_init(void);
+
+#endif /* __6LOWPAN_NHC_H */
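
Aside (illustration only, not part of the patch): the LOWPAN_NHC()/module_lowpan_nhc() pair is all the boilerplate a per-header module needs; the files that follow (nhc_dest.c, nhc_fragment.c, ...) use exactly this pattern with NULL compress/uncompress callbacks. As a sketch of the callback contract only, a hypothetical module could look like the code below; the nhc_foo name, the 0xea ID (a reserved 1110EEEN value) and the stub callbacks are invented for illustration and are not taken from this series:

#include "nhc.h"

#define LOWPAN_NHC_FOO_IDLEN	1
#define LOWPAN_NHC_FOO_ID_0	0xea	/* hypothetical, reserved in RFC 6282 */
#define LOWPAN_NHC_FOO_MASK_0	0xfe

static void foo_nhid_setup(struct lowpan_nhc *nhc)
{
	nhc->id[0] = LOWPAN_NHC_FOO_ID_0;
	nhc->idmask[0] = LOWPAN_NHC_FOO_MASK_0;
}

/* called with skb->data just past the NHC ID byte(s); return 0 or -errno */
static int foo_uncompress(struct sk_buff *skb, size_t needed)
{
	/* a real module would fetch the compressed fields, skb_cow(skb, needed)
	 * and push the rebuilt header, as nhc_udp.c does below
	 */
	return -EOPNOTSUPP;
}

/* append compressed fields via lowpan_push_hc_data(hc_ptr, ...); the core
 * then skb_pull()s nexthdrlen bytes of the uncompressed header
 */
static int foo_compress(struct sk_buff *skb, u8 **hc_ptr)
{
	return -EOPNOTSUPP;
}

LOWPAN_NHC(nhc_foo, "hypothetical example", NEXTHDR_NONE, 0,
	   foo_nhid_setup, LOWPAN_NHC_FOO_IDLEN, foo_uncompress, foo_compress);

module_lowpan_nhc(nhc_foo);
MODULE_DESCRIPTION("6LoWPAN next header compression example (illustration only)");
MODULE_LICENSE("GPL");
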
diff --git a/net/6lowpan/nhc_dest.c b/net/6lowpan/nhc_dest.c
new file mode 100644
index 000000000000..0b292c9646eb
--- /dev/null
+++ b/net/6lowpan/nhc_dest.c
@@ -0,0 +1,28 @@
+/*
+ * 6LoWPAN IPv6 Destination Options Header compression according to
+ * RFC6282
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_NHC_DEST_IDLEN 1
+#define LOWPAN_NHC_DEST_ID_0 0xe6
+#define LOWPAN_NHC_DEST_MASK_0 0xfe
+
+static void dest_nhid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_NHC_DEST_ID_0;
+ nhc->idmask[0] = LOWPAN_NHC_DEST_MASK_0;
+}
+
+LOWPAN_NHC(nhc_dest, "RFC6282 Destination Options", NEXTHDR_DEST, 0,
+ dest_nhid_setup, LOWPAN_NHC_DEST_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(nhc_dest);
+MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Destination Options compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_fragment.c b/net/6lowpan/nhc_fragment.c
new file mode 100644
index 000000000000..473dbc58ef84
--- /dev/null
+++ b/net/6lowpan/nhc_fragment.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN IPv6 Fragment Header compression according to RFC6282
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_NHC_FRAGMENT_IDLEN 1
+#define LOWPAN_NHC_FRAGMENT_ID_0 0xe4
+#define LOWPAN_NHC_FRAGMENT_MASK_0 0xfe
+
+static void fragment_nhid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_NHC_FRAGMENT_ID_0;
+ nhc->idmask[0] = LOWPAN_NHC_FRAGMENT_MASK_0;
+}
+
+LOWPAN_NHC(nhc_fragment, "RFC6282 Fragment", NEXTHDR_FRAGMENT, 0,
+ fragment_nhid_setup, LOWPAN_NHC_FRAGMENT_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(nhc_fragment);
+MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Fragment compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_hop.c b/net/6lowpan/nhc_hop.c
new file mode 100644
index 000000000000..1eb66be16f19
--- /dev/null
+++ b/net/6lowpan/nhc_hop.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN IPv6 Hop-by-Hop Options Header compression according to RFC6282
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_NHC_HOP_IDLEN 1
+#define LOWPAN_NHC_HOP_ID_0 0xe0
+#define LOWPAN_NHC_HOP_MASK_0 0xfe
+
+static void hop_nhid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_NHC_HOP_ID_0;
+ nhc->idmask[0] = LOWPAN_NHC_HOP_MASK_0;
+}
+
+LOWPAN_NHC(nhc_hop, "RFC6282 Hop-by-Hop Options", NEXTHDR_HOP, 0,
+ hop_nhid_setup, LOWPAN_NHC_HOP_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(nhc_hop);
+MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Hop-by-Hop Options compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_ipv6.c b/net/6lowpan/nhc_ipv6.c
new file mode 100644
index 000000000000..2313d1600af3
--- /dev/null
+++ b/net/6lowpan/nhc_ipv6.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN IPv6 Header compression according to RFC6282
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_NHC_IPV6_IDLEN 1
+#define LOWPAN_NHC_IPV6_ID_0 0xee
+#define LOWPAN_NHC_IPV6_MASK_0 0xfe
+
+static void ipv6_nhid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_NHC_IPV6_ID_0;
+ nhc->idmask[0] = LOWPAN_NHC_IPV6_MASK_0;
+}
+
+LOWPAN_NHC(nhc_ipv6, "RFC6282 IPv6", NEXTHDR_IPV6, 0, ipv6_nhid_setup,
+ LOWPAN_NHC_IPV6_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(nhc_ipv6);
+MODULE_DESCRIPTION("6LoWPAN next header RFC6282 IPv6 compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_mobility.c b/net/6lowpan/nhc_mobility.c
new file mode 100644
index 000000000000..60d3f3886c98
--- /dev/null
+++ b/net/6lowpan/nhc_mobility.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN IPv6 Mobility Header compression according to RFC6282
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_NHC_MOBILITY_IDLEN 1
+#define LOWPAN_NHC_MOBILITY_ID_0 0xe8
+#define LOWPAN_NHC_MOBILITY_MASK_0 0xfe
+
+static void mobility_nhid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_NHC_MOBILITY_ID_0;
+ nhc->idmask[0] = LOWPAN_NHC_MOBILITY_MASK_0;
+}
+
+LOWPAN_NHC(nhc_mobility, "RFC6282 Mobility", NEXTHDR_MOBILITY, 0,
+ mobility_nhid_setup, LOWPAN_NHC_MOBILITY_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(nhc_mobility);
+MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Mobility compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_routing.c b/net/6lowpan/nhc_routing.c
new file mode 100644
index 000000000000..c393280f11c4
--- /dev/null
+++ b/net/6lowpan/nhc_routing.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN IPv6 Routing Header compression according to RFC6282
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_NHC_ROUTING_IDLEN 1
+#define LOWPAN_NHC_ROUTING_ID_0 0xe2
+#define LOWPAN_NHC_ROUTING_MASK_0 0xfe
+
+static void routing_nhid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_NHC_ROUTING_ID_0;
+ nhc->idmask[0] = LOWPAN_NHC_ROUTING_MASK_0;
+}
+
+LOWPAN_NHC(nhc_routing, "RFC6282 Routing", NEXTHDR_ROUTING, 0,
+ routing_nhid_setup, LOWPAN_NHC_ROUTING_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(nhc_routing);
+MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Routing compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
new file mode 100644
index 000000000000..c6bcaeb428ae
--- /dev/null
+++ b/net/6lowpan/nhc_udp.c
@@ -0,0 +1,157 @@
+/*
+ * 6LoWPAN IPv6 UDP compression according to RFC6282
+ *
+ *
+ * Authors:
+ * Alexander Aring <aar@pengutronix.de>
+ *
+ * Orignal written by:
+ * Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
+ * Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_NHC_UDP_IDLEN 1
+
+static int udp_uncompress(struct sk_buff *skb, size_t needed)
+{
+ u8 tmp = 0, val = 0;
+ struct udphdr uh;
+ bool fail;
+ int err;
+
+ fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp));
+
+ pr_debug("UDP header uncompression\n");
+ switch (tmp & LOWPAN_NHC_UDP_CS_P_11) {
+ case LOWPAN_NHC_UDP_CS_P_00:
+ fail |= lowpan_fetch_skb(skb, &uh.source, sizeof(uh.source));
+ fail |= lowpan_fetch_skb(skb, &uh.dest, sizeof(uh.dest));
+ break;
+ case LOWPAN_NHC_UDP_CS_P_01:
+ fail |= lowpan_fetch_skb(skb, &uh.source, sizeof(uh.source));
+ fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
+ uh.dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT);
+ break;
+ case LOWPAN_NHC_UDP_CS_P_10:
+ fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
+ uh.source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT);
+ fail |= lowpan_fetch_skb(skb, &uh.dest, sizeof(uh.dest));
+ break;
+ case LOWPAN_NHC_UDP_CS_P_11:
+ fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
+ uh.source = htons(LOWPAN_NHC_UDP_4BIT_PORT + (val >> 4));
+ uh.dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + (val & 0x0f));
+ break;
+ default:
+ BUG();
+ }
+
+ pr_debug("uncompressed UDP ports: src = %d, dst = %d\n",
+ ntohs(uh.source), ntohs(uh.dest));
+
+ /* checksum */
+ if (tmp & LOWPAN_NHC_UDP_CS_C) {
+ pr_debug_ratelimited("checksum elided currently not supported\n");
+ fail = true;
+ } else {
+ fail |= lowpan_fetch_skb(skb, &uh.check, sizeof(uh.check));
+ }
+
+ if (fail)
+ return -EINVAL;
+
+ /* UDP length needs to be infered from the lower layers
+ * here, we obtain the hint from the remaining size of the
+ * frame
+ */
+ uh.len = htons(skb->len + sizeof(struct udphdr));
+ pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len));
+
+ /* replace the compressed UDP head by the uncompressed UDP
+ * header
+ */
+ err = skb_cow(skb, needed);
+ if (unlikely(err))
+ return err;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr));
+
+ return 0;
+}
+
+static int udp_compress(struct sk_buff *skb, u8 **hc_ptr)
+{
+ const struct udphdr *uh = udp_hdr(skb);
+ u8 tmp;
+
+ if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) ==
+ LOWPAN_NHC_UDP_4BIT_PORT) &&
+ ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) ==
+ LOWPAN_NHC_UDP_4BIT_PORT)) {
+ pr_debug("UDP header: both ports compression to 4 bits\n");
+ /* compression value */
+ tmp = LOWPAN_NHC_UDP_CS_P_11;
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
+ /* source and destination port */
+ tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT +
+ ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4);
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
+ } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) ==
+ LOWPAN_NHC_UDP_8BIT_PORT) {
+ pr_debug("UDP header: remove 8 bits of dest\n");
+ /* compression value */
+ tmp = LOWPAN_NHC_UDP_CS_P_01;
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
+ /* source port */
+ lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source));
+ /* destination port */
+ tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT;
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
+ } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) ==
+ LOWPAN_NHC_UDP_8BIT_PORT) {
+ pr_debug("UDP header: remove 8 bits of source\n");
+ /* compression value */
+ tmp = LOWPAN_NHC_UDP_CS_P_10;
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
+ /* source port */
+ tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT;
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
+ /* destination port */
+ lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest));
+ } else {
+ pr_debug("UDP header: can't compress\n");
+ /* compression value */
+ tmp = LOWPAN_NHC_UDP_CS_P_00;
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
+ /* source port */
+ lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source));
+ /* destination port */
+ lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest));
+ }
+
+ /* checksum is always inline */
+ lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check));
+
+ return 0;
+}
+
+static void udp_nhid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_NHC_UDP_ID;
+ nhc->idmask[0] = LOWPAN_NHC_UDP_MASK;
+}
+
+LOWPAN_NHC(nhc_udp, "RFC6282 UDP", NEXTHDR_UDP, sizeof(struct udphdr),
+ udp_nhid_setup, LOWPAN_NHC_UDP_IDLEN, udp_uncompress, udp_compress);
+
+module_lowpan_nhc(nhc_udp);
+MODULE_DESCRIPTION("6LoWPAN next header RFC6282 UDP compression");
+MODULE_LICENSE("GPL");
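
Aside (illustration only, not part of the patch): udp_compress() above picks one of the four LOWPAN_NHC_UDP_CS_P_* port encodings — both ports in the RFC 6282 4-bit range collapse into a single byte, a single port in the 8-bit range shrinks to one byte, and anything else is carried inline; the checksum is always sent inline here (elided checksums are rejected on uncompress). A stand-alone sketch of just that decision, with the port ranges written out from RFC 6282 section 4.3.3 (assumed to mirror the kernel's LOWPAN_NHC_UDP_{4BIT,8BIT}_{PORT,MASK} macros in include/net/6lowpan.h, which are not part of this diff):

#include <stdint.h>
#include <stdio.h>

#define UDP_4BIT_PORT	0xf0b0	/* RFC 6282: 16 ports 0xf0b0..0xf0bf  */
#define UDP_4BIT_MASK	0xfff0
#define UDP_8BIT_PORT	0xf000	/* RFC 6282: 256 ports 0xf000..0xf0ff */
#define UDP_8BIT_MASK	0xff00

/* return the CS_P_xx case udp_compress() would choose for these ports */
static const char *udp_port_mode(uint16_t src, uint16_t dst)
{
	if ((src & UDP_4BIT_MASK) == UDP_4BIT_PORT &&
	    (dst & UDP_4BIT_MASK) == UDP_4BIT_PORT)
		return "P_11: both ports packed into one byte (4+4 bits)";
	if ((dst & UDP_8BIT_MASK) == UDP_8BIT_PORT)
		return "P_01: destination port compressed to 8 bits";
	if ((src & UDP_8BIT_MASK) == UDP_8BIT_PORT)
		return "P_10: source port compressed to 8 bits";
	return "P_00: both ports carried inline (16+16 bits)";
}

int main(void)
{
	printf("%s\n", udp_port_mode(0xf0b1, 0xf0b2));	/* both in 4-bit range */
	printf("%s\n", udp_port_mode(1234, 0xf023));	/* dest in 8-bit range */
	printf("%s\n", udp_port_mode(5683, 5683));	/* e.g. CoAP, inline   */
	return 0;
}
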
diff --git a/net/802/fc.c b/net/802/fc.c
index 7c174b6750cd..7b9219022418 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -75,29 +75,8 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
return -hdr_len;
}
-/*
- * A neighbour discovery of some species (eg arp) has completed. We
- * can now send the packet.
- */
-
-static int fc_rebuild_header(struct sk_buff *skb)
-{
-#ifdef CONFIG_INET
- struct fch_hdr *fch=(struct fch_hdr *)skb->data;
- struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
- if(fcllc->ethertype != htons(ETH_P_IP)) {
- printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype));
- return 0;
- }
- return arp_find(fch->daddr, skb);
-#else
- return 0;
-#endif
-}
-
static const struct header_ops fc_header_ops = {
.create = fc_header,
- .rebuild = fc_rebuild_header,
};
static void fc_setup(struct net_device *dev)
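
Aside (illustration only, not part of the patch): a recurring change in the files below is that the header_ops->rebuild() callback and its arp_find()-based resolvers are deleted (fc, fddi, hippi, 802.1q, ax25, ...), leaving only .create and, where present, .parse. A sketch of what a link-layer driver's header_ops looks like after this series; my_header_create is a hypothetical placeholder using the real .create signature:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int my_header_create(struct sk_buff *skb, struct net_device *dev,
			    unsigned short type, const void *daddr,
			    const void *saddr, unsigned int len)
{
	/* build the link-layer header in the skb headroom and return its
	 * length (or a negative length for an unfinished header, as
	 * fc_header() does above)
	 */
	return 0;
}

static const struct header_ops my_header_ops = {
	.create	= my_header_create,
	/* no .rebuild member any more */
};
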
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 59e7346f1193..7d3a0af954e8 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -87,31 +87,6 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
return -hl;
}
-
-/*
- * Rebuild the FDDI MAC header. This is called after an ARP
- * (or in future other address resolution) has completed on
- * this sk_buff. We now let ARP fill in the other fields.
- */
-
-static int fddi_rebuild_header(struct sk_buff *skb)
-{
- struct fddihdr *fddi = (struct fddihdr *)skb->data;
-
-#ifdef CONFIG_INET
- if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
- /* Try to get ARP to resolve the header and fill destination address */
- return arp_find(fddi->daddr, skb);
- else
-#endif
- {
- printk("%s: Don't know how to resolve type %04X addresses.\n",
- skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
- return 0;
- }
-}
-
-
/*
* Determine the packet's protocol ID and fill in skb fields.
* This routine is called before an incoming packet is passed
@@ -177,7 +152,6 @@ EXPORT_SYMBOL(fddi_change_mtu);
static const struct header_ops fddi_header_ops = {
.create = fddi_header,
- .rebuild = fddi_rebuild_header,
};
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 2e03f8259dd5..ade1a52cdcff 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -91,33 +91,6 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
/*
- * Rebuild the HIPPI MAC header. This is called after an ARP has
- * completed on this sk_buff. We now let ARP fill in the other fields.
- */
-
-static int hippi_rebuild_header(struct sk_buff *skb)
-{
- struct hippi_hdr *hip = (struct hippi_hdr *)skb->data;
-
- /*
- * Only IP is currently supported
- */
-
- if(hip->snap.ethertype != htons(ETH_P_IP))
- {
- printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
- return 0;
- }
-
- /*
- * We don't support dynamic ARP on HIPPI, but we use the ARP
- * static ARP tables to hold the I-FIELDs.
- */
- return arp_find(hip->le.daddr, skb);
-}
-
-
-/*
* Determine the packet's protocol ID.
*/
@@ -186,7 +159,6 @@ EXPORT_SYMBOL(hippi_neigh_setup_dev);
static const struct header_ops hippi_header_ops = {
.create = hippi_header,
- .rebuild = hippi_rebuild_header,
};
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 118956448cf6..f196552ec3c4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -37,39 +37,6 @@
#include <linux/netpoll.h>
/*
- * Rebuild the Ethernet MAC header. This is called after an ARP
- * (or in future other address resolution) has completed on this
- * sk_buff. We now let ARP fill in the other fields.
- *
- * This routine CANNOT use cached dst->neigh!
- * Really, it is used only when dst->neigh is wrong.
- *
- * TODO: This needs a checkup, I'm ignorant here. --BLG
- */
-static int vlan_dev_rebuild_header(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
-
- switch (veth->h_vlan_encapsulated_proto) {
-#ifdef CONFIG_INET
- case htons(ETH_P_IP):
-
- /* TODO: Confirm this will work with VLAN headers... */
- return arp_find(veth->h_dest, skb);
-#endif
- default:
- pr_debug("%s: unable to resolve type %X addresses\n",
- dev->name, ntohs(veth->h_vlan_encapsulated_proto));
-
- ether_addr_copy(veth->h_source, dev->dev_addr);
- break;
- }
-
- return 0;
-}
-
-/*
* Create the VLAN header for an arbitrary protocol layer
*
* saddr=NULL means use device source address
@@ -534,7 +501,6 @@ static int vlan_dev_get_lock_subclass(struct net_device *dev)
static const struct header_ops vlan_header_ops = {
.create = vlan_dev_hard_header,
- .rebuild = vlan_dev_rebuild_header,
.parse = eth_header_parse,
};
@@ -554,7 +520,6 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev
static const struct header_ops vlan_passthru_header_ops = {
.create = vlan_passthru_hard_header,
- .rebuild = dev_rebuild_header,
.parse = eth_header_parse,
};
@@ -827,5 +792,5 @@ void vlan_setup(struct net_device *dev)
dev->destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
- memset(dev->broadcast, 0, ETH_ALEN);
+ eth_zero_addr(dev->broadcast);
}
diff --git a/net/Makefile b/net/Makefile
index 38704bdf941a..3995613e5510 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -69,7 +69,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/
obj-$(CONFIG_NFC) += nfc/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
-obj-$(CONFIG_NET_MPLS_GSO) += mpls/
+obj-$(CONFIG_MPLS) += mpls/
obj-$(CONFIG_HSR) += hsr/
ifneq ($(CONFIG_NET_SWITCHDEV),)
obj-y += switchdev/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d1c55d8dd0a2..8ad3ec2610b6 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -141,7 +141,7 @@ static void __aarp_send_query(struct aarp_entry *a)
eah->pa_src_net = sat->s_net;
eah->pa_src_node = sat->s_node;
- memset(eah->hw_dst, '\0', ETH_ALEN);
+ eth_zero_addr(eah->hw_dst);
eah->pa_dst_zero = 0;
eah->pa_dst_net = a->target_addr.s_net;
@@ -189,7 +189,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
eah->pa_src_node = us->s_node;
if (!sha)
- memset(eah->hw_dst, '\0', ETH_ALEN);
+ eth_zero_addr(eah->hw_dst);
else
ether_addr_copy(eah->hw_dst, sha);
@@ -239,7 +239,7 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
eah->pa_src_net = us->s_net;
eah->pa_src_node = us->s_node;
- memset(eah->hw_dst, '\0', ETH_ALEN);
+ eth_zero_addr(eah->hw_dst);
eah->pa_dst_zero = 0;
eah->pa_dst_net = us->s_net;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 0d0766ea5ab1..3b7ad43c7dad 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1559,8 +1559,7 @@ freeit:
return 0;
}
-static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t len)
+static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct atalk_sock *at = at_sk(sk);
@@ -1728,8 +1727,8 @@ out:
return err ? : len;
}
-static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct ddpehdr *ddp;
diff --git a/net/atm/common.c b/net/atm/common.c
index b84057e41bd6..ed0466637e13 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -523,8 +523,8 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci)
return 0;
}
-int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct atm_vcc *vcc;
@@ -569,8 +569,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
return copied;
}
-int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
- size_t size)
+int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
{
struct sock *sk = sock->sk;
DEFINE_WAIT(wait);
diff --git a/net/atm/common.h b/net/atm/common.h
index cc3c2dae4d79..4d6f5b2068ac 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -13,10 +13,9 @@
int vcc_create(struct net *net, struct socket *sock, int protocol, int family);
int vcc_release(struct socket *sock);
int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
-int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t size, int flags);
-int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
- size_t total_len);
+int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags);
+int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
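
Aside (illustration only, not part of the patch): the other tree-wide pattern in this pull is that the struct kiocb * argument is dropped from every sendmsg/recvmsg handler, as the vcc_sendmsg()/vcc_recvmsg() prototypes above show; appletalk, ax25, bluetooth and the remaining protocols below follow suit. A hypothetical handler pair written against the new signatures:

#include <linux/net.h>
#include <linux/socket.h>

/* new-style proto_ops handlers: no struct kiocb * first argument */
static int my_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	return len;		/* stub: pretend the whole message was queued */
}

static int my_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
		      int flags)
{
	return 0;		/* stub: nothing received */
}
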
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 4b98f897044a..cd3b37989057 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -2001,7 +2001,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
if (entry == NULL)
goto out;
memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
- memset(entry->mac_addr, 0, ETH_ALEN);
+ eth_zero_addr(entry->mac_addr);
entry->recv_vcc = vcc;
entry->old_recv_push = old_push;
entry->status = ESI_UNKNOWN;
@@ -2086,7 +2086,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
entry->vcc = vcc;
entry->old_push = old_push;
memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
- memset(entry->mac_addr, 0, ETH_ALEN);
+ eth_zero_addr(entry->mac_addr);
entry->status = ESI_UNKNOWN;
hlist_add_head(&entry->next, &priv->lec_arp_empty_ones);
entry->timer.expires = jiffies + priv->vcc_timeout_period;
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 523bce72f698..4fd6af47383a 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -19,36 +19,15 @@
#include "resources.h"
#include "signaling.h"
-#undef WAIT_FOR_DEMON /* #define this if system calls on SVC sockets
- should block until the demon runs.
- Danger: may cause nasty hangs if the demon
- crashes. */
-
struct atm_vcc *sigd = NULL;
-#ifdef WAIT_FOR_DEMON
-static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep);
-#endif
static void sigd_put_skb(struct sk_buff *skb)
{
-#ifdef WAIT_FOR_DEMON
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(&sigd_sleep, &wait);
- while (!sigd) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- pr_debug("atmsvc: waiting for signaling daemon...\n");
- schedule();
- }
- current->state = TASK_RUNNING;
- remove_wait_queue(&sigd_sleep, &wait);
-#else
if (!sigd) {
pr_debug("atmsvc: no signaling daemon\n");
kfree_skb(skb);
return;
}
-#endif
atm_force_charge(sigd, skb->truesize);
skb_queue_tail(&sk_atm(sigd)->sk_receive_queue, skb);
sk_atm(sigd)->sk_data_ready(sk_atm(sigd));
@@ -261,8 +240,5 @@ int sigd_attach(struct atm_vcc *vcc)
vcc_insert_socket(sk_atm(vcc));
set_bit(ATM_VF_META, &vcc->flags);
set_bit(ATM_VF_READY, &vcc->flags);
-#ifdef WAIT_FOR_DEMON
- wake_up(&sigd_sleep);
-#endif
return 0;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ca049a7c9287..330c1f4a5a0b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1432,8 +1432,7 @@ out:
return err;
}
-static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name);
struct sock *sk = sock->sk;
@@ -1599,8 +1598,8 @@ out:
return err;
}
-static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 67de6b33f2c3..7c646bb2c6f7 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -46,9 +46,9 @@
#ifdef CONFIG_INET
-int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *daddr,
- const void *saddr, unsigned int len)
+static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
unsigned char *buff;
@@ -100,7 +100,7 @@ int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
return -AX25_HEADER_LEN; /* Unfinished header */
}
-int ax25_rebuild_header(struct sk_buff *skb)
+netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
{
struct sk_buff *ourskb;
unsigned char *bp = skb->data;
@@ -115,9 +115,6 @@ int ax25_rebuild_header(struct sk_buff *skb)
dst = (ax25_address *)(bp + 1);
src = (ax25_address *)(bp + 8);
- if (arp_find(bp + 1, skb))
- return 1;
-
route = ax25_get_route(dst, NULL);
if (route) {
digipeat = route->digipeat;
@@ -129,6 +126,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
dev = skb->dev;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
+ kfree_skb(skb);
goto put;
}
@@ -212,31 +210,29 @@ put:
if (route)
ax25_put_route(route);
- return 1;
+ return NETDEV_TX_OK;
}
#else /* INET */
-int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *daddr,
- const void *saddr, unsigned int len)
+static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
return -AX25_HEADER_LEN;
}
-int ax25_rebuild_header(struct sk_buff *skb)
+netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
{
- return 1;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
-
#endif
const struct header_ops ax25_header_ops = {
.create = ax25_hard_header,
- .rebuild = ax25_rebuild_header,
};
-EXPORT_SYMBOL(ax25_hard_header);
-EXPORT_SYMBOL(ax25_rebuild_header);
EXPORT_SYMBOL(ax25_header_ops);
+EXPORT_SYMBOL(ax25_ip_xmit);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 27649e85f3f6..090828cf1fa7 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -592,15 +592,16 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
- ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
- (curr_gw == gw_node ? "=>" : " "),
- gw_node->orig_node->orig,
- router_ifinfo->bat_iv.tq_avg, router->addr,
- router->if_incoming->net_dev->name,
- gw_node->bandwidth_down / 10,
- gw_node->bandwidth_down % 10,
- gw_node->bandwidth_up / 10,
- gw_node->bandwidth_up % 10);
+ seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
+ (curr_gw == gw_node ? "=>" : " "),
+ gw_node->orig_node->orig,
+ router_ifinfo->bat_iv.tq_avg, router->addr,
+ router->if_incoming->net_dev->name,
+ gw_node->bandwidth_down / 10,
+ gw_node->bandwidth_down % 10,
+ gw_node->bandwidth_up / 10,
+ gw_node->bandwidth_up % 10);
+ ret = seq_has_overflowed(seq) ? -1 : 0;
if (curr_gw)
batadv_gw_node_free_ref(curr_gw);
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 7de74635a110..b8c794b87523 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -91,4 +91,12 @@ config BT_SELFTEST_SMP
Run test cases for SMP cryptographic functionality, including both
legacy SMP as well as the Secure Connections features.
+config BT_DEBUGFS
+ bool "Export Bluetooth internals in debugfs"
+ depends on BT && DEBUG_FS
+ default y
+ help
+ Provide extensive information about internal Bluetooth states
+ in debugfs.
+
source "drivers/bluetooth/Kconfig"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 8e96e3072266..5d608799717e 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -13,8 +13,9 @@ bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \
- a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o
+ a2mp.o amp.o ecc.o hci_request.o
+bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
subdir-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index cedfbda15dad..5a04eb1a7e57 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -19,9 +19,11 @@
#include "a2mp.h"
#include "amp.h"
+#define A2MP_FEAT_EXT 0x8000
+
/* Global AMP Manager list */
-LIST_HEAD(amp_mgr_list);
-DEFINE_MUTEX(amp_mgr_list_lock);
+static LIST_HEAD(amp_mgr_list);
+static DEFINE_MUTEX(amp_mgr_list_lock);
/* A2MP build & send command helper functions */
static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data)
@@ -43,7 +45,7 @@ static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data)
return cmd;
}
-void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data)
+static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data)
{
struct l2cap_chan *chan = mgr->a2mp_chan;
struct a2mp_cmd *cmd;
@@ -67,7 +69,7 @@ void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data)
kfree(cmd);
}
-u8 __next_ident(struct amp_mgr *mgr)
+static u8 __next_ident(struct amp_mgr *mgr)
{
if (++mgr->ident == 0)
mgr->ident = 1;
@@ -75,6 +77,23 @@ u8 __next_ident(struct amp_mgr *mgr)
return mgr->ident;
}
+static struct amp_mgr *amp_mgr_lookup_by_state(u8 state)
+{
+ struct amp_mgr *mgr;
+
+ mutex_lock(&amp_mgr_list_lock);
+ list_for_each_entry(mgr, &amp_mgr_list, list) {
+ if (test_and_clear_bit(state, &mgr->state)) {
+ amp_mgr_get(mgr);
+ mutex_unlock(&amp_mgr_list_lock);
+ return mgr;
+ }
+ }
+ mutex_unlock(&amp_mgr_list_lock);
+
+ return NULL;
+}
+
/* hci_dev_list shall be locked */
static void __a2mp_add_cl(struct amp_mgr *mgr, struct a2mp_cl *cl)
{
@@ -860,23 +879,6 @@ struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn,
return mgr->a2mp_chan;
}
-struct amp_mgr *amp_mgr_lookup_by_state(u8 state)
-{
- struct amp_mgr *mgr;
-
- mutex_lock(&amp_mgr_list_lock);
- list_for_each_entry(mgr, &amp_mgr_list, list) {
- if (test_and_clear_bit(state, &mgr->state)) {
- amp_mgr_get(mgr);
- mutex_unlock(&amp_mgr_list_lock);
- return mgr;
- }
- }
- mutex_unlock(&amp_mgr_list_lock);
-
- return NULL;
-}
-
void a2mp_send_getinfo_rsp(struct hci_dev *hdev)
{
struct amp_mgr *mgr;
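
amp_mgr_lookup_by_state(), now static and moved above its only caller, claims the first manager whose state bit is set and clears that bit while still holding the list mutex, so two callers can never grab the same transition. A small userspace sketch of that lookup-and-consume pattern, assuming a plain singly linked list, a pthread mutex and C11 atomics in place of the kernel primitives:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct mgr {
        atomic_uint state;              /* bit mask of pending states */
        struct mgr *next;
};

static struct mgr *mgr_list;
static pthread_mutex_t mgr_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Find the first manager with 'bit' set and clear it atomically, so a
 * concurrent caller cannot claim the same manager twice. */
static struct mgr *mgr_lookup_by_state(unsigned int bit)
{
        struct mgr *m;

        pthread_mutex_lock(&mgr_list_lock);
        for (m = mgr_list; m; m = m->next) {
                unsigned int old = atomic_fetch_and(&m->state, ~(1u << bit));

                if (old & (1u << bit)) {
                        pthread_mutex_unlock(&mgr_list_lock);
                        return m;       /* caller owns this state transition */
                }
        }
        pthread_mutex_unlock(&mgr_list_lock);
        return NULL;
}

int main(void)
{
        static struct mgr a;

        atomic_init(&a.state, 1u << 2);
        mgr_list = &a;
        printf("first lookup:  %p\n", (void *)mgr_lookup_by_state(2));
        printf("second lookup: %p\n", (void *)mgr_lookup_by_state(2));
        return 0;
}
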
diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h
index 487b54c1308f..296f665adb09 100644
--- a/net/bluetooth/a2mp.h
+++ b/net/bluetooth/a2mp.h
@@ -17,8 +17,6 @@
#include <net/bluetooth/l2cap.h>
-#define A2MP_FEAT_EXT 0x8000
-
enum amp_mgr_state {
READ_LOC_AMP_INFO,
READ_LOC_AMP_ASSOC,
@@ -131,16 +129,10 @@ struct a2mp_physlink_rsp {
#define A2MP_STATUS_PHYS_LINK_EXISTS 0x05
#define A2MP_STATUS_SECURITY_VIOLATION 0x06
-extern struct list_head amp_mgr_list;
-extern struct mutex amp_mgr_list_lock;
-
struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr);
int amp_mgr_put(struct amp_mgr *mgr);
-u8 __next_ident(struct amp_mgr *mgr);
struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn,
struct sk_buff *skb);
-struct amp_mgr *amp_mgr_lookup_by_state(u8 state);
-void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data);
void a2mp_discover_amp(struct l2cap_chan *chan);
void a2mp_send_getinfo_rsp(struct hci_dev *hdev);
void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ce22e0cfa923..20a4698e2255 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -210,8 +210,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
}
EXPORT_SYMBOL(bt_accept_dequeue);
-int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
@@ -283,8 +283,8 @@ static long bt_sock_data_wait(struct sock *sk, long timeo)
return timeo;
}
-int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
int err = 0;
@@ -711,10 +711,9 @@ EXPORT_SYMBOL_GPL(bt_debugfs);
static int __init bt_init(void)
{
- struct sk_buff *skb;
int err;
- BUILD_BUG_ON(sizeof(struct bt_skb_cb) > sizeof(skb->cb));
+ sock_skb_cb_check_size(sizeof(struct bt_skb_cb));
BT_INFO("Core ver %s", VERSION);
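
bt_init() now uses sock_skb_cb_check_size() instead of an open-coded BUILD_BUG_ON() against skb->cb; both give a compile-time guarantee that the protocol's private control block fits into the socket buffer's cb area. A hedged userspace sketch of the same idea with _Static_assert; SKB_CB_SIZE and proto_skb_cb are made-up stand-ins:

#include <stdio.h>

/* Illustrative stand-ins for skb->cb and a protocol's private cb struct. */
#define SKB_CB_SIZE 48

struct proto_skb_cb {
        unsigned char data[40];
        unsigned short flags;
};

/* Userspace analogue of sock_skb_cb_check_size()/BUILD_BUG_ON(): refuse to
 * compile if the private control block no longer fits into skb->cb. */
#define cb_check_size(type) \
        _Static_assert(sizeof(type) <= SKB_CB_SIZE, #type " does not fit in cb")

cb_check_size(struct proto_skb_cb);

int main(void)
{
        printf("cb usage: %zu of %d bytes\n",
               sizeof(struct proto_skb_cb), SKB_CB_SIZE);
        return 0;
}
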
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 4b488ec26105..6ceb5d36a32b 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -218,7 +218,7 @@ static const struct net_device_ops bnep_netdev_ops = {
void bnep_net_setup(struct net_device *dev)
{
- memset(dev->broadcast, 0xff, ETH_ALEN);
+ eth_broadcast_addr(dev->broadcast);
dev->addr_len = ETH_ALEN;
ether_setup(dev);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index c9b8fa544785..91ebb9cb31de 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -309,7 +309,7 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status)
else
hci_add_sco(sco, conn->handle);
} else {
- hci_proto_connect_cfm(sco, status);
+ hci_connect_cfm(sco, status);
hci_conn_del(sco);
}
}
@@ -618,7 +618,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type,
status);
- hci_proto_connect_cfm(conn, status);
+ hci_connect_cfm(conn, status);
hci_conn_del(conn);
@@ -733,6 +733,14 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_request req;
int err;
+ /* Let's make sure that LE is enabled. */
+ if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+ if (lmp_le_capable(hdev))
+ return ERR_PTR(-ECONNREFUSED);
+
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
/* Some devices send ATT messages as soon as the physical link is
* established. To be able to handle these ATT messages, the user-
* space first establishes the connection and then starts the pairing
@@ -856,8 +864,12 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
{
struct hci_conn *acl;
- if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+ if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+ if (lmp_bredr_capable(hdev))
+ return ERR_PTR(-ECONNREFUSED);
+
return ERR_PTR(-EOPNOTSUPP);
+ }
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
@@ -1139,7 +1151,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
list_for_each_entry_safe(c, n, &h->list, list) {
c->state = BT_CLOSED;
- hci_proto_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
+ hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
hci_conn_del(c);
}
}
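
hci_connect_le() and hci_connect_acl() now distinguish a controller that is capable of the transport but currently has it disabled (-ECONNREFUSED) from one that cannot do it at all (-EOPNOTSUPP). A tiny sketch of that error split in plain C; struct ctrl and check_le_transport() are illustrative, not the hci_dev API:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Illustrative controller state; not the kernel's hci_dev. */
struct ctrl {
        bool le_capable;        /* hardware supports LE */
        bool le_enabled;        /* LE currently switched on */
};

/* Mirror of the error split above: a capable-but-disabled transport is
 * "refused", a transport the hardware cannot do at all is "not supported". */
static int check_le_transport(const struct ctrl *c)
{
        if (!c->le_enabled)
                return c->le_capable ? -ECONNREFUSED : -EOPNOTSUPP;
        return 0;
}

int main(void)
{
        struct ctrl off = { true, false }, none = { false, false };

        printf("capable, disabled: %s\n", strerror(-check_le_transport(&off)));
        printf("not capable:       %s\n", strerror(-check_le_transport(&none)));
        return 0;
}
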
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3322d3f4c85a..bba4c344c6e0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -51,7 +51,7 @@ DEFINE_RWLOCK(hci_dev_list_lock);
/* HCI callback list */
LIST_HEAD(hci_cb_list);
-DEFINE_RWLOCK(hci_cb_list_lock);
+DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
@@ -390,7 +390,7 @@ static void bredr_init(struct hci_request *req)
hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);
}
-static void amp_init(struct hci_request *req)
+static void amp_init1(struct hci_request *req)
{
req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
@@ -400,9 +400,6 @@ static void amp_init(struct hci_request *req)
/* Read Local Supported Commands */
hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
- /* Read Local Supported Features */
- hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
-
/* Read Local AMP Info */
hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
@@ -416,6 +413,16 @@ static void amp_init(struct hci_request *req)
hci_req_add(req, HCI_OP_READ_LOCATION_DATA, 0, NULL);
}
+static void amp_init2(struct hci_request *req)
+{
+ /* Read Local Supported Features. Not all AMP controllers
+ * support this so it's placed conditionally in the second
+ * stage init.
+ */
+ if (req->hdev->commands[14] & 0x20)
+ hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
+}
+
static void hci_init1_req(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
@@ -432,7 +439,7 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt)
break;
case HCI_AMP:
- amp_init(req);
+ amp_init1(req);
break;
default:
@@ -578,6 +585,9 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
+ if (hdev->dev_type == HCI_AMP)
+ return amp_init2(req);
+
if (lmp_bredr_capable(hdev))
bredr_setup(req);
else
@@ -896,17 +906,17 @@ static int __hci_init(struct hci_dev *hdev)
&dut_mode_fops);
}
+ err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
+ if (err < 0)
+ return err;
+
/* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode
* BR/EDR/LE type controllers. AMP controllers only need the
- * first stage init.
+ * first two stages of init.
*/
if (hdev->dev_type != HCI_BREDR)
return 0;
- err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
- if (err < 0)
- return err;
-
err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT);
if (err < 0)
return err;
@@ -1591,6 +1601,12 @@ static int hci_dev_do_close(struct hci_dev *hdev)
{
BT_DBG("%s %p", hdev->name, hdev);
+ if (!test_bit(HCI_UNREGISTER, &hdev->dev_flags)) {
+ /* Execute vendor specific shutdown routine */
+ if (hdev->shutdown)
+ hdev->shutdown(hdev);
+ }
+
cancel_delayed_work(&hdev->power_off);
hci_req_cancel(hdev, ENODEV);
@@ -3448,9 +3464,9 @@ int hci_register_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- write_lock(&hci_cb_list_lock);
- list_add(&cb->list, &hci_cb_list);
- write_unlock(&hci_cb_list_lock);
+ mutex_lock(&hci_cb_list_lock);
+ list_add_tail(&cb->list, &hci_cb_list);
+ mutex_unlock(&hci_cb_list_lock);
return 0;
}
@@ -3460,9 +3476,9 @@ int hci_unregister_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- write_lock(&hci_cb_list_lock);
+ mutex_lock(&hci_cb_list_lock);
list_del(&cb->list);
- write_unlock(&hci_cb_list_lock);
+ mutex_unlock(&hci_cb_list_lock);
return 0;
}
@@ -3517,7 +3533,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->req_start = 1;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -4195,7 +4211,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
if (!skb)
return true;
- return bt_cb(skb)->req.start;
+ return bt_cb(skb)->req_start;
}
static void hci_resend_last(struct hci_dev *hdev)
@@ -4255,14 +4271,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
* command queue (hdev->cmd_q).
*/
if (hdev->sent_cmd) {
- req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+ req_complete = bt_cb(hdev->sent_cmd)->req_complete;
if (req_complete) {
/* We must set the complete callback to NULL to
* avoid calling the callback more than once if
* this function gets called again.
*/
- bt_cb(hdev->sent_cmd)->req.complete = NULL;
+ bt_cb(hdev->sent_cmd)->req_complete = NULL;
goto call_complete;
}
@@ -4271,12 +4287,12 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
/* Remove all pending commands belonging to this request */
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
while ((skb = __skb_dequeue(&hdev->cmd_q))) {
- if (bt_cb(skb)->req.start) {
+ if (bt_cb(skb)->req_start) {
__skb_queue_head(&hdev->cmd_q, skb);
break;
}
- req_complete = bt_cb(skb)->req.complete;
+ req_complete = bt_cb(skb)->req_complete;
kfree_skb(skb);
}
spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
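
hci_cb_list is now protected by a mutex instead of an rwlock and new callbacks are appended with list_add_tail(), so they run in registration order and may sleep. A compact userspace sketch of such a registration list, assuming pthreads and a hand-rolled singly linked list rather than the kernel list API:

#include <pthread.h>
#include <stdio.h>

/* Minimal stand-in for struct hci_cb: a named callback in a linked list. */
struct cb {
        const char *name;
        void (*connect_cfm)(int status);
        struct cb *next;
};

static struct cb *cb_list;
static struct cb **cb_tail = &cb_list;
static pthread_mutex_t cb_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Append at the tail so callbacks run in registration order. */
static void register_cb(struct cb *c)
{
        pthread_mutex_lock(&cb_list_lock);
        c->next = NULL;
        *cb_tail = c;
        cb_tail = &c->next;
        pthread_mutex_unlock(&cb_list_lock);
}

/* Walk the list under the same lock; using a mutex rather than an rwlock
 * also leaves the handlers free to block. */
static void connect_cfm_all(int status)
{
        struct cb *c;

        pthread_mutex_lock(&cb_list_lock);
        for (c = cb_list; c; c = c->next)
                if (c->connect_cfm)
                        c->connect_cfm(status);
        pthread_mutex_unlock(&cb_list_lock);
}

static void l2cap_cfm(int status) { printf("L2CAP cfm %d\n", status); }
static void sco_cfm(int status)   { printf("SCO cfm %d\n", status); }

int main(void)
{
        struct cb l2cap = { "L2CAP", l2cap_cfm, NULL };
        struct cb sco   = { "SCO", sco_cfm, NULL };

        register_cb(&l2cap);
        register_cb(&sco);
        connect_cfm_all(0);
        return 0;
}
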
diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h
index fb68efe083c5..4444dc8cedc2 100644
--- a/net/bluetooth/hci_debugfs.h
+++ b/net/bluetooth/hci_debugfs.h
@@ -20,7 +20,29 @@
SOFTWARE IS DISCLAIMED.
*/
+#if IS_ENABLED(CONFIG_BT_DEBUGFS)
+
void hci_debugfs_create_common(struct hci_dev *hdev);
void hci_debugfs_create_bredr(struct hci_dev *hdev);
void hci_debugfs_create_le(struct hci_dev *hdev);
void hci_debugfs_create_conn(struct hci_conn *conn);
+
+#else
+
+static inline void hci_debugfs_create_common(struct hci_dev *hdev)
+{
+}
+
+static inline void hci_debugfs_create_bredr(struct hci_dev *hdev)
+{
+}
+
+static inline void hci_debugfs_create_le(struct hci_dev *hdev)
+{
+}
+
+static inline void hci_debugfs_create_conn(struct hci_conn *conn)
+{
+}
+
+#endif
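
hci_debugfs.h now follows the usual conditional-stub pattern: real prototypes when CONFIG_BT_DEBUGFS is enabled, empty static inline stubs otherwise, so callers never need #ifdefs of their own. A self-contained sketch of the pattern with a hypothetical CONFIG_FOO_DEBUGFS switch (toggle it with -DCONFIG_FOO_DEBUGFS=1 at build time):

#include <stdio.h>

#ifndef CONFIG_FOO_DEBUGFS
#define CONFIG_FOO_DEBUGFS 0
#endif

struct foo_dev { const char *name; };

#if CONFIG_FOO_DEBUGFS

/* Real implementation, compiled only when the option is on. */
static void foo_debugfs_create(struct foo_dev *dev)
{
        printf("creating debugfs entries for %s\n", dev->name);
}

#else

/* Empty inline stub: callers need no #ifdef and the call compiles away
 * entirely when the option is off. */
static inline void foo_debugfs_create(struct foo_dev *dev)
{
        (void)dev;
}

#endif

int main(void)
{
        struct foo_dev dev = { "hci0" };

        foo_debugfs_create(&dev);
        return 0;
}
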
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a3fb094822b6..39653d46932b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1537,7 +1537,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
if (conn && conn->state == BT_CONNECT) {
if (status != 0x0c || conn->attempt > 2) {
conn->state = BT_CLOSED;
- hci_proto_connect_cfm(conn, status);
+ hci_connect_cfm(conn, status);
hci_conn_del(conn);
} else
conn->state = BT_CONNECT2;
@@ -1581,7 +1581,7 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status)
if (sco) {
sco->state = BT_CLOSED;
- hci_proto_connect_cfm(sco, status);
+ hci_connect_cfm(sco, status);
hci_conn_del(sco);
}
}
@@ -1608,7 +1608,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status)
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
if (conn) {
if (conn->state == BT_CONFIG) {
- hci_proto_connect_cfm(conn, status);
+ hci_connect_cfm(conn, status);
hci_conn_drop(conn);
}
}
@@ -1635,7 +1635,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
if (conn) {
if (conn->state == BT_CONFIG) {
- hci_proto_connect_cfm(conn, status);
+ hci_connect_cfm(conn, status);
hci_conn_drop(conn);
}
}
@@ -1811,7 +1811,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
if (conn) {
if (conn->state == BT_CONFIG) {
- hci_proto_connect_cfm(conn, status);
+ hci_connect_cfm(conn, status);
hci_conn_drop(conn);
}
}
@@ -1838,7 +1838,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status)
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
if (conn) {
if (conn->state == BT_CONFIG) {
- hci_proto_connect_cfm(conn, status);
+ hci_connect_cfm(conn, status);
hci_conn_drop(conn);
}
}
@@ -1873,7 +1873,7 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status)
if (sco) {
sco->state = BT_CLOSED;
- hci_proto_connect_cfm(sco, status);
+ hci_connect_cfm(sco, status);
hci_conn_del(sco);
}
}
@@ -2255,10 +2255,10 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_sco_setup(conn, ev->status);
if (ev->status) {
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
hci_conn_del(conn);
} else if (ev->link_type != ACL_LINK)
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
unlock:
hci_dev_unlock(hdev);
@@ -2366,7 +2366,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
&cp);
} else {
conn->state = BT_CONNECT2;
- hci_proto_connect_cfm(conn, 0);
+ hci_connect_cfm(conn, 0);
}
}
@@ -2444,7 +2444,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
type = conn->type;
- hci_proto_disconn_cfm(conn, ev->reason);
+ hci_disconn_cfm(conn, ev->reason);
hci_conn_del(conn);
/* Re-enable advertising if necessary, since it might
@@ -2501,7 +2501,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
&cp);
} else {
conn->state = BT_CONNECTED;
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
hci_conn_drop(conn);
}
} else {
@@ -2629,12 +2629,12 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) &&
(!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
- hci_proto_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_drop(conn);
goto unlock;
}
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
hci_conn_drop(conn);
} else
hci_encrypt_cfm(conn, ev->status, ev->encrypt);
@@ -2707,7 +2707,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
hci_conn_drop(conn);
}
@@ -3106,7 +3106,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
cancel_delayed_work(&hdev->cmd_timer);
if (ev->status ||
- (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
+ (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req_event))
hci_req_cmd_complete(hdev, opcode, ev->status);
if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
@@ -3679,7 +3679,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
hci_conn_drop(conn);
}
@@ -3738,7 +3738,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
break;
}
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
if (ev->status)
hci_conn_del(conn);
@@ -3849,7 +3849,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
if (!ev->status)
conn->state = BT_CONNECTED;
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
hci_conn_drop(conn);
} else {
hci_auth_cfm(conn, ev->status);
@@ -4512,7 +4512,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_debugfs_create_conn(conn);
hci_conn_add_sysfs(conn);
- hci_proto_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, ev->status);
params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
conn->dst_type);
@@ -5039,7 +5039,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
skb_pull(skb, HCI_EVENT_HDR_SIZE);
- if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
+ if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req_event == event) {
struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
u16 opcode = __le16_to_cpu(cmd_hdr->opcode);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b59f92c6df0c..f857e765e081 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -55,7 +55,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
return -ENODATA;
skb = skb_peek_tail(&req->cmd_q);
- bt_cb(skb)->req.complete = complete;
+ bt_cb(skb)->req_complete = complete;
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -116,9 +116,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
}
if (skb_queue_empty(&req->cmd_q))
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->req_start = 1;
- bt_cb(skb)->req.event = event;
+ bt_cb(skb)->req_event = event;
skb_queue_tail(&req->cmd_q, skb);
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d65c5be7c82..cb4bc4883350 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -46,9 +46,9 @@ struct hci_pinfo {
unsigned short channel;
};
-static inline int hci_test_bit(int nr, void *addr)
+static inline int hci_test_bit(int nr, const void *addr)
{
- return *((__u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
+ return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
}
/* Security filter */
@@ -183,12 +183,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb_copy);
}
-/* Send frame to control socket */
-void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk)
+/* Send frame to sockets with specific channel */
+void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+ struct sock *skip_sk)
{
struct sock *sk;
- BT_DBG("len %d", skb->len);
+ BT_DBG("channel %u len %d", channel, skb->len);
read_lock(&hci_sk_list.lock);
@@ -202,35 +203,7 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk)
if (sk->sk_state != BT_BOUND)
continue;
- if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL)
- continue;
-
- nskb = skb_clone(skb, GFP_ATOMIC);
- if (!nskb)
- continue;
-
- if (sock_queue_rcv_skb(sk, nskb))
- kfree_skb(nskb);
- }
-
- read_unlock(&hci_sk_list.lock);
-}
-
-static void queue_monitor_skb(struct sk_buff *skb)
-{
- struct sock *sk;
-
- BT_DBG("len %d", skb->len);
-
- read_lock(&hci_sk_list.lock);
-
- sk_for_each(sk, &hci_sk_list.head) {
- struct sk_buff *nskb;
-
- if (sk->sk_state != BT_BOUND)
- continue;
-
- if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR)
+ if (hci_pi(sk)->channel != channel)
continue;
nskb = skb_clone(skb, GFP_ATOMIC);
@@ -290,7 +263,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
hdr->index = cpu_to_le16(hdev->id);
hdr->len = cpu_to_le16(skb->len);
- queue_monitor_skb(skb_copy);
+ hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy, NULL);
kfree_skb(skb_copy);
}
@@ -397,7 +370,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
skb = create_monitor_event(hdev, event);
if (skb) {
- queue_monitor_skb(skb);
+ hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, NULL);
kfree_skb(skb);
}
}
@@ -826,8 +799,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
}
}
-static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
@@ -871,8 +844,8 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
return err ? : copied;
}
-static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct hci_dev *hdev;
@@ -965,7 +938,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->req_start = 1;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
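
hci_send_to_control() and queue_monitor_skb() differed only in the channel they filtered on, so they collapse into hci_send_to_channel() with the channel as a parameter. A userspace sketch of that consolidation; the subscriber table and channel constants are invented for illustration:

#include <stddef.h>
#include <stdio.h>

/* Illustrative subscriber table; the channels mirror HCI_CHANNEL_* only in spirit. */
enum { CHANNEL_CONTROL = 1, CHANNEL_MONITOR = 2 };

struct subscriber {
        const char *name;
        int channel;
        int bound;
};

static struct subscriber subs[] = {
        { "mgmt-tool", CHANNEL_CONTROL, 1 },
        { "btmon",     CHANNEL_MONITOR, 1 },
        { "stale",     CHANNEL_MONITOR, 0 },
};

/* One delivery routine parameterized by channel replaces two copies that
 * differed only in the channel they filtered on. */
static void send_to_channel(int channel, const char *msg,
                            const struct subscriber *skip)
{
        for (size_t i = 0; i < sizeof(subs) / sizeof(subs[0]); i++) {
                const struct subscriber *s = &subs[i];

                if (s == skip || !s->bound || s->channel != channel)
                        continue;
                printf("-> %s: %s\n", s->name, msg);
        }
}

int main(void)
{
        send_to_channel(CHANNEL_CONTROL, "new device event", NULL);
        send_to_channel(CHANNEL_MONITOR, "raw HCI packet", NULL);
        return 0;
}
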
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 6ba33f9631e8..91c682846bcf 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1244,6 +1244,13 @@ static void l2cap_move_done(struct l2cap_chan *chan)
static void l2cap_chan_ready(struct l2cap_chan *chan)
{
+ /* The channel may have already been flagged as connected in
+ * case of receiving data before the L2CAP info req/rsp
+ * procedure is complete.
+ */
+ if (chan->state == BT_CONNECTED)
+ return;
+
/* This clears all conf flags, including CONF_NOT_COMPLETE */
chan->conf_state = 0;
__clear_chan_timer(chan);
@@ -6785,6 +6792,13 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid,
BT_DBG("chan %p, len %d", chan, skb->len);
+ /* If we receive data on a fixed channel before the info req/rsp
+ * procedure is done, simply assume that the channel is supported
+ * and mark it as ready.
+ */
+ if (chan->chan_type == L2CAP_CHAN_FIXED)
+ l2cap_chan_ready(chan);
+
if (chan->state != BT_CONNECTED)
goto drop;
@@ -7238,13 +7252,16 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
return NULL;
}
-void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
struct hci_dev *hdev = hcon->hdev;
struct l2cap_conn *conn;
struct l2cap_chan *pchan;
u8 dst_type;
+ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+ return;
+
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
if (status) {
@@ -7307,8 +7324,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
return conn->disc_reason;
}
-void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
+static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
{
+ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+ return;
+
BT_DBG("hcon %p reason %d", hcon, reason);
l2cap_conn_del(hcon, bt_to_errno(reason));
@@ -7331,13 +7351,13 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
}
}
-int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
+static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
{
struct l2cap_conn *conn = hcon->l2cap_data;
struct l2cap_chan *chan;
if (!conn)
- return 0;
+ return;
BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt);
@@ -7420,8 +7440,6 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
}
mutex_unlock(&conn->chan_lock);
-
- return 0;
}
int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
@@ -7529,6 +7547,13 @@ drop:
return 0;
}
+static struct hci_cb l2cap_cb = {
+ .name = "L2CAP",
+ .connect_cfm = l2cap_connect_cfm,
+ .disconn_cfm = l2cap_disconn_cfm,
+ .security_cfm = l2cap_security_cfm,
+};
+
static int l2cap_debugfs_show(struct seq_file *f, void *p)
{
struct l2cap_chan *c;
@@ -7570,6 +7595,8 @@ int __init l2cap_init(void)
if (err < 0)
return err;
+ hci_register_cb(&l2cap_cb);
+
if (IS_ERR_OR_NULL(bt_debugfs))
return 0;
@@ -7587,6 +7614,7 @@ int __init l2cap_init(void)
void l2cap_exit(void)
{
debugfs_remove(l2cap_debugfs);
+ hci_unregister_cb(&l2cap_cb);
l2cap_cleanup_sockets();
}
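
L2CAP now hands the HCI core a struct hci_cb full of callbacks instead of being called by name, and because the same callback list serves every protocol, each handler starts by ignoring link types it does not own. A sketch of that dispatch shape, with made-up proto_cb/conn types standing in for the kernel structures:

#include <stdio.h>

enum link_type { ACL_LINK, SCO_LINK, LE_LINK };

struct conn { enum link_type type; };

/* Callback table in the spirit of struct hci_cb; names are illustrative. */
struct proto_cb {
        const char *name;
        void (*connect_cfm)(struct conn *c, int status);
};

/* Because one callback list serves every protocol, each handler filters out
 * link types it does not own and returns early. */
static void l2cap_connect_cfm(struct conn *c, int status)
{
        if (c->type != ACL_LINK && c->type != LE_LINK)
                return;
        printf("l2cap: connect cfm, status %d\n", status);
}

static void sco_connect_cfm(struct conn *c, int status)
{
        if (c->type != SCO_LINK)
                return;
        printf("sco: connect cfm, status %d\n", status);
}

static struct proto_cb cbs[] = {
        { "L2CAP", l2cap_connect_cfm },
        { "SCO",   sco_connect_cfm },
};

static void connect_cfm(struct conn *c, int status)
{
        for (unsigned i = 0; i < sizeof(cbs) / sizeof(cbs[0]); i++)
                cbs[i].connect_cfm(c, status);
}

int main(void)
{
        struct conn acl = { ACL_LINK }, sco = { SCO_LINK };

        connect_cfm(&acl, 0);
        connect_cfm(&sco, 0);
        return 0;
}
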
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 60694f0f4c73..9070720eedc8 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -944,8 +944,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
return err;
}
-static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
@@ -976,8 +976,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
return err;
}
-static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
{
struct sock *sk = sock->sk;
struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1004,9 +1004,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
release_sock(sk);
if (sock->type == SOCK_STREAM)
- err = bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
+ err = bt_sock_stream_recvmsg(sock, msg, len, flags);
else
- err = bt_sock_recvmsg(iocb, sock, msg, len, flags);
+ err = bt_sock_recvmsg(sock, msg, len, flags);
if (pi->chan->mode != L2CAP_MODE_ERTM)
return err;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 9ec5390c85eb..1e4635a3374d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -29,6 +29,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/hci_sock.h>
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/mgmt.h>
@@ -242,7 +243,7 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len,
/* Time stamp */
__net_timestamp(skb);
- hci_send_to_control(skb, skip_sk);
+ hci_send_to_channel(HCI_CHANNEL_CONTROL, skb, skip_sk);
kfree_skb(skb);
return 0;
@@ -2116,8 +2117,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
goto failed;
}
- if (mgmt_pending_find(MGMT_OP_SET_SSP, hdev) ||
- mgmt_pending_find(MGMT_OP_SET_HS, hdev)) {
+ if (mgmt_pending_find(MGMT_OP_SET_SSP, hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP,
MGMT_STATUS_BUSY);
goto failed;
@@ -2176,6 +2176,12 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_lock(hdev);
+ if (mgmt_pending_find(MGMT_OP_SET_SSP, hdev)) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
+ MGMT_STATUS_BUSY);
+ goto unlock;
+ }
+
if (cp->val) {
changed = !test_and_set_bit(HCI_HS_ENABLED, &hdev->dev_flags);
} else {
@@ -3249,6 +3255,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
if (PTR_ERR(conn) == -EBUSY)
status = MGMT_STATUS_BUSY;
+ else if (PTR_ERR(conn) == -EOPNOTSUPP)
+ status = MGMT_STATUS_NOT_SUPPORTED;
+ else if (PTR_ERR(conn) == -ECONNREFUSED)
+ status = MGMT_STATUS_REJECTED;
else
status = MGMT_STATUS_CONNECT_FAILED;
@@ -6654,7 +6664,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr);
ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type);
- ev.key.master = csrk->master;
+ ev.key.type = csrk->type;
memcpy(ev.key.val, csrk->val, sizeof(csrk->val));
mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL);
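
pair_device() now maps the connect error to a more specific management status instead of reporting everything as CONNECT_FAILED. A trivial sketch of such an errno-to-status translation; the status enum here is a stand-in, not the full MGMT_STATUS_* list:

#include <errno.h>
#include <stdio.h>

enum mgmt_status {
        STATUS_BUSY,
        STATUS_NOT_SUPPORTED,
        STATUS_REJECTED,
        STATUS_CONNECT_FAILED,
};

/* Same idea as the pair_device() change: translate the connect error into a
 * specific status instead of collapsing everything to CONNECT_FAILED. */
static enum mgmt_status connect_err_to_status(int err)
{
        switch (err) {
        case -EBUSY:
                return STATUS_BUSY;
        case -EOPNOTSUPP:
                return STATUS_NOT_SUPPORTED;
        case -ECONNREFUSED:
                return STATUS_REJECTED;
        default:
                return STATUS_CONNECT_FAILED;
        }
}

int main(void)
{
        printf("%d %d %d %d\n",
               connect_err_to_status(-EBUSY),
               connect_err_to_status(-EOPNOTSUPP),
               connect_err_to_status(-ECONNREFUSED),
               connect_err_to_status(-EIO));
        return 0;
}
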
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 3c6d2c8ac1a4..825e8fb5114b 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -549,8 +549,8 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
return 0;
}
-static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
@@ -615,8 +615,8 @@ done:
return sent;
}
-static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
@@ -627,7 +627,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
return 0;
}
- len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags);
+ len = bt_sock_stream_recvmsg(sock, msg, size, flags);
lock_sock(sk);
if (!(flags & MSG_PEEK) && len > 0)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 76321b546e84..54279ac28120 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -688,8 +688,8 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len
return 0;
}
-static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
int err;
@@ -758,8 +758,8 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
}
}
-static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
{
struct sock *sk = sock->sk;
struct sco_pinfo *pi = sco_pi(sk);
@@ -777,7 +777,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
release_sock(sk);
- return bt_sock_recvmsg(iocb, sock, msg, len, flags);
+ return bt_sock_recvmsg(sock, msg, len, flags);
}
static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
@@ -1083,9 +1083,13 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
return lm;
}
-void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
{
+ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+ return;
+
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
+
if (!status) {
struct sco_conn *conn;
@@ -1096,8 +1100,11 @@ void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
sco_conn_del(hcon, bt_to_errno(status));
}
-void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
+static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
{
+ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+ return;
+
BT_DBG("hcon %p reason %d", hcon, reason);
sco_conn_del(hcon, bt_to_errno(reason));
@@ -1122,6 +1129,12 @@ drop:
return 0;
}
+static struct hci_cb sco_cb = {
+ .name = "SCO",
+ .connect_cfm = sco_connect_cfm,
+ .disconn_cfm = sco_disconn_cfm,
+};
+
static int sco_debugfs_show(struct seq_file *f, void *p)
{
struct sock *sk;
@@ -1203,6 +1216,8 @@ int __init sco_init(void)
BT_INFO("SCO socket layer initialized");
+ hci_register_cb(&sco_cb);
+
if (IS_ERR_OR_NULL(bt_debugfs))
return 0;
@@ -1222,6 +1237,8 @@ void __exit sco_exit(void)
debugfs_remove(sco_debugfs);
+ hci_unregister_cb(&sco_cb);
+
bt_sock_unregister(BTPROTO_SCO);
proto_unregister(&sco_proto);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c09a821f381d..c91c19bfc0a8 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1252,7 +1252,10 @@ static void smp_distribute_keys(struct smp_chan *smp)
csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
if (csrk) {
- csrk->master = 0x00;
+ if (hcon->sec_level > BT_SECURITY_MEDIUM)
+ csrk->type = MGMT_CSRK_LOCAL_AUTHENTICATED;
+ else
+ csrk->type = MGMT_CSRK_LOCAL_UNAUTHENTICATED;
memcpy(csrk->val, sign.csrk, sizeof(csrk->val));
}
smp->slave_csrk = csrk;
@@ -2352,7 +2355,10 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
if (csrk) {
- csrk->master = 0x01;
+ if (conn->hcon->sec_level > BT_SECURITY_MEDIUM)
+ csrk->type = MGMT_CSRK_REMOTE_AUTHENTICATED;
+ else
+ csrk->type = MGMT_CSRK_REMOTE_UNAUTHENTICATED;
memcpy(csrk->val, rp->csrk, sizeof(csrk->val));
}
smp->csrk = csrk;
@@ -2951,24 +2957,14 @@ create_chan:
l2cap_chan_set_defaults(chan);
if (cid == L2CAP_CID_SMP) {
- /* If usage of static address is forced or if the devices
- * does not have a public address, then listen on the static
- * address.
- *
- * In case BR/EDR has been disabled on a dual-mode controller
- * and a static address has been configued, then listen on
- * the static address instead.
- */
- if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
- !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
- (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) &&
- bacmp(&hdev->static_addr, BDADDR_ANY))) {
- bacpy(&chan->src, &hdev->static_addr);
- chan->src_type = BDADDR_LE_RANDOM;
- } else {
- bacpy(&chan->src, &hdev->bdaddr);
+ u8 bdaddr_type;
+
+ hci_copy_identity_address(hdev, &chan->src, &bdaddr_type);
+
+ if (bdaddr_type == ADDR_LE_DEV_PUBLIC)
chan->src_type = BDADDR_LE_PUBLIC;
- }
+ else
+ chan->src_type = BDADDR_LE_RANDOM;
} else {
bacpy(&chan->src, &hdev->bdaddr);
chan->src_type = BDADDR_BREDR;
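
The SMP changes replace the old csrk->master flag with a four-valued type that also records whether the signing key was distributed over an authenticated link. A small sketch of that classification, assuming (as the code above suggests) that anything above medium security counts as authenticated:

#include <stdio.h>

enum sec_level { SEC_LOW, SEC_MEDIUM, SEC_HIGH, SEC_FIPS };

enum csrk_type {
        CSRK_LOCAL_UNAUTHENTICATED,
        CSRK_LOCAL_AUTHENTICATED,
        CSRK_REMOTE_UNAUTHENTICATED,
        CSRK_REMOTE_AUTHENTICATED,
};

/* Mirrors the change above: the CSRK is tagged authenticated only when the
 * link security level exceeds "medium". */
static enum csrk_type classify_csrk(enum sec_level level, int remote)
{
        if (remote)
                return level > SEC_MEDIUM ? CSRK_REMOTE_AUTHENTICATED
                                          : CSRK_REMOTE_UNAUTHENTICATED;
        return level > SEC_MEDIUM ? CSRK_LOCAL_AUTHENTICATED
                                  : CSRK_LOCAL_UNAUTHENTICATED;
}

int main(void)
{
        printf("%d %d\n", classify_csrk(SEC_HIGH, 0), classify_csrk(SEC_MEDIUM, 1));
        return 0;
}
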
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ffd379db5938..294cbcc49263 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,13 +36,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
u16 vid = 0;
rcu_read_lock();
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
- br_nf_pre_routing_finish_bridge_slow(skb);
+ if (br_nf_prerouting_finish_bridge(skb)) {
rcu_read_unlock();
return NETDEV_TX_OK;
}
-#endif
u64_stats_update_begin(&brstats->syncp);
brstats->tx_packets++;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f96933a823e3..3304a5442331 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -37,9 +37,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
- /* ip_fragment doesn't copy the MAC header */
- if (nf_bridge_maybe_copy_header(skb) ||
- !is_skb_forwardable(skb->dev, skb)) {
+ if (!is_skb_forwardable(skb->dev, skb)) {
kfree_skb(skb);
} else {
skb_push(skb, ETH_HLEN);
@@ -188,6 +186,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
continue;
+ if ((p->flags & BR_PROXYARP_WIFI) &&
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied)
+ continue;
prev = maybe_deliver(prev, p, skb, __packet_hook);
if (IS_ERR(prev))
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2aa7be3a847..052c5ebbc947 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -60,7 +60,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
}
static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
- u16 vid)
+ u16 vid, struct net_bridge_port *p)
{
struct net_device *dev = br->dev;
struct neighbour *n;
@@ -68,6 +68,8 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
u8 *arpptr, *sha;
__be32 sip, tip;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
if (dev->flags & IFF_NOARP)
return;
@@ -105,9 +107,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
}
f = __br_fdb_get(br, n->ha, vid);
- if (f)
+ if (f && ((p->flags & BR_PROXYARP) ||
+ (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
sha, n->ha, sha);
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ }
neigh_release(n);
}
@@ -153,12 +158,10 @@ int br_handle_frame_finish(struct sk_buff *skb)
dst = NULL;
- if (is_broadcast_ether_addr(dest)) {
- if (IS_ENABLED(CONFIG_INET) &&
- p->flags & BR_PROXYARP &&
- skb->protocol == htons(ETH_P_ARP))
- br_do_proxy_arp(skb, br, vid);
+ if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
+ br_do_proxy_arp(skb, br, vid, p);
+ if (is_broadcast_ether_addr(dest)) {
skb2 = skb;
unicast = false;
} else if (is_multicast_ether_addr(dest)) {
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0ee453fad3de..a8361c7cdf81 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -239,6 +239,14 @@ drop:
return -1;
}
+static void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+ if (skb->nf_bridge->mask & BRNF_8021Q)
+ skb->protocol = htons(ETH_P_8021Q);
+ else if (skb->nf_bridge->mask & BRNF_PPPoE)
+ skb->protocol = htons(ETH_P_PPP_SES);
+}
+
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
* bridge PRE_ROUTING hook. */
@@ -764,23 +772,53 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
}
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+static bool nf_bridge_copy_header(struct sk_buff *skb)
+{
+ int err;
+ unsigned int header_size;
+
+ nf_bridge_update_protocol(skb);
+ header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+ err = skb_cow_head(skb, header_size);
+ if (err)
+ return false;
+
+ skb_copy_to_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
+ __skb_push(skb, nf_bridge_encap_header_len(skb));
+ return true;
+}
+
+static int br_nf_push_frag_xmit(struct sk_buff *skb)
+{
+ if (!nf_bridge_copy_header(skb)) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return br_dev_queue_push_xmit(skb);
+}
+
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
int ret;
int frag_max_size;
+ unsigned int mtu_reserved;
+ if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
+ return br_dev_queue_push_xmit(skb);
+
+ mtu_reserved = nf_bridge_mtu_reduction(skb);
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
- if (skb->protocol == htons(ETH_P_IP) &&
- skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
- !skb_is_gso(skb)) {
+ if (skb->len + mtu_reserved > skb->dev->mtu) {
frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
if (br_parse_ip_options(skb))
/* Drop invalid packet */
return NF_DROP;
IPCB(skb)->frag_max_size = frag_max_size;
- ret = ip_fragment(skb, br_dev_queue_push_xmit);
+ ret = ip_fragment(skb, br_nf_push_frag_xmit);
} else
ret = br_dev_queue_push_xmit(skb);
@@ -854,6 +892,38 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
return NF_ACCEPT;
}
+/* This is called when br_netfilter has called into iptables/netfilter,
+ * and DNAT has taken place on a bridge-forwarded packet.
+ *
+ * neigh->output has created a new MAC header, with local br0 MAC
+ * as saddr.
+ *
+ * This restores the original MAC saddr of the bridged packet
+ * before invoking bridge forward logic to transmit the packet.
+ */
+static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
+{
+ struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+ skb_pull(skb, ETH_HLEN);
+ nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+
+ skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
+ skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+ skb->dev = nf_bridge->physindev;
+ br_handle_frame_finish(skb);
+}
+
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+ if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+ br_nf_pre_routing_finish_bridge_slow(skb);
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_nf_prerouting_finish_bridge);
+
void br_netfilter_enable(void)
{
}
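
br_nf_dev_queue_xmit() now fragments through br_nf_push_frag_xmit(), which restores the saved link-layer header onto each fragment before handing it back to br_dev_queue_push_xmit(). A userspace sketch of that save-and-restore step; the toy pkt structure and memmove-based prepend stand in for skb headroom handling:

#include <stdio.h>
#include <string.h>

#define ETH_HLEN 14

/* Toy packet: a saved link-layer header plus an L3 payload buffer. */
struct pkt {
        unsigned char saved_mac[ETH_HLEN];      /* kept aside, like nf_bridge->data */
        unsigned char data[128];
        int len;                                /* L3 length, MAC header excluded */
};

/* Analogue of br_nf_push_frag_xmit(): after fragmentation produced an
 * L3-only buffer, prepend the MAC header that was saved earlier, then hand
 * the frame to the transmit path. */
static void push_frag_xmit(struct pkt *p)
{
        memmove(p->data + ETH_HLEN, p->data, p->len);   /* make room */
        memcpy(p->data, p->saved_mac, ETH_HLEN);        /* restore MAC header */
        p->len += ETH_HLEN;
        printf("xmit frame of %d bytes\n", p->len);
}

int main(void)
{
        struct pkt p = { { 0xff, 0xff }, { 0x45 }, 60 };

        push_frag_xmit(&p);
        return 0;
}
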
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4fbcea0e7ecb..8bc6b67457dc 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -22,6 +22,85 @@
#include "br_private.h"
#include "br_private_stp.h"
+static int br_get_num_vlan_infos(const struct net_port_vlans *pv,
+ u32 filter_mask)
+{
+ u16 vid_range_start = 0, vid_range_end = 0;
+ u16 vid_range_flags = 0;
+ u16 pvid, vid, flags;
+ int num_vlans = 0;
+
+ if (filter_mask & RTEXT_FILTER_BRVLAN)
+ return pv->num_vlans;
+
+ if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED))
+ return 0;
+
+ /* Count the number of vlan infos
+ */
+ pvid = br_get_pvid(pv);
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+ flags = 0;
+ if (vid == pvid)
+ flags |= BRIDGE_VLAN_INFO_PVID;
+
+ if (test_bit(vid, pv->untagged_bitmap))
+ flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+ if (vid_range_start == 0) {
+ goto initvars;
+ } else if ((vid - vid_range_end) == 1 &&
+ flags == vid_range_flags) {
+ vid_range_end = vid;
+ continue;
+ } else {
+ if ((vid_range_end - vid_range_start) > 0)
+ num_vlans += 2;
+ else
+ num_vlans += 1;
+ }
+initvars:
+ vid_range_start = vid;
+ vid_range_end = vid;
+ vid_range_flags = flags;
+ }
+
+ if (vid_range_start != 0) {
+ if ((vid_range_end - vid_range_start) > 0)
+ num_vlans += 2;
+ else
+ num_vlans += 1;
+ }
+
+ return num_vlans;
+}
+
+static size_t br_get_link_af_size_filtered(const struct net_device *dev,
+ u32 filter_mask)
+{
+ struct net_port_vlans *pv;
+ int num_vlan_infos;
+
+ rcu_read_lock();
+ if (br_port_exists(dev))
+ pv = nbp_get_vlan_info(br_port_get_rcu(dev));
+ else if (dev->priv_flags & IFF_EBRIDGE)
+ pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
+ else
+ pv = NULL;
+ if (pv)
+ num_vlan_infos = br_get_num_vlan_infos(pv, filter_mask);
+ else
+ num_vlan_infos = 0;
+ rcu_read_unlock();
+
+ if (!num_vlan_infos)
+ return 0;
+
+ /* Each VLAN is returned in bridge_vlan_info along with flags */
+ return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info));
+}
+
static inline size_t br_port_info_size(void)
{
return nla_total_size(1) /* IFLA_BRPORT_STATE */
@@ -36,7 +115,7 @@ static inline size_t br_port_info_size(void)
+ 0;
}
-static inline size_t br_nlmsg_size(void)
+static inline size_t br_nlmsg_size(struct net_device *dev, u32 filter_mask)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -45,7 +124,9 @@ static inline size_t br_nlmsg_size(void)
+ nla_total_size(4) /* IFLA_MTU */
+ nla_total_size(4) /* IFLA_LINK */
+ nla_total_size(1) /* IFLA_OPERSTATE */
- + nla_total_size(br_port_info_size()); /* IFLA_PROTINFO */
+ + nla_total_size(br_port_info_size()) /* IFLA_PROTINFO */
+ + nla_total_size(br_get_link_af_size_filtered(dev,
+ filter_mask)); /* IFLA_AF_SPEC */
}
static int br_port_fill_attrs(struct sk_buff *skb,
@@ -62,7 +143,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
- nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)))
+ nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
+ nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
+ !!(p->flags & BR_PROXYARP_WIFI)))
return -EMSGSIZE;
return 0;
@@ -280,6 +363,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
struct net *net;
struct sk_buff *skb;
int err = -ENOBUFS;
+ u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
if (!port)
return;
@@ -288,11 +372,11 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
br_debug(port->br, "port %u(%s) event %d\n",
(unsigned int)port->port_no, port->dev->name, event);
- skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC);
+ skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC);
if (skb == NULL)
goto errout;
- err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev);
+ err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev);
if (err < 0) {
/* -EMSGSIZE implies BUG in br_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -471,6 +555,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
+ br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
if (tb[IFLA_BRPORT_COST]) {
err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
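
br_get_num_vlan_infos() sizes the compressed VLAN dump by walking the bitmap and charging two netlink entries per run of consecutive VIDs with identical flags and one entry per lone VID. A standalone sketch of that counting pass over plain arrays instead of kernel bitmaps:

#include <stdio.h>

#define VLAN_N_VID 4096

/* Count how many bridge_vlan_info entries a compressed dump needs: a run of
 * consecutive VIDs with identical flags costs two entries (range start plus
 * range end), a lone VID costs one. */
static int count_vlan_infos(const unsigned char set[VLAN_N_VID],
                            const unsigned char flags[VLAN_N_VID])
{
        int start = 0, end = 0, run_flags = 0, num = 0;

        for (int vid = 1; vid < VLAN_N_VID; vid++) {
                if (!set[vid])
                        continue;
                if (start && vid == end + 1 && flags[vid] == run_flags) {
                        end = vid;              /* extend the current run */
                        continue;
                }
                if (start)
                        num += (end > start) ? 2 : 1;   /* close previous run */
                start = end = vid;
                run_flags = flags[vid];
        }
        if (start)
                num += (end > start) ? 2 : 1;

        return num;
}

int main(void)
{
        static unsigned char set[VLAN_N_VID], flags[VLAN_N_VID];

        for (int v = 10; v <= 20; v++)
                set[v] = 1;             /* 10-20: one range, 2 entries */
        set[100] = 1;                   /* lone vid: 1 entry */
        set[200] = 1;
        flags[200] = 1;                 /* different flags: its own entry */

        printf("entries needed: %d\n", count_vlan_infos(set, flags));
        return 0;
}
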
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 387cb3bd017c..20cbb727df4d 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -54,7 +54,6 @@ static unsigned int fake_mtu(const struct dst_entry *dst)
static struct dst_ops fake_dst_ops = {
.family = AF_INET,
- .protocol = cpu_to_be16(ETH_P_IP),
.update_pmtu = fake_update_pmtu,
.redirect = fake_redirect,
.cow_metrics = fake_cow_metrics,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index de0919975a25..f0a0438dbd6d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -305,6 +305,7 @@ struct br_input_skb_cb {
#endif
u16 frag_max_size;
+ bool proxyarp_replied;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool vlan_filtered;
@@ -764,10 +765,15 @@ static inline int br_vlan_enabled(struct net_bridge *br)
/* br_netfilter.c */
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb);
int br_nf_core_init(void);
void br_nf_core_fini(void);
void br_netfilter_rtable_init(struct net_bridge *);
#else
+static inline int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+ return 0;
+}
static inline int br_nf_core_init(void) { return 0; }
static inline void br_nf_core_fini(void) {}
#define br_netfilter_rtable_init(x)
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 2de5d91199e8..4905845a94e9 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
BRPORT_ATTR_FLAG(learning, BR_LEARNING);
BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
+BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -215,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_multicast_fast_leave,
#endif
&brport_attr_proxyarp,
+ &brport_attr_proxyarp_wifi,
NULL
};
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 3244aead0926..5c6c96585acd 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -21,6 +21,7 @@
#include <net/ip.h>
#include <net/ip6_checksum.h>
#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv6.h>
#include "../br_private.h"
static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
@@ -36,7 +37,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
skb_pull(nskb, ETH_HLEN);
}
-static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook)
+/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
+ * or the bridge port (NF_BRIDGE PREROUTING).
+ */
+static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook)
{
struct sk_buff *nskb;
struct iphdr *niph;
@@ -65,11 +71,12 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook)
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
}
-static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
- u8 code)
+static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code)
{
struct sk_buff *nskb;
struct iphdr *niph;
@@ -77,8 +84,9 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
unsigned int len;
void *payload;
__wsum csum;
+ u8 proto;
- if (!nft_bridge_iphdr_validate(oldskb))
+ if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
return;
/* IP header checks: fragment. */
@@ -91,7 +99,17 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
if (!pskb_may_pull(oldskb, len))
return;
- if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0))
+ if (pskb_trim_rcsum(oldskb, htons(ip_hdr(oldskb)->tot_len)))
+ return;
+
+ if (ip_hdr(oldskb)->protocol == IPPROTO_TCP ||
+ ip_hdr(oldskb)->protocol == IPPROTO_UDP)
+ proto = ip_hdr(oldskb)->protocol;
+ else
+ proto = 0;
+
+ if (!skb_csum_unnecessary(oldskb) &&
+ nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
return;
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) +
@@ -120,11 +138,13 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
}
static void nft_reject_br_send_v6_tcp_reset(struct net *net,
- struct sk_buff *oldskb, int hook)
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook)
{
struct sk_buff *nskb;
const struct tcphdr *oth;
@@ -152,12 +172,37 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
+}
+
+static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int thoff;
+ __be16 fo;
+ u8 proto = ip6h->nexthdr;
+
+ if (skb->csum_bad)
+ return false;
+
+ if (skb_csum_unnecessary(skb))
+ return true;
+
+ if (ip6h->payload_len &&
+ pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
+ return false;
+
+ thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+ if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+ return false;
+
+ return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
}
static void nft_reject_br_send_v6_unreach(struct net *net,
- struct sk_buff *oldskb, int hook,
- u8 code)
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code)
{
struct sk_buff *nskb;
struct ipv6hdr *nip6h;
@@ -176,6 +221,9 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
if (!pskb_may_pull(oldskb, len))
return;
+ if (!reject6_br_csum_ok(oldskb, hook))
+ return;
+
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) +
LL_MAX_HEADER + len, GFP_ATOMIC);
if (!nskb)
@@ -205,7 +253,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
}
static void nft_reject_bridge_eval(const struct nft_expr *expr,
@@ -224,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
case htons(ETH_P_IP):
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nft_reject_br_send_v4_unreach(pkt->skb,
+ nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
pkt->ops->hooknum,
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
- nft_reject_br_send_v4_tcp_reset(pkt->skb,
+ nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in,
pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
- nft_reject_br_send_v4_unreach(pkt->skb,
+ nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
pkt->ops->hooknum,
nft_reject_icmp_code(priv->icmp_code));
break;
@@ -242,16 +290,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
case htons(ETH_P_IPV6):
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nft_reject_br_send_v6_unreach(net, pkt->skb,
+ nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
pkt->ops->hooknum,
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
- nft_reject_br_send_v6_tcp_reset(net, pkt->skb,
+ nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in,
pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
- nft_reject_br_send_v6_unreach(net, pkt->skb,
+ nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
pkt->ops->hooknum,
nft_reject_icmpv6_code(priv->icmp_code));
break;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 769b185fefbd..b6bf51bb187d 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -271,8 +271,8 @@ static void caif_check_flow_release(struct sock *sk)
* Copied from unix_dgram_recvmsg, but removed credit checks,
* changed locking, address handling and added MSG_TRUNC.
*/
-static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t len, int flags)
+static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m,
+ size_t len, int flags)
{
struct sock *sk = sock->sk;
@@ -343,9 +343,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
* Copied from unix_stream_recvmsg, but removed credit checks,
* changed locking calls, changed address handling.
*/
-static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size,
- int flags)
+static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
int copied = 0;
@@ -511,8 +510,8 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
}
/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
-static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -586,8 +585,8 @@ err:
* Changed removed permission handling and added waiting for flow on
* and other minor adaptations.
*/
-static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ee9ffd956552..b523453585be 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -328,7 +328,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* containing the interface index.
*/
- BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
@@ -1231,8 +1231,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
/*
* bcm_sendmsg - process BCM commands (opcodes) from the userspace
*/
-static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size)
+static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct bcm_sock *bo = bcm_sk(sk);
@@ -1535,8 +1534,8 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
return 0;
}
-static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
diff --git a/net/can/raw.c b/net/can/raw.c
index 00c13ef23661..63ffdb0f3a23 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -95,8 +95,8 @@ struct raw_sock {
*/
static inline unsigned int *raw_flags(struct sk_buff *skb)
{
- BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) +
- sizeof(unsigned int)));
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can) +
+ sizeof(unsigned int));
/* return pointer after struct sockaddr_can */
return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
@@ -135,7 +135,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
* containing the interface index.
*/
- BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
@@ -658,8 +658,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
-static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size)
+static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
@@ -728,8 +727,8 @@ send_failed:
return err;
}
-static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
diff --git a/net/compat.c b/net/compat.c
index 94d3d5e97883..478443182bbe 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -508,25 +508,25 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
struct compat_group_req {
__u32 gr_interface;
struct __kernel_sockaddr_storage gr_group
- __attribute__ ((aligned(4)));
+ __aligned(4);
} __packed;
struct compat_group_source_req {
__u32 gsr_interface;
struct __kernel_sockaddr_storage gsr_group
- __attribute__ ((aligned(4)));
+ __aligned(4);
struct __kernel_sockaddr_storage gsr_source
- __attribute__ ((aligned(4)));
+ __aligned(4);
} __packed;
struct compat_group_filter {
__u32 gf_interface;
struct __kernel_sockaddr_storage gf_group
- __attribute__ ((aligned(4)));
+ __aligned(4);
__u32 gf_fmode;
__u32 gf_numsrc;
struct __kernel_sockaddr_storage gf_slist[1]
- __attribute__ ((aligned(4)));
+ __aligned(4);
} __packed;
#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aa378ecef186..1d00b8922902 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -790,7 +790,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ops->get_rxfh_indir_size)
dev_indir_size = ops->get_rxfh_indir_size(dev);
if (ops->get_rxfh_key_size)
- dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+ dev_key_size = ops->get_rxfh_key_size(dev);
if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
diff --git a/net/core/filter.c b/net/core/filter.c
index f6bdc2b1ba01..7a4eb7030dba 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -814,7 +814,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)
static void __bpf_prog_release(struct bpf_prog *prog)
{
- if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
+ if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
} else {
bpf_release_orig_filter(prog);
@@ -1019,6 +1019,32 @@ void bpf_prog_destroy(struct bpf_prog *fp)
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);
+static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
+{
+ struct sk_filter *fp, *old_fp;
+
+ fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+ if (!fp)
+ return -ENOMEM;
+
+ fp->prog = prog;
+ atomic_set(&fp->refcnt, 0);
+
+ if (!sk_filter_charge(sk, fp)) {
+ kfree(fp);
+ return -ENOMEM;
+ }
+
+ old_fp = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
+ rcu_assign_pointer(sk->sk_filter, fp);
+
+ if (old_fp)
+ sk_filter_uncharge(sk, old_fp);
+
+ return 0;
+}
+
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
@@ -1031,7 +1057,6 @@ EXPORT_SYMBOL_GPL(bpf_prog_destroy);
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
- struct sk_filter *fp, *old_fp;
unsigned int fsize = bpf_classic_proglen(fprog);
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
struct bpf_prog *prog;
@@ -1068,36 +1093,20 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- fp = kmalloc(sizeof(*fp), GFP_KERNEL);
- if (!fp) {
+ err = __sk_attach_prog(prog, sk);
+ if (err < 0) {
__bpf_prog_release(prog);
- return -ENOMEM;
- }
- fp->prog = prog;
-
- atomic_set(&fp->refcnt, 0);
-
- if (!sk_filter_charge(sk, fp)) {
- __sk_filter_release(fp);
- return -ENOMEM;
+ return err;
}
- old_fp = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
- rcu_assign_pointer(sk->sk_filter, fp);
-
- if (old_fp)
- sk_filter_uncharge(sk, old_fp);
-
return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
-#ifdef CONFIG_BPF_SYSCALL
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
- struct sk_filter *fp, *old_fp;
struct bpf_prog *prog;
+ int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
@@ -1106,40 +1115,22 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
- /* valid fd, but invalid program type */
+ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
return -EINVAL;
}
- fp = kmalloc(sizeof(*fp), GFP_KERNEL);
- if (!fp) {
+ err = __sk_attach_prog(prog, sk);
+ if (err < 0) {
bpf_prog_put(prog);
- return -ENOMEM;
- }
- fp->prog = prog;
-
- atomic_set(&fp->refcnt, 0);
-
- if (!sk_filter_charge(sk, fp)) {
- __sk_filter_release(fp);
- return -ENOMEM;
+ return err;
}
- old_fp = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
- rcu_assign_pointer(sk->sk_filter, fp);
-
- if (old_fp)
- sk_filter_uncharge(sk, old_fp);
-
return 0;
}
-/* allow socket filters to call
- * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem()
- */
-static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_filter_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -1153,34 +1144,37 @@ static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func
}
}
-static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type)
+static bool sk_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type)
{
/* skb fields cannot be accessed yet */
return false;
}
-static struct bpf_verifier_ops sock_filter_ops = {
- .get_func_proto = sock_filter_func_proto,
- .is_valid_access = sock_filter_is_valid_access,
+static const struct bpf_verifier_ops sk_filter_ops = {
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
};
-static struct bpf_prog_type_list tl = {
- .ops = &sock_filter_ops,
+static struct bpf_prog_type_list sk_filter_type __read_mostly = {
+ .ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
};
-static int __init register_sock_filter_ops(void)
+static struct bpf_prog_type_list sched_cls_type __read_mostly = {
+ .ops = &sk_filter_ops,
+ .type = BPF_PROG_TYPE_SCHED_CLS,
+};
+
+static int __init register_sk_filter_ops(void)
{
- bpf_register_prog_type(&tl);
+ bpf_register_prog_type(&sk_filter_type);
+ bpf_register_prog_type(&sched_cls_type);
+
return 0;
}
-late_initcall(register_sock_filter_ops);
-#else
-int sk_attach_bpf(u32 ufd, struct sock *sk)
-{
- return -EOPNOTSUPP;
-}
-#endif
+late_initcall(register_sk_filter_ops);
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
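
With sk_attach_bpf() now built unconditionally and sharing __sk_attach_prog() with the classic-BPF path, attaching an eBPF socket filter from user space still goes through the SO_ATTACH_BPF socket option. A minimal user-space sketch, assuming prog_fd was returned by the bpf() syscall for a BPF_PROG_TYPE_SOCKET_FILTER program:

    #include <sys/socket.h>

    #ifndef SO_ATTACH_BPF
    #define SO_ATTACH_BPF 50        /* assumed to match asm-generic/socket.h */
    #endif

    /* Sketch: attach an already-loaded socket filter program to sock_fd. */
    static int attach_socket_filter(int sock_fd, int prog_fd)
    {
            return setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_BPF,
                              &prog_fd, sizeof(prog_fd));
    }
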
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 70fe9e10ac86..ad07990e943d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -397,25 +397,15 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
{
struct neighbour *n;
- int key_len = tbl->key_len;
- u32 hash_val;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
- if (!atomic_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
+ n = __neigh_lookup_noref(tbl, pkey, dev);
+ if (n) {
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
+ NEIGH_CACHE_STAT_INC(tbl, hits);
}
rcu_read_unlock_bh();
@@ -1263,10 +1253,10 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
+static void neigh_hh_init(struct neighbour *n)
{
- struct net_device *dev = dst->dev;
- __be16 prot = dst->ops->protocol;
+ struct net_device *dev = n->dev;
+ __be16 prot = n->tbl->protocol;
struct hh_cache *hh = &n->hh;
write_lock_bh(&n->lock);
@@ -1280,43 +1270,19 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
write_unlock_bh(&n->lock);
}
-/* This function can be used in contexts, where only old dev_queue_xmit
- * worked, f.e. if you want to override normal output path (eql, shaper),
- * but resolution is not made yet.
- */
-
-int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
-
- __skb_pull(skb, skb_network_offset(skb));
-
- if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
- skb->len) < 0 &&
- dev_rebuild_header(skb))
- return 0;
-
- return dev_queue_xmit(skb);
-}
-EXPORT_SYMBOL(neigh_compat_output);
-
/* Slow and careful. */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
int rc = 0;
- if (!dst)
- goto discard;
-
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
unsigned int seq;
if (dev->header_ops->cache && !neigh->hh.hh_len)
- neigh_hh_init(neigh, dst);
+ neigh_hh_init(neigh);
do {
__skb_pull(skb, skb_network_offset(skb));
@@ -1332,8 +1298,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
}
out:
return rc;
-discard:
- neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
@@ -2427,6 +2391,40 @@ void __neigh_for_each_release(struct neigh_table *tbl,
}
EXPORT_SYMBOL(__neigh_for_each_release);
+int neigh_xmit(int index, struct net_device *dev,
+ const void *addr, struct sk_buff *skb)
+{
+ int err = -EAFNOSUPPORT;
+ if (likely(index < NEIGH_NR_TABLES)) {
+ struct neigh_table *tbl;
+ struct neighbour *neigh;
+
+ tbl = neigh_tables[index];
+ if (!tbl)
+ goto out;
+ neigh = __neigh_lookup_noref(tbl, addr, dev);
+ if (!neigh)
+ neigh = __neigh_create(tbl, addr, dev, false);
+ err = PTR_ERR(neigh);
+ if (IS_ERR(neigh))
+ goto out_kfree_skb;
+ err = neigh->output(neigh, skb);
+ }
+ else if (index == NEIGH_LINK_TABLE) {
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ addr, NULL, skb->len);
+ if (err < 0)
+ goto out_kfree_skb;
+ err = dev_queue_xmit(skb);
+ }
+out:
+ return err;
+out_kfree_skb:
+ kfree_skb(skb);
+ goto out;
+}
+EXPORT_SYMBOL(neigh_xmit);
+
#ifdef CONFIG_PROC_FS
static struct neighbour *neigh_get_first(struct seq_file *seq)
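
The new neigh_xmit() gives callers a single entry point that resolves through the neighbour table selected by index, or falls back to dev_hard_header() plus dev_queue_xmit() for NEIGH_LINK_TABLE. A hedged kernel-side sketch of a caller; NEIGH_ARP_TABLE is assumed to be the IPv4 index into neigh_tables[], since the enum itself is not part of this hunk:

    /* Sketch: resolve and transmit an IPv4 packet via the ARP table. */
    static int xmit_via_arp(struct net_device *dev, __be32 daddr,
                            struct sk_buff *skb)
    {
            return neigh_xmit(NEIGH_ARP_TABLE, dev, &daddr, skb);
    }
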
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f80507823531..47c32413d5b9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2865,7 +2865,6 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
* @from: search offset
* @to: search limit
* @config: textsearch configuration
- * @state: uninitialized textsearch state variable
*
* Finds a pattern in the skb data according to the specified
* textsearch configuration. Use textsearch_next() to retrieve
@@ -2873,17 +2872,17 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
* to the first occurrence or UINT_MAX if no match was found.
*/
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
- unsigned int to, struct ts_config *config,
- struct ts_state *state)
+ unsigned int to, struct ts_config *config)
{
+ struct ts_state state;
unsigned int ret;
config->get_next_block = skb_ts_get_next_block;
config->finish = skb_ts_finish;
- skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+ skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
- ret = textsearch_find(config, state);
+ ret = textsearch_find(config, &state);
return (ret <= to - from ? ret : UINT_MAX);
}
EXPORT_SYMBOL(skb_find_text);
@@ -3207,10 +3206,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
unsigned int offset = skb_gro_offset(skb);
unsigned int headlen = skb_headlen(skb);
- struct sk_buff *nskb, *lp, *p = *head;
unsigned int len = skb_gro_len(skb);
+ struct sk_buff *lp, *p = *head;
unsigned int delta_truesize;
- unsigned int headroom;
if (unlikely(p->len + len >= 65536))
return -E2BIG;
@@ -3277,48 +3275,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
- /* switch back to head shinfo */
- pinfo = skb_shinfo(p);
-
- if (pinfo->frag_list)
- goto merge;
- if (skb_gro_len(p) != pinfo->gso_size)
- return -E2BIG;
-
- headroom = skb_headroom(p);
- nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
- if (unlikely(!nskb))
- return -ENOMEM;
-
- __copy_skb_header(nskb, p);
- nskb->mac_len = p->mac_len;
-
- skb_reserve(nskb, headroom);
- __skb_put(nskb, skb_gro_offset(p));
-
- skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
- skb_set_network_header(nskb, skb_network_offset(p));
- skb_set_transport_header(nskb, skb_transport_offset(p));
-
- __skb_pull(p, skb_gro_offset(p));
- memcpy(skb_mac_header(nskb), skb_mac_header(p),
- p->data - skb_mac_header(p));
-
- skb_shinfo(nskb)->frag_list = p;
- skb_shinfo(nskb)->gso_size = pinfo->gso_size;
- pinfo->gso_size = 0;
- __skb_header_release(p);
- NAPI_GRO_CB(nskb)->last = p;
-
- nskb->data_len += p->len;
- nskb->truesize += p->truesize;
- nskb->len += p->len;
-
- *head = nskb;
- nskb->next = p->next;
- p->next = NULL;
-
- p = nskb;
merge:
delta_truesize = skb->truesize;
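
With the ts_state now kept on skb_find_text()'s own stack, callers drop both the local struct ts_state and the extra argument. A minimal sketch of the updated call pattern (kernel context; error handling trimmed):

    /* Sketch: look for "HTTP" anywhere in the skb data. */
    static unsigned int find_http(struct sk_buff *skb)
    {
            struct ts_config *conf;
            unsigned int pos;

            conf = textsearch_prepare("kmp", "HTTP", 4, GFP_KERNEL, TS_AUTOLOAD);
            if (IS_ERR(conf))
                    return UINT_MAX;

            pos = skb_find_text(skb, 0, skb->len, conf);    /* no ts_state argument */
            textsearch_destroy(conf);
            return pos;
    }
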
diff --git a/net/core/sock.c b/net/core/sock.c
index 93c8b20c91e4..726e1f99aa8d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
skb_dst_force(skb);
spin_lock_irqsave(&list->lock, flags);
- skb->dropcount = atomic_read(&sk->sk_drops);
+ sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);
@@ -2163,15 +2163,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(sock_no_getsockopt);
-int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
- size_t len)
+int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);
-int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
- size_t len, int flags)
+int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
+ int flags)
{
return -EOPNOTSUPP;
}
@@ -2543,14 +2542,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif
-int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
int addr_len = 0;
int err;
- err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+ err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 93ea80196f0e..5b21f6f88e97 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -177,6 +177,8 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
[DCB_ATTR_IEEE_PFC] = {.len = sizeof(struct ieee_pfc)},
[DCB_ATTR_IEEE_APP_TABLE] = {.type = NLA_NESTED},
[DCB_ATTR_IEEE_MAXRATE] = {.len = sizeof(struct ieee_maxrate)},
+ [DCB_ATTR_IEEE_QCN] = {.len = sizeof(struct ieee_qcn)},
+ [DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)},
};
static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
@@ -1030,7 +1032,7 @@ nla_put_failure:
return err;
}
-/* Handle IEEE 802.1Qaz GET commands. */
+/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */
static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
{
struct nlattr *ieee, *app;
@@ -1067,6 +1069,32 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
}
}
+ if (ops->ieee_getqcn) {
+ struct ieee_qcn qcn;
+
+ memset(&qcn, 0, sizeof(qcn));
+ err = ops->ieee_getqcn(netdev, &qcn);
+ if (!err) {
+ err = nla_put(skb, DCB_ATTR_IEEE_QCN,
+ sizeof(qcn), &qcn);
+ if (err)
+ return -EMSGSIZE;
+ }
+ }
+
+ if (ops->ieee_getqcnstats) {
+ struct ieee_qcn_stats qcn_stats;
+
+ memset(&qcn_stats, 0, sizeof(qcn_stats));
+ err = ops->ieee_getqcnstats(netdev, &qcn_stats);
+ if (!err) {
+ err = nla_put(skb, DCB_ATTR_IEEE_QCN_STATS,
+ sizeof(qcn_stats), &qcn_stats);
+ if (err)
+ return -EMSGSIZE;
+ }
+ }
+
if (ops->ieee_getpfc) {
struct ieee_pfc pfc;
memset(&pfc, 0, sizeof(pfc));
@@ -1379,8 +1407,9 @@ int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
}
EXPORT_SYMBOL(dcbnl_cee_notify);
-/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not
- * be completed the entire msg is aborted and error value is returned.
+/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb SET commands.
+ * If any requested operation cannot be completed,
+ * the entire msg is aborted and an error value is returned.
* No attempt is made to reconcile the case where only part of the
* cmd can be completed.
*/
@@ -1417,6 +1446,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
goto err;
}
+ if (ieee[DCB_ATTR_IEEE_QCN] && ops->ieee_setqcn) {
+ struct ieee_qcn *qcn =
+ nla_data(ieee[DCB_ATTR_IEEE_QCN]);
+
+ err = ops->ieee_setqcn(netdev, qcn);
+ if (err)
+ goto err;
+ }
+
if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
err = ops->ieee_setpfc(netdev, pfc);
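
The new QCN attributes are only emitted or accepted when the driver provides the matching callbacks. A hedged sketch of the driver side, with placeholder function names; the member names and signatures follow the ops->ieee_getqcn(), ops->ieee_setqcn() and ops->ieee_getqcnstats() call sites above:

    /* Sketch: driver-side QCN hooks (bodies omitted, names hypothetical). */
    static int my_ieee_getqcn(struct net_device *dev, struct ieee_qcn *qcn);
    static int my_ieee_setqcn(struct net_device *dev, struct ieee_qcn *qcn);
    static int my_ieee_getqcnstats(struct net_device *dev,
                                   struct ieee_qcn_stats *qcn_stats);

    static const struct dcbnl_rtnl_ops my_dcbnl_ops = {
            .ieee_getqcn      = my_ieee_getqcn,
            .ieee_setqcn      = my_ieee_setqcn,
            .ieee_getqcnstats = my_ieee_getqcnstats,
            /* other IEEE/CEE callbacks omitted */
    };
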
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e4c144fa706f..3b1d64d6e093 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -310,11 +310,9 @@ int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
#endif
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t size);
-int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int nonblock, int flags,
- int *addr_len);
+int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+ int flags, int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
unsigned int dccp_poll(struct file *file, struct socket *sock,
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 595ddf0459db..d8346d0eadeb 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -72,8 +72,7 @@ static void printl(const char *fmt, ...)
wake_up(&dccpw.wait);
}
-static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size)
+static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
const struct inet_sock *inet = inet_sk(sk);
struct ccid3_hc_tx_sock *hc = NULL;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index e171b780b499..52a94016526d 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -741,8 +741,7 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
return 0;
}
-int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
+int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
const struct dccp_sock *dp = dccp_sk(sk);
const int flags = msg->msg_flags;
@@ -806,8 +805,8 @@ out_discard:
EXPORT_SYMBOL_GPL(dccp_sendmsg);
-int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags, int *addr_len)
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+ int flags, int *addr_len)
{
const struct dccp_hdr *dh;
long timeo;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 810228646de3..754484b3cd0e 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1669,8 +1669,8 @@ static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int
}
-static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
@@ -1905,8 +1905,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
return skb;
}
-static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size)
+static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 7ca7c3143da3..be1f08cdad29 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -49,41 +49,17 @@
#include <net/dn_route.h>
static int dn_neigh_construct(struct neighbour *);
-static void dn_long_error_report(struct neighbour *, struct sk_buff *);
-static void dn_short_error_report(struct neighbour *, struct sk_buff *);
-static int dn_long_output(struct neighbour *, struct sk_buff *);
-static int dn_short_output(struct neighbour *, struct sk_buff *);
-static int dn_phase3_output(struct neighbour *, struct sk_buff *);
-
-
-/*
- * For talking to broadcast devices: Ethernet & PPP
- */
-static const struct neigh_ops dn_long_ops = {
- .family = AF_DECnet,
- .error_report = dn_long_error_report,
- .output = dn_long_output,
- .connected_output = dn_long_output,
-};
+static void dn_neigh_error_report(struct neighbour *, struct sk_buff *);
+static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb);
/*
- * For talking to pointopoint and multidrop devices: DDCMP and X.25
+ * Operations for adding the link layer header.
*/
-static const struct neigh_ops dn_short_ops = {
+static const struct neigh_ops dn_neigh_ops = {
.family = AF_DECnet,
- .error_report = dn_short_error_report,
- .output = dn_short_output,
- .connected_output = dn_short_output,
-};
-
-/*
- * For talking to DECnet phase III nodes
- */
-static const struct neigh_ops dn_phase3_ops = {
- .family = AF_DECnet,
- .error_report = dn_short_error_report, /* Can use short version here */
- .output = dn_phase3_output,
- .connected_output = dn_phase3_output,
+ .error_report = dn_neigh_error_report,
+ .output = dn_neigh_output,
+ .connected_output = dn_neigh_output,
};
static u32 dn_neigh_hash(const void *pkey,
@@ -93,11 +69,18 @@ static u32 dn_neigh_hash(const void *pkey,
return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]);
}
+static bool dn_key_eq(const struct neighbour *neigh, const void *pkey)
+{
+ return neigh_key_eq16(neigh, pkey);
+}
+
struct neigh_table dn_neigh_table = {
.family = PF_DECnet,
.entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
.key_len = sizeof(__le16),
+ .protocol = cpu_to_be16(ETH_P_DNA_RT),
.hash = dn_neigh_hash,
+ .key_eq = dn_key_eq,
.constructor = dn_neigh_construct,
.id = "dn_neigh_cache",
.parms ={
@@ -146,16 +129,9 @@ static int dn_neigh_construct(struct neighbour *neigh)
__neigh_parms_put(neigh->parms);
neigh->parms = neigh_parms_clone(parms);
-
- if (dn_db->use_long)
- neigh->ops = &dn_long_ops;
- else
- neigh->ops = &dn_short_ops;
rcu_read_unlock();
- if (dn->flags & DN_NDFLAG_P3)
- neigh->ops = &dn_phase3_ops;
-
+ neigh->ops = &dn_neigh_ops;
neigh->nud_state = NUD_NOARP;
neigh->output = neigh->ops->connected_output;
@@ -187,24 +163,16 @@ static int dn_neigh_construct(struct neighbour *neigh)
return 0;
}
-static void dn_long_error_report(struct neighbour *neigh, struct sk_buff *skb)
+static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
- printk(KERN_DEBUG "dn_long_error_report: called\n");
+ printk(KERN_DEBUG "dn_neigh_error_report: called\n");
kfree_skb(skb);
}
-
-static void dn_short_error_report(struct neighbour *neigh, struct sk_buff *skb)
-{
- printk(KERN_DEBUG "dn_short_error_report: called\n");
- kfree_skb(skb);
-}
-
-static int dn_neigh_output_packet(struct sk_buff *skb)
+static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
- struct neighbour *neigh = rt->n;
struct net_device *dev = neigh->dev;
char mac_addr[ETH_ALEN];
unsigned int seq;
@@ -226,6 +194,18 @@ static int dn_neigh_output_packet(struct sk_buff *skb)
return err;
}
+static int dn_neigh_output_packet(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct dn_route *rt = (struct dn_route *)dst;
+ struct neighbour *neigh = rt->n;
+
+ return neigh->output(neigh, skb);
+}
+
+/*
+ * For talking to broadcast devices: Ethernet & PPP
+ */
static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct net_device *dev = neigh->dev;
@@ -269,6 +249,9 @@ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb)
neigh->dev, dn_neigh_output_packet);
}
+/*
+ * For talking to pointopoint and multidrop devices: DDCMP and X.25
+ */
static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct net_device *dev = neigh->dev;
@@ -306,7 +289,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb)
}
/*
- * Phase 3 output is the same is short output, execpt that
+ * For talking to DECnet phase III nodes
+ * Phase 3 output is the same as short output, except that
* it clears the area bits before transmission.
*/
static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb)
@@ -344,6 +328,32 @@ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb)
neigh->dev, dn_neigh_output_packet);
}
+int dn_to_neigh_output(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *neigh = rt->n;
+ struct dn_neigh *dn = (struct dn_neigh *)neigh;
+ struct dn_dev *dn_db;
+ bool use_long;
+
+ rcu_read_lock();
+ dn_db = rcu_dereference(neigh->dev->dn_ptr);
+ if (dn_db == NULL) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ use_long = dn_db->use_long;
+ rcu_read_unlock();
+
+ if (dn->flags & DN_NDFLAG_P3)
+ return dn_phase3_output(neigh, skb);
+ if (use_long)
+ return dn_long_output(neigh, skb);
+ else
+ return dn_short_output(neigh, skb);
+}
+
/*
* Unfortunately, the neighbour code uses the device in its hash
* function, so we don't get any advantage from it. This function
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 3b81092771f8..9ab0c4ba297f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -136,7 +136,6 @@ int decnet_dst_gc_interval = 2;
static struct dst_ops dn_dst_ops = {
.family = PF_DECnet,
- .protocol = cpu_to_be16(ETH_P_DNA_RT),
.gc_thresh = 128,
.gc = dn_dst_gc,
.check = dn_dst_check,
@@ -743,15 +742,6 @@ out:
return NET_RX_DROP;
}
-static int dn_to_neigh_output(struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *n = rt->n;
-
- return n->output(n, skb);
-}
-
static int dn_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 5f8ac404535b..b45206e8dd3e 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -8,6 +8,7 @@ config NET_DSA
tristate
depends on HAVE_NET_DSA
select PHYLIB
+ select NET_SWITCHDEV
if NET_DSA
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2173402d87e0..b40f11bb419c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -175,43 +175,14 @@ __ATTRIBUTE_GROUPS(dsa_hwmon);
#endif /* CONFIG_NET_DSA_HWMON */
/* basic switch operations **************************************************/
-static struct dsa_switch *
-dsa_switch_setup(struct dsa_switch_tree *dst, int index,
- struct device *parent, struct device *host_dev)
+static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
{
- struct dsa_chip_data *pd = dst->pd->chip + index;
- struct dsa_switch_driver *drv;
- struct dsa_switch *ds;
- int ret;
- char *name;
- int i;
+ struct dsa_switch_driver *drv = ds->drv;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_chip_data *pd = ds->pd;
bool valid_name_found = false;
-
- /*
- * Probe for switch model.
- */
- drv = dsa_switch_probe(host_dev, pd->sw_addr, &name);
- if (drv == NULL) {
- netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
- index);
- return ERR_PTR(-EINVAL);
- }
- netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
- index, name);
-
-
- /*
- * Allocate and initialise switch state.
- */
- ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL);
- if (ds == NULL)
- return ERR_PTR(-ENOMEM);
-
- ds->dst = dst;
- ds->index = index;
- ds->pd = dst->pd->chip + index;
- ds->drv = drv;
- ds->master_dev = host_dev;
+ int index = ds->index;
+ int i, ret;
/*
* Validate supplied switch configuration.
@@ -256,7 +227,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
* switch.
*/
if (dst->cpu_switch == index) {
- switch (drv->tag_protocol) {
+ switch (ds->tag_protocol) {
#ifdef CONFIG_NET_DSA_TAG_DSA
case DSA_TAG_PROTO_DSA:
dst->rcv = dsa_netdev_ops.rcv;
@@ -284,7 +255,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
goto out;
}
- dst->tag_protocol = drv->tag_protocol;
+ dst->tag_protocol = ds->tag_protocol;
}
/*
@@ -314,19 +285,15 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
* Create network devices for physical switch ports.
*/
for (i = 0; i < DSA_MAX_PORTS; i++) {
- struct net_device *slave_dev;
-
if (!(ds->phys_port_mask & (1 << i)))
continue;
- slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]);
- if (slave_dev == NULL) {
+ ret = dsa_slave_create(ds, parent, i, pd->port_names[i]);
+ if (ret < 0) {
netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n",
index, i, pd->port_names[i]);
- continue;
+ ret = 0;
}
-
- ds->ports[i] = slave_dev;
}
#ifdef CONFIG_NET_DSA_HWMON
@@ -354,13 +321,57 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
}
#endif /* CONFIG_NET_DSA_HWMON */
- return ds;
+ return ret;
out_free:
mdiobus_free(ds->slave_mii_bus);
out:
kfree(ds);
- return ERR_PTR(ret);
+ return ret;
+}
+
+static struct dsa_switch *
+dsa_switch_setup(struct dsa_switch_tree *dst, int index,
+ struct device *parent, struct device *host_dev)
+{
+ struct dsa_chip_data *pd = dst->pd->chip + index;
+ struct dsa_switch_driver *drv;
+ struct dsa_switch *ds;
+ int ret;
+ char *name;
+
+ /*
+ * Probe for switch model.
+ */
+ drv = dsa_switch_probe(host_dev, pd->sw_addr, &name);
+ if (drv == NULL) {
+ netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
+ index);
+ return ERR_PTR(-EINVAL);
+ }
+ netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
+ index, name);
+
+
+ /*
+ * Allocate and initialise switch state.
+ */
+ ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL);
+ if (ds == NULL)
+ return NULL;
+
+ ds->dst = dst;
+ ds->index = index;
+ ds->pd = pd;
+ ds->drv = drv;
+ ds->tag_protocol = drv->tag_protocol;
+ ds->master_dev = host_dev;
+
+ ret = dsa_switch_setup_one(ds, parent);
+ if (ret)
+ return NULL;
+
+ return ds;
}
static void dsa_switch_destroy(struct dsa_switch *ds)
@@ -378,7 +389,7 @@ static int dsa_switch_suspend(struct dsa_switch *ds)
/* Suspend slave network devices */
for (i = 0; i < DSA_MAX_PORTS; i++) {
- if (!(ds->phys_port_mask & (1 << i)))
+ if (!dsa_is_port_initialized(ds, i))
continue;
ret = dsa_slave_suspend(ds->ports[i]);
@@ -404,7 +415,7 @@ static int dsa_switch_resume(struct dsa_switch *ds)
/* Resume slave network devices */
for (i = 0; i < DSA_MAX_PORTS; i++) {
- if (!(ds->phys_port_mask & (1 << i)))
+ if (!dsa_is_port_initialized(ds, i))
continue;
ret = dsa_slave_resume(ds->ports[i]);
@@ -567,9 +578,9 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
kfree(pd->chip);
}
-static int dsa_of_probe(struct platform_device *pdev)
+static int dsa_of_probe(struct device *dev)
{
- struct device_node *np = pdev->dev.of_node;
+ struct device_node *np = dev->of_node;
struct device_node *child, *mdio, *ethernet, *port, *link;
struct mii_bus *mdio_bus;
struct platform_device *ethernet_dev;
@@ -587,7 +598,7 @@ static int dsa_of_probe(struct platform_device *pdev)
mdio_bus = of_mdio_find_bus(mdio);
if (!mdio_bus)
- return -EINVAL;
+ return -EPROBE_DEFER;
ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
if (!ethernet)
@@ -595,13 +606,13 @@ static int dsa_of_probe(struct platform_device *pdev)
ethernet_dev = of_find_device_by_node(ethernet);
if (!ethernet_dev)
- return -ENODEV;
+ return -EPROBE_DEFER;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return -ENOMEM;
- pdev->dev.platform_data = pd;
+ dev->platform_data = pd;
pd->netdev = &ethernet_dev->dev;
pd->nr_chips = of_get_available_child_count(np);
if (pd->nr_chips > DSA_MAX_SWITCHES)
@@ -674,43 +685,86 @@ out_free_chip:
dsa_of_free_platform_data(pd);
out_free:
kfree(pd);
- pdev->dev.platform_data = NULL;
+ dev->platform_data = NULL;
return ret;
}
-static void dsa_of_remove(struct platform_device *pdev)
+static void dsa_of_remove(struct device *dev)
{
- struct dsa_platform_data *pd = pdev->dev.platform_data;
+ struct dsa_platform_data *pd = dev->platform_data;
- if (!pdev->dev.of_node)
+ if (!dev->of_node)
return;
dsa_of_free_platform_data(pd);
kfree(pd);
}
#else
-static inline int dsa_of_probe(struct platform_device *pdev)
+static inline int dsa_of_probe(struct device *dev)
{
return 0;
}
-static inline void dsa_of_remove(struct platform_device *pdev)
+static inline void dsa_of_remove(struct device *dev)
{
}
#endif
+static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
+ struct device *parent, struct dsa_platform_data *pd)
+{
+ int i;
+
+ dst->pd = pd;
+ dst->master_netdev = dev;
+ dst->cpu_switch = -1;
+ dst->cpu_port = -1;
+
+ for (i = 0; i < pd->nr_chips; i++) {
+ struct dsa_switch *ds;
+
+ ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev);
+ if (IS_ERR(ds)) {
+ netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
+ i, PTR_ERR(ds));
+ continue;
+ }
+
+ dst->ds[i] = ds;
+ if (ds->drv->poll_link != NULL)
+ dst->link_poll_needed = 1;
+ }
+
+ /*
+ * If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+ dev->dsa_ptr = (void *)dst;
+
+ if (dst->link_poll_needed) {
+ INIT_WORK(&dst->link_poll_work, dsa_link_poll_work);
+ init_timer(&dst->link_poll_timer);
+ dst->link_poll_timer.data = (unsigned long)dst;
+ dst->link_poll_timer.function = dsa_link_poll_timer;
+ dst->link_poll_timer.expires = round_jiffies(jiffies + HZ);
+ add_timer(&dst->link_poll_timer);
+ }
+}
+
static int dsa_probe(struct platform_device *pdev)
{
struct dsa_platform_data *pd = pdev->dev.platform_data;
struct net_device *dev;
struct dsa_switch_tree *dst;
- int i, ret;
+ int ret;
pr_notice_once("Distributed Switch Architecture driver version %s\n",
dsa_driver_version);
if (pdev->dev.of_node) {
- ret = dsa_of_probe(pdev);
+ ret = dsa_of_probe(&pdev->dev);
if (ret)
return ret;
@@ -722,7 +776,7 @@ static int dsa_probe(struct platform_device *pdev)
dev = dev_to_net_device(pd->netdev);
if (dev == NULL) {
- ret = -EINVAL;
+ ret = -EPROBE_DEFER;
goto out;
}
@@ -741,54 +795,18 @@ static int dsa_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, dst);
- dst->pd = pd;
- dst->master_netdev = dev;
- dst->cpu_switch = -1;
- dst->cpu_port = -1;
-
- for (i = 0; i < pd->nr_chips; i++) {
- struct dsa_switch *ds;
-
- ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev);
- if (IS_ERR(ds)) {
- netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
- i, PTR_ERR(ds));
- continue;
- }
-
- dst->ds[i] = ds;
- if (ds->drv->poll_link != NULL)
- dst->link_poll_needed = 1;
- }
-
- /*
- * If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dev->dsa_ptr = (void *)dst;
-
- if (dst->link_poll_needed) {
- INIT_WORK(&dst->link_poll_work, dsa_link_poll_work);
- init_timer(&dst->link_poll_timer);
- dst->link_poll_timer.data = (unsigned long)dst;
- dst->link_poll_timer.function = dsa_link_poll_timer;
- dst->link_poll_timer.expires = round_jiffies(jiffies + HZ);
- add_timer(&dst->link_poll_timer);
- }
+ dsa_setup_dst(dst, dev, &pdev->dev, pd);
return 0;
out:
- dsa_of_remove(pdev);
+ dsa_of_remove(&pdev->dev);
return ret;
}
-static int dsa_remove(struct platform_device *pdev)
+static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
int i;
if (dst->link_poll_needed)
@@ -802,8 +820,14 @@ static int dsa_remove(struct platform_device *pdev)
if (ds != NULL)
dsa_switch_destroy(ds);
}
+}
+
+static int dsa_remove(struct platform_device *pdev)
+{
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
- dsa_of_remove(pdev);
+ dsa_remove_dst(dst);
+ dsa_of_remove(&pdev->dev);
return 0;
}
@@ -830,6 +854,10 @@ static struct packet_type dsa_pack_type __read_mostly = {
.func = dsa_switch_rcv,
};
+static struct notifier_block dsa_netdevice_nb __read_mostly = {
+ .notifier_call = dsa_slave_netdevice_event,
+};
+
#ifdef CONFIG_PM_SLEEP
static int dsa_suspend(struct device *d)
{
@@ -888,6 +916,8 @@ static int __init dsa_init_module(void)
{
int rc;
+ register_netdevice_notifier(&dsa_netdevice_nb);
+
rc = platform_driver_register(&dsa_driver);
if (rc)
return rc;
@@ -900,6 +930,7 @@ module_init(dsa_init_module);
static void __exit dsa_cleanup_module(void)
{
+ unregister_netdevice_notifier(&dsa_netdevice_nb);
dev_remove_pack(&dsa_pack_type);
platform_driver_unregister(&dsa_driver);
}
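
Returning -EPROBE_DEFER instead of a hard error when the MDIO bus or the master network device is not available yet lets the driver core re-run dsa_probe() once those dependencies register, which matters when DSA probes before its ethernet/MDIO providers. The pattern, reduced to a sketch with a hypothetical lookup helper:

    /* Sketch: a missing dependency is not fatal, the probe is retried later. */
    dep = look_up_dependency();             /* hypothetical helper */
    if (!dep)
            return -EPROBE_DEFER;
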
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index dc9756d3154c..d5f1f9b862ea 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -45,6 +45,8 @@ struct dsa_slave_priv {
int old_link;
int old_pause;
int old_duplex;
+
+ struct net_device *bridge_dev;
};
/* dsa.c */
@@ -53,11 +55,12 @@ extern char dsa_driver_version[];
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-struct net_device *dsa_slave_create(struct dsa_switch *ds,
- struct device *parent,
- int port, char *name);
+int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
+ int port, char *name);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
+int dsa_slave_netdevice_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
/* tag_dsa.c */
extern const struct dsa_device_ops dsa_netdev_ops;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f23deadf42a0..a47305c72fcc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -10,10 +10,13 @@
#include <linux/list.h>
#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
#include <linux/phy.h>
#include <linux/phy_fixed.h>
#include <linux/of_net.h>
#include <linux/of_mdio.h>
+#include <net/rtnetlink.h>
+#include <linux/if_bridge.h>
#include "dsa_priv.h"
/* slave mii_bus handling ***************************************************/
@@ -60,11 +63,18 @@ static int dsa_slave_init(struct net_device *dev)
return 0;
}
+static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p)
+{
+ return !!p->bridge_dev;
+}
+
static int dsa_slave_open(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct net_device *master = p->parent->dst->master_netdev;
struct dsa_switch *ds = p->parent;
+ u8 stp_state = dsa_port_is_bridged(p) ?
+ BR_STATE_BLOCKING : BR_STATE_FORWARDING;
int err;
if (!(master->flags & IFF_UP))
@@ -93,6 +103,9 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_promisc;
}
+ if (ds->drv->port_stp_update)
+ ds->drv->port_stp_update(ds, p->port, stp_state);
+
if (p->phy)
phy_start(p->phy);
@@ -133,6 +146,9 @@ static int dsa_slave_close(struct net_device *dev)
if (ds->drv->port_disable)
ds->drv->port_disable(ds, p->port, p->phy);
+ if (ds->drv->port_stp_update)
+ ds->drv->port_stp_update(ds, p->port, BR_STATE_DISABLED);
+
return 0;
}
@@ -194,6 +210,92 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EOPNOTSUPP;
}
+/* Return a bitmask of all ports currently bridged within a given bridge
+ * device. Note that on leave, the mask still reflects the ports bridged
+ * prior to this port's removal, which is exactly what we want.
+ */
+static u32 dsa_slave_br_port_mask(struct dsa_switch *ds,
+ struct net_device *bridge)
+{
+ struct dsa_slave_priv *p;
+ unsigned int port;
+ u32 mask = 0;
+
+ for (port = 0; port < DSA_MAX_PORTS; port++) {
+ if (!dsa_is_port_initialized(ds, port))
+ continue;
+
+ p = netdev_priv(ds->ports[port]);
+
+ if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT &&
+ p->bridge_dev == bridge)
+ mask |= 1 << port;
+ }
+
+ return mask;
+}
+
+static int dsa_slave_stp_update(struct net_device *dev, u8 state)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+ if (ds->drv->port_stp_update)
+ ret = ds->drv->port_stp_update(ds, p->port, state);
+
+ return ret;
+}
+
+static int dsa_slave_bridge_port_join(struct net_device *dev,
+ struct net_device *br)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+ p->bridge_dev = br;
+
+ if (ds->drv->port_join_bridge)
+ ret = ds->drv->port_join_bridge(ds, p->port,
+ dsa_slave_br_port_mask(ds, br));
+
+ return ret;
+}
+
+static int dsa_slave_bridge_port_leave(struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+
+ if (ds->drv->port_leave_bridge)
+ ret = ds->drv->port_leave_bridge(ds, p->port,
+ dsa_slave_br_port_mask(ds, p->bridge_dev));
+
+ p->bridge_dev = NULL;
+
+ /* The port left the bridge and was put in BR_STATE_DISABLED by the bridge
+ * layer; move it back to BR_STATE_FORWARDING so it stays functional.
+ */
+ dsa_slave_stp_update(dev, BR_STATE_FORWARDING);
+
+ return ret;
+}
+
+static int dsa_slave_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ psid->id_len = sizeof(ds->index);
+ memcpy(&psid->id, &ds->index, psid->id_len);
+
+ return 0;
+}
+
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -470,6 +572,8 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
.ndo_do_ioctl = dsa_slave_ioctl,
+ .ndo_switch_parent_id_get = dsa_slave_parent_id_get,
+ .ndo_switch_port_stp_update = dsa_slave_stp_update,
};
static void dsa_slave_adjust_link(struct net_device *dev)
@@ -605,9 +709,8 @@ int dsa_slave_resume(struct net_device *slave_dev)
return 0;
}
-struct net_device *
-dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, char *name)
+int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
+ int port, char *name)
{
struct net_device *master = ds->dst->master_netdev;
struct net_device *slave_dev;
@@ -617,7 +720,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
NET_NAME_UNKNOWN, ether_setup);
if (slave_dev == NULL)
- return slave_dev;
+ return -ENOMEM;
slave_dev->features = master->vlan_features;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
@@ -667,19 +770,63 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
ret = dsa_slave_phy_setup(p, slave_dev);
if (ret) {
free_netdev(slave_dev);
- return NULL;
+ return ret;
}
+ ds->ports[port] = slave_dev;
ret = register_netdev(slave_dev);
if (ret) {
netdev_err(master, "error %d registering interface %s\n",
ret, slave_dev->name);
phy_disconnect(p->phy);
+ ds->ports[port] = NULL;
free_netdev(slave_dev);
- return NULL;
+ return ret;
}
netif_carrier_off(slave_dev);
- return slave_dev;
+ return 0;
+}
+
+static bool dsa_slave_dev_check(struct net_device *dev)
+{
+ return dev->netdev_ops == &dsa_slave_netdev_ops;
+}
+
+static int dsa_slave_master_changed(struct net_device *dev)
+{
+ struct net_device *master = netdev_master_upper_dev_get(dev);
+ int err = 0;
+
+ if (master && master->rtnl_link_ops &&
+ !strcmp(master->rtnl_link_ops->kind, "bridge"))
+ err = dsa_slave_bridge_port_join(dev, master);
+ else
+ err = dsa_slave_bridge_port_leave(dev);
+
+ return err;
+}
+
+int dsa_slave_netdevice_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev;
+ int err = 0;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ dev = netdev_notifier_info_to_dev(ptr);
+ if (!dsa_slave_dev_check(dev))
+ goto out;
+
+ err = dsa_slave_master_changed(dev);
+ if (err)
+ netdev_warn(dev, "failed to reflect master change\n");
+
+ break;
+ }
+
+out:
+ return NOTIFY_DONE;
}
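
The join/leave path above ends up in three switch-driver callbacks. A hedged sketch of the driver side; member names and signatures are inferred from the port_stp_update(ds, port, state), port_join_bridge(ds, port, mask) and port_leave_bridge(ds, port, mask) call sites, and the function bodies are omitted:

    static int my_port_join_bridge(struct dsa_switch *ds, int port, u32 members);
    static int my_port_leave_bridge(struct dsa_switch *ds, int port, u32 members);
    static int my_port_stp_update(struct dsa_switch *ds, int port, u8 state);

    static struct dsa_switch_driver my_switch_driver = {
            .tag_protocol      = DSA_TAG_PROTO_NONE,
            .port_join_bridge  = my_port_join_bridge,
            .port_leave_bridge = my_port_leave_bridge,
            .port_stp_update   = my_port_stp_update,
            /* probe/setup and the remaining ops omitted */
    };
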
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 238f38d21641..f3bad41d725f 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -104,7 +104,7 @@ int eth_header(struct sk_buff *skb, struct net_device *dev,
*/
if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
- memset(eth->h_dest, 0, ETH_ALEN);
+ eth_zero_addr(eth->h_dest);
return ETH_HLEN;
}
@@ -113,39 +113,6 @@ int eth_header(struct sk_buff *skb, struct net_device *dev,
EXPORT_SYMBOL(eth_header);
/**
- * eth_rebuild_header- rebuild the Ethernet MAC header.
- * @skb: socket buffer to update
- *
- * This is called after an ARP or IPV6 ndisc it's resolution on this
- * sk_buff. We now let protocol (ARP) fill in the other fields.
- *
- * This routine CANNOT use cached dst->neigh!
- * Really, it is used only when dst->neigh is wrong.
- */
-int eth_rebuild_header(struct sk_buff *skb)
-{
- struct ethhdr *eth = (struct ethhdr *)skb->data;
- struct net_device *dev = skb->dev;
-
- switch (eth->h_proto) {
-#ifdef CONFIG_INET
- case htons(ETH_P_IP):
- return arp_find(eth->h_dest, skb);
-#endif
- default:
- netdev_dbg(dev,
- "%s: unable to resolve type %X addresses.\n",
- dev->name, ntohs(eth->h_proto));
-
- memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
- break;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(eth_rebuild_header);
-
-/**
* eth_get_headlen - determine the length of header for an ethernet frame
* @data: pointer to start of frame
* @len: total length of frame
@@ -369,7 +336,6 @@ EXPORT_SYMBOL(eth_validate_addr);
const struct header_ops eth_header_ops ____cacheline_aligned = {
.create = eth_header,
.parse = eth_header_parse,
- .rebuild = eth_rebuild_header,
.cache = eth_header_cache,
.cache_update = eth_header_cache_update,
};
@@ -391,7 +357,7 @@ void ether_setup(struct net_device *dev)
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
dev->priv_flags |= IFF_TX_SKB_SHARING;
- memset(dev->broadcast, 0xFF, ETH_ALEN);
+ eth_broadcast_addr(dev->broadcast);
}
EXPORT_SYMBOL(ether_setup);
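
eth_zero_addr() and eth_broadcast_addr() are the <linux/etherdevice.h> helpers standing in for the open-coded memset()s; paraphrased:

    static inline void eth_zero_addr(u8 *addr)
    {
            memset(addr, 0x00, ETH_ALEN);
    }

    static inline void eth_broadcast_addr(u8 *addr)
    {
            memset(addr, 0xff, ETH_ALEN);
    }
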
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 055fbb71ba6f..dfd3c6007f60 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -126,6 +126,7 @@ static void lowpan_setup(struct net_device *dev)
dev->header_ops = &lowpan_header_ops;
dev->ml_priv = &lowpan_mlme;
dev->destructor = free_netdev;
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -148,10 +149,11 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
pr_debug("adding new link\n");
- if (!tb[IFLA_LINK])
+ if (!tb[IFLA_LINK] ||
+ !net_eq(dev_net(dev), &init_net))
return -EINVAL;
/* find and hold real wpan device */
- real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+ real_dev = dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK]));
if (!real_dev)
return -ENODEV;
if (real_dev->type != ARPHRD_IEEE802154) {
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 18bc7e738507..888d0991c761 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -225,6 +225,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
switch (state) {
/* TODO NETDEV_DEVTYPE */
case NETDEV_REGISTER:
+ dev->features |= NETIF_F_NETNS_LOCAL;
wpan_dev->identifier = ++rdev->wpan_dev_id;
list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list);
rdev->devlist_generation++;
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 2878d8ca6d3b..b60c65f70346 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -98,12 +98,12 @@ static int ieee802154_sock_release(struct socket *sock)
return 0;
}
-static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
- return sk->sk_prot->sendmsg(iocb, sk, msg, len);
+ return sk->sk_prot->sendmsg(sk, msg, len);
}
static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr,
@@ -255,8 +255,7 @@ static int raw_disconnect(struct sock *sk, int flags)
return 0;
}
-static int raw_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size)
+static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
struct net_device *dev;
unsigned int mtu;
@@ -327,8 +326,8 @@ out:
return err;
}
-static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
@@ -615,8 +614,7 @@ static int dgram_disconnect(struct sock *sk, int flags)
return 0;
}
-static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size)
+static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
struct net_device *dev;
unsigned int mtu;
@@ -715,9 +713,8 @@ out:
return err;
}
-static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d2e49baaff63..64a9c0fdc4aa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -716,8 +716,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
}
EXPORT_SYMBOL(inet_getname);
-int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t size)
+int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
@@ -728,7 +727,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
inet_autobind(sk))
return -EAGAIN;
- return sk->sk_prot->sendmsg(iocb, sk, msg, size);
+ return sk->sk_prot->sendmsg(sk, msg, size);
}
EXPORT_SYMBOL(inet_sendmsg);
@@ -750,8 +749,8 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(inet_sendpage);
-int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
int addr_len = 0;
@@ -759,7 +758,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
sock_rps_record_flow(sk);
- err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+ err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
@@ -1675,7 +1674,7 @@ static int __init inet_init(void)
struct list_head *r;
int rc = -EINVAL;
- BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
+ sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
rc = proto_register(&tcp_prot, 1);
if (rc)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 205e1472aa78..5f5c674e130a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -122,6 +122,7 @@
* Interface to generic neighbour cache.
*/
static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd);
+static bool arp_key_eq(const struct neighbour *n, const void *pkey);
static int arp_constructor(struct neighbour *neigh);
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -149,18 +150,12 @@ static const struct neigh_ops arp_direct_ops = {
.connected_output = neigh_direct_output,
};
-static const struct neigh_ops arp_broken_ops = {
- .family = AF_INET,
- .solicit = arp_solicit,
- .error_report = arp_error_report,
- .output = neigh_compat_output,
- .connected_output = neigh_compat_output,
-};
-
struct neigh_table arp_tbl = {
.family = AF_INET,
.key_len = 4,
+ .protocol = cpu_to_be16(ETH_P_IP),
.hash = arp_hash,
+ .key_eq = arp_key_eq,
.constructor = arp_constructor,
.proxy_redo = parp_redo,
.id = "arp_cache",
@@ -216,7 +211,12 @@ static u32 arp_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd)
{
- return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd);
+ return arp_hashfn(pkey, dev, hash_rnd);
+}
+
+static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
+{
+ return neigh_key_eq32(neigh, pkey);
}
static int arp_constructor(struct neighbour *neigh)
@@ -260,35 +260,6 @@ static int arp_constructor(struct neighbour *neigh)
in old paradigm.
*/
-#if 1
- /* So... these "amateur" devices are hopeless.
- The only thing, that I can say now:
- It is very sad that we need to keep ugly obsolete
- code to make them happy.
-
- They should be moved to more reasonable state, now
- they use rebuild_header INSTEAD OF hard_start_xmit!!!
- Besides that, they are sort of out of date
- (a lot of redundant clones/copies, useless in 2.1),
- I wonder why people believe that they work.
- */
- switch (dev->type) {
- default:
- break;
- case ARPHRD_ROSE:
-#if IS_ENABLED(CONFIG_AX25)
- case ARPHRD_AX25:
-#if IS_ENABLED(CONFIG_NETROM)
- case ARPHRD_NETROM:
-#endif
- neigh->ops = &arp_broken_ops;
- neigh->output = neigh->ops->output;
- return 0;
-#else
- break;
-#endif
- }
-#endif
if (neigh->type == RTN_MULTICAST) {
neigh->nud_state = NUD_NOARP;
arp_mc_map(addr, neigh->ha, dev, 1);
@@ -433,71 +404,6 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
return flag;
}
-/* OBSOLETE FUNCTIONS */
-
-/*
- * Find an arp mapping in the cache. If not found, post a request.
- *
- * It is very UGLY routine: it DOES NOT use skb->dst->neighbour,
- * even if it exists. It is supposed that skb->dev was mangled
- * by a virtual device (eql, shaper). Nobody but broken devices
- * is allowed to use this function, it is scheduled to be removed. --ANK
- */
-
-static int arp_set_predefined(int addr_hint, unsigned char *haddr,
- __be32 paddr, struct net_device *dev)
-{
- switch (addr_hint) {
- case RTN_LOCAL:
- pr_debug("arp called for own IP address\n");
- memcpy(haddr, dev->dev_addr, dev->addr_len);
- return 1;
- case RTN_MULTICAST:
- arp_mc_map(paddr, haddr, dev, 1);
- return 1;
- case RTN_BROADCAST:
- memcpy(haddr, dev->broadcast, dev->addr_len);
- return 1;
- }
- return 0;
-}
-
-
-int arp_find(unsigned char *haddr, struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- __be32 paddr;
- struct neighbour *n;
-
- if (!skb_dst(skb)) {
- pr_debug("arp_find is called with dst==NULL\n");
- kfree_skb(skb);
- return 1;
- }
-
- paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr);
- if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
- paddr, dev))
- return 0;
-
- n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
-
- if (n) {
- n->used = jiffies;
- if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
- neigh_ha_snapshot(haddr, n, dev);
- neigh_release(n);
- return 0;
- }
- neigh_release(n);
- } else
- kfree_skb(skb);
- return 1;
-}
-EXPORT_SYMBOL(arp_find);
-
-/* END OF OBSOLETE FUNCTIONS */
-
/*
* Check if we can use proxy ARP for this path
*/
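
With a per-table key_eq operation, ARP now supplies both the hash and the equality test, so the core lookup (__neigh_lookup_noref(), used by neigh_lookup() and neigh_xmit() earlier in this diff) no longer has to memcmp() against key_len itself. For IPv4 the key is a single __be32, so the comparison amounts to the following sketch (neigh_key_eq32() itself is defined outside this diff; the primary_key layout is assumed):

    /* Sketch: 32-bit key comparison against the neighbour's primary_key. */
    static bool arp_key_eq_sketch(const struct neighbour *n, const void *pkey)
    {
            return *(const u32 *)n->primary_key == *(const u32 *)pkey;
    }
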
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3a8985c94581..5105759e4e00 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -548,6 +548,26 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
return NULL;
}
+static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
+{
+ struct ip_mreqn mreq = {
+ .imr_multiaddr.s_addr = ifa->ifa_address,
+ .imr_ifindex = ifa->ifa_dev->dev->ifindex,
+ };
+ int ret;
+
+ ASSERT_RTNL();
+
+ lock_sock(sk);
+ if (join)
+ ret = __ip_mc_join_group(sk, &mreq);
+ else
+ ret = __ip_mc_leave_group(sk, &mreq);
+ release_sock(sk);
+
+ return ret;
+}
+
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
@@ -584,6 +604,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
!inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
continue;
+ if (ipv4_is_multicast(ifa->ifa_address))
+ ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
return 0;
}
@@ -838,6 +860,15 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
* userspace already relies on not having to provide this.
*/
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+ if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
+ int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
+ true, ifa);
+
+ if (ret < 0) {
+ inet_free_ifa(ifa);
+ return ret;
+ }
+ }
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
} else {
inet_free_ifa(ifa);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 57be71dd6a9e..e067770235bf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -89,17 +89,14 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
switch (id) {
case RT_TABLE_LOCAL:
- net->ipv4.fib_local = tb;
+ rcu_assign_pointer(net->ipv4.fib_local, tb);
break;
-
case RT_TABLE_MAIN:
- net->ipv4.fib_main = tb;
+ rcu_assign_pointer(net->ipv4.fib_main, tb);
break;
-
case RT_TABLE_DEFAULT:
- net->ipv4.fib_default = tb;
+ rcu_assign_pointer(net->ipv4.fib_default, tb);
break;
-
default:
break;
}
@@ -132,13 +129,14 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
static void fib_flush(struct net *net)
{
int flushed = 0;
- struct fib_table *tb;
- struct hlist_head *head;
unsigned int h;
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
- head = &net->ipv4.fib_table_hash[h];
- hlist_for_each_entry(tb, head, tb_hlist)
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ struct hlist_node *tmp;
+ struct fib_table *tb;
+
+ hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
flushed += fib_table_flush(tb);
}
@@ -146,6 +144,19 @@ static void fib_flush(struct net *net)
rt_cache_flush(net);
}
+void fib_flush_external(struct net *net)
+{
+ struct fib_table *tb;
+ struct hlist_head *head;
+ unsigned int h;
+
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ head = &net->ipv4.fib_table_hash[h];
+ hlist_for_each_entry(tb, head, tb_hlist)
+ fib_table_flush_external(tb);
+ }
+}
+
/*
* Find address type as if only "dev" was present in the system. If
* on_dev is NULL then all interfaces are taken into consideration.
@@ -665,10 +676,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
s_h = cb->args[0];
s_e = cb->args[1];
+ rcu_read_lock();
+
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
- hlist_for_each_entry(tb, head, tb_hlist) {
+ hlist_for_each_entry_rcu(tb, head, tb_hlist) {
if (e < s_e)
goto next;
if (dumped)
@@ -682,6 +695,8 @@ next:
}
}
out:
+ rcu_read_unlock();
+
cb->args[1] = e;
cb->args[0] = h;
@@ -1117,14 +1132,34 @@ static void ip_fib_net_exit(struct net *net)
rtnl_lock();
for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
- struct fib_table *tb;
- struct hlist_head *head;
+ struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
+ struct fib_table *tb;
+
+ /* this is done in two passes as flushing the table could
+ * cause it to be reallocated in order to accommodate new
+ * tnodes at the root as the table shrinks.
+ */
+ hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
+ fib_table_flush(tb);
- head = &net->ipv4.fib_table_hash[i];
hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ switch (tb->tb_id) {
+ case RT_TABLE_LOCAL:
+ RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
+ break;
+ case RT_TABLE_MAIN:
+ RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
+ break;
+ case RT_TABLE_DEFAULT:
+ RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
+ break;
+ default:
+ break;
+ }
+#endif
hlist_del(&tb->tb_hlist);
- fib_table_flush(tb);
fib_free_table(tb);
}
}
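
With fib_local, fib_main and fib_default now published with rcu_assign_pointer() and cleared with RCU_INIT_POINTER() on netns teardown, readers are expected to pick these pointers up inside an RCU read-side critical section, as inet_dump_fib() now does for the table hash. A minimal reader-side sketch (kernel context assumed; the lookup call is only indicative and not part of this patch):

    static int fib_main_lookup_sketch(struct net *net, const struct flowi4 *flp,
                                      struct fib_result *res)
    {
        struct fib_table *tb;
        int err = -ENOENT;

        rcu_read_lock();
        tb = rcu_dereference(net->ipv4.fib_main);
        if (tb)
            err = fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF);
        rcu_read_unlock();

        return err;
    }
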
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 825981b1049a..ae2e6eede46e 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -6,11 +6,12 @@
#include <net/ip_fib.h>
struct fib_alias {
- struct list_head fa_list;
+ struct hlist_node fa_list;
struct fib_info *fa_info;
u8 fa_tos;
u8 fa_type;
u8 fa_state;
+ u8 fa_slen;
struct rcu_head rcu;
};
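
struct fib_alias now carries fa_slen, the suffix length KEYLENGTH - prefix_length, so aliases can hang directly off a trie leaf (kept sorted by fa_slen) and a prefix match reduces to a single range check on key ^ daddr. A standalone userspace sketch of that check, with made-up addresses:

    #include <stdio.h>
    #include <stdint.h>

    #define KEYLENGTH 32

    int main(void)
    {
        uint32_t key  = 0xc0a80100;          /* leaf key: 192.168.1.0 */
        uint32_t dest = 0xc0a801fe;          /* lookup:   192.168.1.254 */
        unsigned plen = 24;
        unsigned slen = KEYLENGTH - plen;    /* the value stored in fa_slen */

        /* match iff every differing bit lies within the low fa_slen bits,
         * i.e. within the host part of the prefix
         */
        uint64_t index = key ^ dest;
        int match = index < (1ull << slen);

        printf("plen=%u fa_slen=%u match=%d\n", plen, slen, match);
        return 0;
    }
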
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d3db718be51d..190d0d00d744 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -209,6 +209,8 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
+ fib_flush_external(rule->fr_net);
+
err = 0;
errout:
return err;
@@ -224,6 +226,7 @@ static void fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+ fib_flush_external(rule->fr_net);
}
static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1e2090ea663e..c6d267442dac 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1163,12 +1163,12 @@ int fib_sync_down_dev(struct net_device *dev, int force)
void fib_select_default(struct fib_result *res)
{
struct fib_info *fi = NULL, *last_resort = NULL;
- struct list_head *fa_head = res->fa_head;
+ struct hlist_head *fa_head = res->fa_head;
struct fib_table *tb = res->table;
int order = -1, last_idx = -1;
struct fib_alias *fa;
- list_for_each_entry_rcu(fa, fa_head, fa_list) {
+ hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
if (next_fi->fib_scope != res->scope ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3daf0224ff2e..90955455884e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -79,6 +79,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/switchdev.h>
#include "fib_lookup.h"
#define MAX_STAT_DEPTH 32
@@ -88,38 +89,35 @@
typedef unsigned int t_key;
-#define IS_TNODE(n) ((n)->bits)
-#define IS_LEAF(n) (!(n)->bits)
+#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
+#define IS_TNODE(n) ((n)->bits)
+#define IS_LEAF(n) (!(n)->bits)
-#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos)
-
-struct tnode {
+struct key_vector {
t_key key;
- unsigned char bits; /* 2log(KEYLENGTH) bits needed */
unsigned char pos; /* 2log(KEYLENGTH) bits needed */
+ unsigned char bits; /* 2log(KEYLENGTH) bits needed */
unsigned char slen;
- struct tnode __rcu *parent;
- struct rcu_head rcu;
union {
- /* The fields in this struct are valid if bits > 0 (TNODE) */
- struct {
- t_key empty_children; /* KEYLENGTH bits needed */
- t_key full_children; /* KEYLENGTH bits needed */
- struct tnode __rcu *child[0];
- };
- /* This list pointer if valid if bits == 0 (LEAF) */
- struct hlist_head list;
+ /* This list pointer is valid if (pos | bits) == 0 (LEAF) */
+ struct hlist_head leaf;
+ /* This array is valid if (pos | bits) > 0 (TNODE) */
+ struct key_vector __rcu *tnode[0];
};
};
-struct leaf_info {
- struct hlist_node hlist;
- int plen;
- u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
- struct list_head falh;
+struct tnode {
struct rcu_head rcu;
+ t_key empty_children; /* KEYLENGTH bits needed */
+ t_key full_children; /* KEYLENGTH bits needed */
+ struct key_vector __rcu *parent;
+ struct key_vector kv[1];
+#define tn_bits kv[0].bits
};
+#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
+#define LEAF_SIZE TNODE_SIZE(1)
+
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats {
unsigned int gets;
@@ -142,13 +140,13 @@ struct trie_stat {
};
struct trie {
- struct tnode __rcu *trie;
+ struct key_vector kv[1];
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats __percpu *stats;
#endif
};
-static void resize(struct trie *t, struct tnode *tn);
+static struct key_vector *resize(struct trie *t, struct key_vector *tn);
static size_t tnode_free_size;
/*
@@ -161,41 +159,46 @@ static const int sync_pages = 128;
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static inline struct tnode *tn_info(struct key_vector *kv)
+{
+ return container_of(kv, struct tnode, kv[0]);
+}
+
/* caller must hold RTNL */
-#define node_parent(n) rtnl_dereference((n)->parent)
+#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
+#define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
/* caller must hold RCU read lock or RTNL */
-#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
+#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
+#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
/* wrapper for rcu_assign_pointer */
-static inline void node_set_parent(struct tnode *n, struct tnode *tp)
+static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
{
if (n)
- rcu_assign_pointer(n->parent, tp);
+ rcu_assign_pointer(tn_info(n)->parent, tp);
}
-#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
+#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
/* This provides us with the number of children in this node, in the case of a
* leaf this will return 0 meaning none of the children are accessible.
*/
-static inline unsigned long tnode_child_length(const struct tnode *tn)
+static inline unsigned long child_length(const struct key_vector *tn)
{
return (1ul << tn->bits) & ~(1ul);
}
-/* caller must hold RTNL */
-static inline struct tnode *tnode_get_child(const struct tnode *tn,
- unsigned long i)
-{
- return rtnl_dereference(tn->child[i]);
-}
+#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
-/* caller must hold RCU read lock or RTNL */
-static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn,
- unsigned long i)
+static inline unsigned long get_index(t_key key, struct key_vector *kv)
{
- return rcu_dereference_rtnl(tn->child[i]);
+ unsigned long index = key ^ kv->key;
+
+ if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
+ return 0;
+
+ return index >> kv->pos;
}
/* To understand this stuff, an understanding of keys and all their bits is
@@ -274,106 +277,104 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa)
}
#define TNODE_KMALLOC_MAX \
- ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *))
+ ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *))
+#define TNODE_VMALLOC_MAX \
+ ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
static void __node_free_rcu(struct rcu_head *head)
{
struct tnode *n = container_of(head, struct tnode, rcu);
- if (IS_LEAF(n))
+ if (!n->tn_bits)
kmem_cache_free(trie_leaf_kmem, n);
- else if (n->bits <= TNODE_KMALLOC_MAX)
+ else if (n->tn_bits <= TNODE_KMALLOC_MAX)
kfree(n);
else
vfree(n);
}
-#define node_free(n) call_rcu(&n->rcu, __node_free_rcu)
+#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
-static inline void free_leaf_info(struct leaf_info *leaf)
+static struct tnode *tnode_alloc(int bits)
{
- kfree_rcu(leaf, rcu);
-}
+ size_t size;
+
+ /* verify bits is within bounds */
+ if (bits > TNODE_VMALLOC_MAX)
+ return NULL;
+
+ /* determine size and verify it is non-zero and didn't overflow */
+ size = TNODE_SIZE(1ul << bits);
-static struct tnode *tnode_alloc(size_t size)
-{
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_KERNEL);
else
return vzalloc(size);
}
-static inline void empty_child_inc(struct tnode *n)
+static inline void empty_child_inc(struct key_vector *n)
{
- ++n->empty_children ? : ++n->full_children;
+ ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children;
}
-static inline void empty_child_dec(struct tnode *n)
+static inline void empty_child_dec(struct key_vector *n)
{
- n->empty_children-- ? : n->full_children--;
+ tn_info(n)->empty_children-- ? : tn_info(n)->full_children--;
}
-static struct tnode *leaf_new(t_key key)
+static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
{
- struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
- if (l) {
- l->parent = NULL;
- /* set key and pos to reflect full key value
- * any trailing zeros in the key should be ignored
- * as the nodes are searched
- */
- l->key = key;
- l->slen = 0;
- l->pos = 0;
- /* set bits to 0 indicating we are not a tnode */
- l->bits = 0;
+ struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
+ struct key_vector *l = kv->kv;
- INIT_HLIST_HEAD(&l->list);
- }
- return l;
-}
+ if (!kv)
+ return NULL;
-static struct leaf_info *leaf_info_new(int plen)
-{
- struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
- if (li) {
- li->plen = plen;
- li->mask_plen = ntohl(inet_make_mask(plen));
- INIT_LIST_HEAD(&li->falh);
- }
- return li;
+ /* initialize key vector */
+ l->key = key;
+ l->pos = 0;
+ l->bits = 0;
+ l->slen = fa->fa_slen;
+
+ /* link leaf to fib alias */
+ INIT_HLIST_HEAD(&l->leaf);
+ hlist_add_head(&fa->fa_list, &l->leaf);
+
+ return l;
}
-static struct tnode *tnode_new(t_key key, int pos, int bits)
+static struct key_vector *tnode_new(t_key key, int pos, int bits)
{
- size_t sz = offsetof(struct tnode, child[1ul << bits]);
- struct tnode *tn = tnode_alloc(sz);
+ struct tnode *tnode = tnode_alloc(bits);
unsigned int shift = pos + bits;
+ struct key_vector *tn = tnode->kv;
/* verify bits and pos their msb bits clear and values are valid */
BUG_ON(!bits || (shift > KEYLENGTH));
- if (tn) {
- tn->parent = NULL;
- tn->slen = pos;
- tn->pos = pos;
- tn->bits = bits;
- tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
- if (bits == KEYLENGTH)
- tn->full_children = 1;
- else
- tn->empty_children = 1ul << bits;
- }
+ pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
+ sizeof(struct key_vector *) << bits);
+
+ if (!tnode)
+ return NULL;
+
+ if (bits == KEYLENGTH)
+ tnode->full_children = 1;
+ else
+ tnode->empty_children = 1ul << bits;
+
+ tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
+ tn->pos = pos;
+ tn->bits = bits;
+ tn->slen = pos;
- pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
- sizeof(struct tnode *) << bits);
return tn;
}
/* Check whether a tnode 'n' is "full", i.e. it is an internal node
* and no bits are skipped. See discussion in dyntree paper p. 6
*/
-static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
+static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
{
return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
}
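
The node layout is split into struct key_vector (the part the lookup walks: key, pos, bits, slen and either a leaf list head or the child array) and struct tnode (the RCU head, parent pointer and child counters wrapped around it). Node sizes therefore follow from the trailing child array: TNODE_SIZE(n) is the size of a node with n child slots, and LEAF_SIZE equals TNODE_SIZE(1) because a leaf keeps a single hlist_head in the slot an internal node would use for its first child pointer. A standalone sketch of the arithmetic, with stand-in field types, so the absolute byte counts will differ from the kernel's:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef uint32_t t_key;

    struct key_vector {
        t_key key;
        unsigned char pos, bits, slen;
        union {
            void *leaf;                    /* stand-in for struct hlist_head */
            struct key_vector *tnode[1];   /* child slots of an internal node */
        };
    };

    struct tnode {
        void *rcu[2];                      /* stand-in for struct rcu_head */
        t_key empty_children;
        t_key full_children;
        struct key_vector *parent;
        struct key_vector kv[1];
    };

    /* written out arithmetically; the kernel expresses the same thing as
     * offsetof(struct tnode, kv[0].tnode[n])
     */
    #define TNODE_SIZE(n) (offsetof(struct tnode, kv[0].tnode) + \
                           (n) * sizeof(struct key_vector *))
    #define LEAF_SIZE     TNODE_SIZE(1)

    int main(void)
    {
        for (unsigned bits = 1; bits <= 4; bits++)
            printf("bits=%u -> %zu bytes\n", bits,
                   (size_t)TNODE_SIZE(1ul << bits));
        printf("leaf -> %zu bytes\n", (size_t)LEAF_SIZE);
        return 0;
    }
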
@@ -381,12 +382,13 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
/* Add a child at position i overwriting the old value.
* Update the value of full_children and empty_children.
*/
-static void put_child(struct tnode *tn, unsigned long i, struct tnode *n)
+static void put_child(struct key_vector *tn, unsigned long i,
+ struct key_vector *n)
{
- struct tnode *chi = tnode_get_child(tn, i);
+ struct key_vector *chi = get_child(tn, i);
int isfull, wasfull;
- BUG_ON(i >= tnode_child_length(tn));
+ BUG_ON(i >= child_length(tn));
/* update emptyChildren, overflow into fullChildren */
if (n == NULL && chi != NULL)
@@ -399,23 +401,23 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n)
isfull = tnode_full(tn, n);
if (wasfull && !isfull)
- tn->full_children--;
+ tn_info(tn)->full_children--;
else if (!wasfull && isfull)
- tn->full_children++;
+ tn_info(tn)->full_children++;
if (n && (tn->slen < n->slen))
tn->slen = n->slen;
- rcu_assign_pointer(tn->child[i], n);
+ rcu_assign_pointer(tn->tnode[i], n);
}
-static void update_children(struct tnode *tn)
+static void update_children(struct key_vector *tn)
{
unsigned long i;
/* update all of the child parent pointers */
- for (i = tnode_child_length(tn); i;) {
- struct tnode *inode = tnode_get_child(tn, --i);
+ for (i = child_length(tn); i;) {
+ struct key_vector *inode = get_child(tn, --i);
if (!inode)
continue;
@@ -431,36 +433,37 @@ static void update_children(struct tnode *tn)
}
}
-static inline void put_child_root(struct tnode *tp, struct trie *t,
- t_key key, struct tnode *n)
+static inline void put_child_root(struct key_vector *tp, t_key key,
+ struct key_vector *n)
{
- if (tp)
- put_child(tp, get_index(key, tp), n);
+ if (IS_TRIE(tp))
+ rcu_assign_pointer(tp->tnode[0], n);
else
- rcu_assign_pointer(t->trie, n);
+ put_child(tp, get_index(key, tp), n);
}
-static inline void tnode_free_init(struct tnode *tn)
+static inline void tnode_free_init(struct key_vector *tn)
{
- tn->rcu.next = NULL;
+ tn_info(tn)->rcu.next = NULL;
}
-static inline void tnode_free_append(struct tnode *tn, struct tnode *n)
+static inline void tnode_free_append(struct key_vector *tn,
+ struct key_vector *n)
{
- n->rcu.next = tn->rcu.next;
- tn->rcu.next = &n->rcu;
+ tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
+ tn_info(tn)->rcu.next = &tn_info(n)->rcu;
}
-static void tnode_free(struct tnode *tn)
+static void tnode_free(struct key_vector *tn)
{
- struct callback_head *head = &tn->rcu;
+ struct callback_head *head = &tn_info(tn)->rcu;
while (head) {
head = head->next;
- tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]);
+ tnode_free_size += TNODE_SIZE(1ul << tn->bits);
node_free(tn);
- tn = container_of(head, struct tnode, rcu);
+ tn = container_of(head, struct tnode, rcu)->kv;
}
if (tnode_free_size >= PAGE_SIZE * sync_pages) {
@@ -469,14 +472,16 @@ static void tnode_free(struct tnode *tn)
}
}
-static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn)
+static struct key_vector *replace(struct trie *t,
+ struct key_vector *oldtnode,
+ struct key_vector *tn)
{
- struct tnode *tp = node_parent(oldtnode);
+ struct key_vector *tp = node_parent(oldtnode);
unsigned long i;
/* setup the parent pointer out of and back into this node */
NODE_INIT_PARENT(tn, tp);
- put_child_root(tp, t, tn->key, tn);
+ put_child_root(tp, tn->key, tn);
/* update all of the child parent pointers */
update_children(tn);
@@ -485,18 +490,21 @@ static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn)
tnode_free(oldtnode);
/* resize children now that oldtnode is freed */
- for (i = tnode_child_length(tn); i;) {
- struct tnode *inode = tnode_get_child(tn, --i);
+ for (i = child_length(tn); i;) {
+ struct key_vector *inode = get_child(tn, --i);
/* resize child node */
if (tnode_full(tn, inode))
- resize(t, inode);
+ tn = resize(t, inode);
}
+
+ return tp;
}
-static int inflate(struct trie *t, struct tnode *oldtnode)
+static struct key_vector *inflate(struct trie *t,
+ struct key_vector *oldtnode)
{
- struct tnode *tn;
+ struct key_vector *tn;
unsigned long i;
t_key m;
@@ -504,7 +512,7 @@ static int inflate(struct trie *t, struct tnode *oldtnode)
tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
if (!tn)
- return -ENOMEM;
+ goto notnode;
/* prepare oldtnode to be freed */
tnode_free_init(oldtnode);
@@ -514,9 +522,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode)
* point to existing tnodes and the links between our allocated
* nodes.
*/
- for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) {
- struct tnode *inode = tnode_get_child(oldtnode, --i);
- struct tnode *node0, *node1;
+ for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
+ struct key_vector *inode = get_child(oldtnode, --i);
+ struct key_vector *node0, *node1;
unsigned long j, k;
/* An empty child */
@@ -534,8 +542,8 @@ static int inflate(struct trie *t, struct tnode *oldtnode)
/* An internal node with two children */
if (inode->bits == 1) {
- put_child(tn, 2 * i + 1, tnode_get_child(inode, 1));
- put_child(tn, 2 * i, tnode_get_child(inode, 0));
+ put_child(tn, 2 * i + 1, get_child(inode, 1));
+ put_child(tn, 2 * i, get_child(inode, 0));
continue;
}
@@ -564,11 +572,11 @@ static int inflate(struct trie *t, struct tnode *oldtnode)
tnode_free_append(tn, node0);
/* populate child pointers in new nodes */
- for (k = tnode_child_length(inode), j = k / 2; j;) {
- put_child(node1, --j, tnode_get_child(inode, --k));
- put_child(node0, j, tnode_get_child(inode, j));
- put_child(node1, --j, tnode_get_child(inode, --k));
- put_child(node0, j, tnode_get_child(inode, j));
+ for (k = child_length(inode), j = k / 2; j;) {
+ put_child(node1, --j, get_child(inode, --k));
+ put_child(node0, j, get_child(inode, j));
+ put_child(node1, --j, get_child(inode, --k));
+ put_child(node0, j, get_child(inode, j));
}
/* link new nodes to parent */
@@ -581,25 +589,25 @@ static int inflate(struct trie *t, struct tnode *oldtnode)
}
/* setup the parent pointers into and out of this node */
- replace(t, oldtnode, tn);
-
- return 0;
+ return replace(t, oldtnode, tn);
nomem:
/* all pointers should be clean so we are done */
tnode_free(tn);
- return -ENOMEM;
+notnode:
+ return NULL;
}
-static int halve(struct trie *t, struct tnode *oldtnode)
+static struct key_vector *halve(struct trie *t,
+ struct key_vector *oldtnode)
{
- struct tnode *tn;
+ struct key_vector *tn;
unsigned long i;
pr_debug("In halve\n");
tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
if (!tn)
- return -ENOMEM;
+ goto notnode;
/* prepare oldtnode to be freed */
tnode_free_init(oldtnode);
@@ -609,10 +617,10 @@ static int halve(struct trie *t, struct tnode *oldtnode)
* point to existing tnodes and the links between our allocated
* nodes.
*/
- for (i = tnode_child_length(oldtnode); i;) {
- struct tnode *node1 = tnode_get_child(oldtnode, --i);
- struct tnode *node0 = tnode_get_child(oldtnode, --i);
- struct tnode *inode;
+ for (i = child_length(oldtnode); i;) {
+ struct key_vector *node1 = get_child(oldtnode, --i);
+ struct key_vector *node0 = get_child(oldtnode, --i);
+ struct key_vector *inode;
/* At least one of the children is empty */
if (!node1 || !node0) {
@@ -622,10 +630,8 @@ static int halve(struct trie *t, struct tnode *oldtnode)
/* Two nonempty children */
inode = tnode_new(node0->key, oldtnode->pos, 1);
- if (!inode) {
- tnode_free(tn);
- return -ENOMEM;
- }
+ if (!inode)
+ goto nomem;
tnode_free_append(tn, inode);
/* initialize pointers out of node */
@@ -638,30 +644,36 @@ static int halve(struct trie *t, struct tnode *oldtnode)
}
/* setup the parent pointers into and out of this node */
- replace(t, oldtnode, tn);
-
- return 0;
+ return replace(t, oldtnode, tn);
+nomem:
+ /* all pointers should be clean so we are done */
+ tnode_free(tn);
+notnode:
+ return NULL;
}
-static void collapse(struct trie *t, struct tnode *oldtnode)
+static struct key_vector *collapse(struct trie *t,
+ struct key_vector *oldtnode)
{
- struct tnode *n, *tp;
+ struct key_vector *n, *tp;
unsigned long i;
/* scan the tnode looking for that one child that might still exist */
- for (n = NULL, i = tnode_child_length(oldtnode); !n && i;)
- n = tnode_get_child(oldtnode, --i);
+ for (n = NULL, i = child_length(oldtnode); !n && i;)
+ n = get_child(oldtnode, --i);
/* compress one level */
tp = node_parent(oldtnode);
- put_child_root(tp, t, oldtnode->key, n);
+ put_child_root(tp, oldtnode->key, n);
node_set_parent(n, tp);
/* drop dead node */
node_free(oldtnode);
+
+ return tp;
}
-static unsigned char update_suffix(struct tnode *tn)
+static unsigned char update_suffix(struct key_vector *tn)
{
unsigned char slen = tn->pos;
unsigned long stride, i;
@@ -671,8 +683,8 @@ static unsigned char update_suffix(struct tnode *tn)
* why we start with a stride of 2 since a stride of 1 would
* represent the nodes with suffix length equal to tn->pos
*/
- for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) {
- struct tnode *n = tnode_get_child(tn, i);
+ for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
+ struct key_vector *n = get_child(tn, i);
if (!n || (n->slen <= slen))
continue;
@@ -704,12 +716,12 @@ static unsigned char update_suffix(struct tnode *tn)
*
* 'high' in this instance is the variable 'inflate_threshold'. It
* is expressed as a percentage, so we multiply it with
- * tnode_child_length() and instead of multiplying by 2 (since the
+ * child_length() and instead of multiplying by 2 (since the
* child array will be doubled by inflate()) and multiplying
* the left-hand side by 100 (to handle the percentage thing) we
* multiply the left-hand side by 50.
*
- * The left-hand side may look a bit weird: tnode_child_length(tn)
+ * The left-hand side may look a bit weird: child_length(tn)
* - tn->empty_children is of course the number of non-null children
* in the current node. tn->full_children is the number of "full"
* children, that is non-null tnodes with a skip value of 0.
@@ -719,10 +731,10 @@ static unsigned char update_suffix(struct tnode *tn)
* A clearer way to write this would be:
*
* to_be_doubled = tn->full_children;
- * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
+ * not_to_be_doubled = child_length(tn) - tn->empty_children -
* tn->full_children;
*
- * new_child_length = tnode_child_length(tn) * 2;
+ * new_child_length = child_length(tn) * 2;
*
* new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
* new_child_length;
@@ -739,57 +751,57 @@ static unsigned char update_suffix(struct tnode *tn)
* inflate_threshold * new_child_length
*
* expand not_to_be_doubled and to_be_doubled, and shorten:
- * 100 * (tnode_child_length(tn) - tn->empty_children +
+ * 100 * (child_length(tn) - tn->empty_children +
* tn->full_children) >= inflate_threshold * new_child_length
*
* expand new_child_length:
- * 100 * (tnode_child_length(tn) - tn->empty_children +
+ * 100 * (child_length(tn) - tn->empty_children +
* tn->full_children) >=
- * inflate_threshold * tnode_child_length(tn) * 2
+ * inflate_threshold * child_length(tn) * 2
*
* shorten again:
- * 50 * (tn->full_children + tnode_child_length(tn) -
+ * 50 * (tn->full_children + child_length(tn) -
* tn->empty_children) >= inflate_threshold *
- * tnode_child_length(tn)
+ * child_length(tn)
*
*/
-static bool should_inflate(const struct tnode *tp, const struct tnode *tn)
+static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
{
- unsigned long used = tnode_child_length(tn);
+ unsigned long used = child_length(tn);
unsigned long threshold = used;
/* Keep root node larger */
- threshold *= tp ? inflate_threshold : inflate_threshold_root;
- used -= tn->empty_children;
- used += tn->full_children;
+ threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
+ used -= tn_info(tn)->empty_children;
+ used += tn_info(tn)->full_children;
/* if bits == KEYLENGTH then pos = 0, and will fail below */
return (used > 1) && tn->pos && ((50 * used) >= threshold);
}
-static bool should_halve(const struct tnode *tp, const struct tnode *tn)
+static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
{
- unsigned long used = tnode_child_length(tn);
+ unsigned long used = child_length(tn);
unsigned long threshold = used;
/* Keep root node larger */
- threshold *= tp ? halve_threshold : halve_threshold_root;
- used -= tn->empty_children;
+ threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
+ used -= tn_info(tn)->empty_children;
/* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */
return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
}
-static bool should_collapse(const struct tnode *tn)
+static inline bool should_collapse(struct key_vector *tn)
{
- unsigned long used = tnode_child_length(tn);
+ unsigned long used = child_length(tn);
- used -= tn->empty_children;
+ used -= tn_info(tn)->empty_children;
/* account for bits == KEYLENGTH case */
- if ((tn->bits == KEYLENGTH) && tn->full_children)
+ if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
used -= KEY_MAX;
/* One child or none, time to drop us from the trie */
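
The reworked should_inflate()/should_halve() above keep the arithmetic documented in the threshold discussion that precedes them; they only read the counters through tn_info() and test IS_TRIE(tp) instead of a NULL parent to pick the root thresholds. A quick numeric sanity check as a standalone sketch, using the default non-root thresholds (the real should_inflate() additionally requires tn->pos to be non-zero):

    #include <stdio.h>

    int main(void)
    {
        const unsigned inflate_threshold = 50;   /* non-root defaults */
        const unsigned halve_threshold   = 25;

        unsigned bits = 4;                       /* 16 child slots */
        unsigned long children = 1ul << bits;
        unsigned long empty = 3, full = 6;       /* hypothetical occupancy */

        /* inflate: 50 * (children - empty + full) >= threshold * children */
        unsigned long used = children - empty + full;
        int inflate = (used > 1) && (50 * used >= inflate_threshold * children);

        /* halve: 100 * (children - empty) < threshold * children */
        used = children - empty;
        int halve = (used > 1) && (bits > 1) &&
                    (100 * used < halve_threshold * children);

        printf("inflate=%d halve=%d\n", inflate, halve);   /* 1 and 0 here */
        return 0;
    }
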
@@ -797,10 +809,13 @@ static bool should_collapse(const struct tnode *tn)
}
#define MAX_WORK 10
-static void resize(struct trie *t, struct tnode *tn)
+static struct key_vector *resize(struct trie *t, struct key_vector *tn)
{
- struct tnode *tp = node_parent(tn);
- struct tnode __rcu **cptr;
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+ struct trie_use_stats __percpu *stats = t->stats;
+#endif
+ struct key_vector *tp = node_parent(tn);
+ unsigned long cindex = get_index(tn->key, tp);
int max_work = MAX_WORK;
pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
@@ -810,183 +825,125 @@ static void resize(struct trie *t, struct tnode *tn)
* doing it ourselves. This way we can let RCU fully do its
* thing without us interfering
*/
- cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie;
- BUG_ON(tn != rtnl_dereference(*cptr));
+ BUG_ON(tn != get_child(tp, cindex));
/* Double as long as the resulting node has a number of
* nonempty nodes that are above the threshold.
*/
while (should_inflate(tp, tn) && max_work) {
- if (inflate(t, tn)) {
+ tp = inflate(t, tn);
+ if (!tp) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
- this_cpu_inc(t->stats->resize_node_skipped);
+ this_cpu_inc(stats->resize_node_skipped);
#endif
break;
}
max_work--;
- tn = rtnl_dereference(*cptr);
+ tn = get_child(tp, cindex);
}
/* Return if at least one inflate is run */
if (max_work != MAX_WORK)
- return;
+ return node_parent(tn);
/* Halve as long as the number of empty children in this
* node is above threshold.
*/
while (should_halve(tp, tn) && max_work) {
- if (halve(t, tn)) {
+ tp = halve(t, tn);
+ if (!tp) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
- this_cpu_inc(t->stats->resize_node_skipped);
+ this_cpu_inc(stats->resize_node_skipped);
#endif
break;
}
max_work--;
- tn = rtnl_dereference(*cptr);
+ tn = get_child(tp, cindex);
}
/* Only one child remains */
- if (should_collapse(tn)) {
- collapse(t, tn);
- return;
- }
+ if (should_collapse(tn))
+ return collapse(t, tn);
+
+ /* update parent in case inflate or halve failed */
+ tp = node_parent(tn);
/* Return if at least one deflate was run */
if (max_work != MAX_WORK)
- return;
+ return tp;
/* push the suffix length to the parent node */
if (tn->slen > tn->pos) {
unsigned char slen = update_suffix(tn);
- if (tp && (slen > tp->slen))
+ if (slen > tp->slen)
tp->slen = slen;
}
-}
-
-/* readside must use rcu_read_lock currently dump routines
- via get_fa_head and dump */
-
-static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
-{
- struct hlist_head *head = &l->list;
- struct leaf_info *li;
- hlist_for_each_entry_rcu(li, head, hlist)
- if (li->plen == plen)
- return li;
-
- return NULL;
+ return tp;
}
-static inline struct list_head *get_fa_head(struct tnode *l, int plen)
+static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l)
{
- struct leaf_info *li = find_leaf_info(l, plen);
-
- if (!li)
- return NULL;
-
- return &li->falh;
-}
-
-static void leaf_pull_suffix(struct tnode *l)
-{
- struct tnode *tp = node_parent(l);
-
- while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) {
+ while ((tp->slen > tp->pos) && (tp->slen > l->slen)) {
if (update_suffix(tp) > l->slen)
break;
tp = node_parent(tp);
}
}
-static void leaf_push_suffix(struct tnode *l)
+static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l)
{
- struct tnode *tn = node_parent(l);
-
/* if this is a new leaf then tn will be NULL and we can sort
* out parent suffix lengths as a part of trie_rebalance
*/
- while (tn && (tn->slen < l->slen)) {
+ while (tn->slen < l->slen) {
tn->slen = l->slen;
tn = node_parent(tn);
}
}
-static void remove_leaf_info(struct tnode *l, struct leaf_info *old)
-{
- /* record the location of the previous list_info entry */
- struct hlist_node **pprev = old->hlist.pprev;
- struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next);
-
- /* remove the leaf info from the list */
- hlist_del_rcu(&old->hlist);
-
- /* only access li if it is pointing at the last valid hlist_node */
- if (hlist_empty(&l->list) || (*pprev))
- return;
-
- /* update the trie with the latest suffix length */
- l->slen = KEYLENGTH - li->plen;
- leaf_pull_suffix(l);
-}
-
-static void insert_leaf_info(struct tnode *l, struct leaf_info *new)
+/* rcu_read_lock needs to be held by the caller on the read side */
+static struct key_vector *fib_find_node(struct trie *t,
+ struct key_vector **tp, u32 key)
{
- struct hlist_head *head = &l->list;
- struct leaf_info *li = NULL, *last = NULL;
+ struct key_vector *pn, *n = t->kv;
+ unsigned long index = 0;
- if (hlist_empty(head)) {
- hlist_add_head_rcu(&new->hlist, head);
- } else {
- hlist_for_each_entry(li, head, hlist) {
- if (new->plen > li->plen)
- break;
-
- last = li;
- }
- if (last)
- hlist_add_behind_rcu(&new->hlist, &last->hlist);
- else
- hlist_add_before_rcu(&new->hlist, &li->hlist);
- }
-
- /* if we added to the tail node then we need to update slen */
- if (l->slen < (KEYLENGTH - new->plen)) {
- l->slen = KEYLENGTH - new->plen;
- leaf_push_suffix(l);
- }
-}
+ do {
+ pn = n;
+ n = get_child_rcu(n, index);
-/* rcu_read_lock needs to be hold by caller from readside */
-static struct tnode *fib_find_node(struct trie *t, u32 key)
-{
- struct tnode *n = rcu_dereference_rtnl(t->trie);
+ if (!n)
+ break;
- while (n) {
- unsigned long index = get_index(key, n);
+ index = get_cindex(key, n);
/* This bit of code is a bit tricky but it combines multiple
* checks into a single check. The prefix consists of the
* prefix plus zeros for the bits in the cindex. The index
* is the difference between the key and this value. From
* this we can actually derive several pieces of data.
- * if (index & (~0ul << bits))
+ * if (index >= (1ul << bits))
* we have a mismatch in skip bits and failed
* else
* we know the value is cindex
+ *
+ * This check is safe even if bits == KEYLENGTH due to the
+ * fact that we can only allocate a node with 32 bits if a
+ * long is greater than 32 bits.
*/
- if (index & (~0ul << n->bits))
- return NULL;
-
- /* we have found a leaf. Prefixes have already been compared */
- if (IS_LEAF(n))
+ if (index >= (1ul << n->bits)) {
+ n = NULL;
break;
+ }
- n = tnode_get_child_rcu(n, index);
- }
+ /* keep searching until we find a perfect match leaf or NULL */
+ } while (IS_TNODE(n));
+
+ *tp = pn;
return n;
}
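
fib_find_node() now starts from the trie's embedded root key_vector, walks down with get_child_rcu(), and hands the parent back through *tp so fib_table_insert() and fib_table_delete() can reuse it as the insertion/removal point. The "single check" in the loop folds the skipped-bits test and the child-index computation into one comparison. A standalone sketch of that comparison with concrete, made-up numbers:

    #include <stdio.h>
    #include <stdint.h>

    /* toy node carrying only what the index math needs */
    struct node { uint32_t key; unsigned char pos, bits; };

    int main(void)
    {
        /* internal node covering 10.0.0.0/8 that indexes on key bits 23..22 */
        struct node n = { .key = 0x0a000000, .pos = 22, .bits = 2 };

        uint32_t keys[] = {
            0x0a400001,   /* 10.64.0.1 - inside 10/8  */
            0x0b000001,   /* 11.0.0.1  - outside 10/8 */
        };

        for (int i = 0; i < 2; i++) {
            unsigned long index = (unsigned long)(keys[i] ^ n.key) >> n.pos;

            if (index >= (1ul << n.bits))
                printf("%08x: skipped-bits mismatch, not in this subtrie\n",
                       (unsigned)keys[i]);
            else
                printf("%08x: descend into child %lu\n",
                       (unsigned)keys[i], index);
        }
        return 0;
    }
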
@@ -994,14 +951,19 @@ static struct tnode *fib_find_node(struct trie *t, u32 key)
/* Return the first fib alias matching TOS with
* priority less than or equal to PRIO.
*/
-static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
+static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
+ u8 tos, u32 prio)
{
struct fib_alias *fa;
if (!fah)
return NULL;
- list_for_each_entry(fa, fah, fa_list) {
+ hlist_for_each_entry(fa, fah, fa_list) {
+ if (fa->fa_slen < slen)
+ continue;
+ if (fa->fa_slen != slen)
+ break;
if (fa->fa_tos > tos)
continue;
if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1011,77 +973,23 @@ static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
return NULL;
}
-static void trie_rebalance(struct trie *t, struct tnode *tn)
+static void trie_rebalance(struct trie *t, struct key_vector *tn)
{
- struct tnode *tp;
-
- while ((tp = node_parent(tn)) != NULL) {
- resize(t, tn);
- tn = tp;
- }
-
- /* Handle last (top) tnode */
- if (IS_TNODE(tn))
- resize(t, tn);
+ while (!IS_TRIE(tn))
+ tn = resize(t, tn);
}
-/* only used from updater-side */
-
-static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
+static int fib_insert_node(struct trie *t, struct key_vector *tp,
+ struct fib_alias *new, t_key key)
{
- struct list_head *fa_head = NULL;
- struct tnode *l, *n, *tp = NULL;
- struct leaf_info *li;
-
- li = leaf_info_new(plen);
- if (!li)
- return NULL;
- fa_head = &li->falh;
-
- n = rtnl_dereference(t->trie);
-
- /* If we point to NULL, stop. Either the tree is empty and we should
- * just put a new leaf in if, or we have reached an empty child slot,
- * and we should just put our new leaf in that.
- *
- * If we hit a node with a key that does't match then we should stop
- * and create a new tnode to replace that node and insert ourselves
- * and the other node into the new tnode.
- */
- while (n) {
- unsigned long index = get_index(key, n);
-
- /* This bit of code is a bit tricky but it combines multiple
- * checks into a single check. The prefix consists of the
- * prefix plus zeros for the "bits" in the prefix. The index
- * is the difference between the key and this value. From
- * this we can actually derive several pieces of data.
- * if !(index >> bits)
- * we know the value is child index
- * else
- * we have a mismatch in skip bits and failed
- */
- if (index >> n->bits)
- break;
+ struct key_vector *n, *l;
- /* we have found a leaf. Prefixes have already been compared */
- if (IS_LEAF(n)) {
- /* Case 1: n is a leaf, and prefixes match*/
- insert_leaf_info(n, li);
- return fa_head;
- }
-
- tp = n;
- n = tnode_get_child_rcu(n, index);
- }
-
- l = leaf_new(key);
- if (!l) {
- free_leaf_info(li);
- return NULL;
- }
+ l = leaf_new(key, new);
+ if (!l)
+ goto noleaf;
- insert_leaf_info(l, li);
+ /* retrieve child from parent node */
+ n = get_child(tp, get_index(key, tp));
/* Case 2: n is a LEAF or a TNODE and the key doesn't match.
*
@@ -1090,21 +998,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
* leaves us in position for handling as case 3
*/
if (n) {
- struct tnode *tn;
+ struct key_vector *tn;
tn = tnode_new(key, __fls(key ^ n->key), 1);
- if (!tn) {
- free_leaf_info(li);
- node_free(l);
- return NULL;
- }
+ if (!tn)
+ goto notnode;
/* initialize routes out of node */
NODE_INIT_PARENT(tn, tp);
put_child(tn, get_index(key, tn) ^ 1, n);
/* start adding routes into the node */
- put_child_root(tp, t, key, tn);
+ put_child_root(tp, key, tn);
node_set_parent(n, tn);
/* parent now has a NULL spot where the leaf can go */
@@ -1112,69 +1017,89 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
}
/* Case 3: n is NULL, and will just insert a new leaf */
- if (tp) {
- NODE_INIT_PARENT(l, tp);
- put_child(tp, get_index(key, tp), l);
- trie_rebalance(t, tp);
+ NODE_INIT_PARENT(l, tp);
+ put_child_root(tp, key, l);
+ trie_rebalance(t, tp);
+
+ return 0;
+notnode:
+ node_free(l);
+noleaf:
+ return -ENOMEM;
+}
+
+static int fib_insert_alias(struct trie *t, struct key_vector *tp,
+ struct key_vector *l, struct fib_alias *new,
+ struct fib_alias *fa, t_key key)
+{
+ if (!l)
+ return fib_insert_node(t, tp, new, key);
+
+ if (fa) {
+ hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
} else {
- rcu_assign_pointer(t->trie, l);
+ struct fib_alias *last;
+
+ hlist_for_each_entry(last, &l->leaf, fa_list) {
+ if (new->fa_slen < last->fa_slen)
+ break;
+ fa = last;
+ }
+
+ if (fa)
+ hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
+ else
+ hlist_add_head_rcu(&new->fa_list, &l->leaf);
+ }
+
+ /* if we added to the tail node then we need to update slen */
+ if (l->slen < new->fa_slen) {
+ l->slen = new->fa_slen;
+ leaf_push_suffix(tp, l);
}
- return fa_head;
+ return 0;
}
-/*
- * Caller must hold RTNL.
- */
+/* Caller must hold RTNL. */
int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
{
- struct trie *t = (struct trie *) tb->tb_data;
+ struct trie *t = (struct trie *)tb->tb_data;
struct fib_alias *fa, *new_fa;
- struct list_head *fa_head = NULL;
+ struct key_vector *l, *tp;
struct fib_info *fi;
- int plen = cfg->fc_dst_len;
+ u8 plen = cfg->fc_dst_len;
+ u8 slen = KEYLENGTH - plen;
u8 tos = cfg->fc_tos;
- u32 key, mask;
+ u32 key;
int err;
- struct tnode *l;
- if (plen > 32)
+ if (plen > KEYLENGTH)
return -EINVAL;
key = ntohl(cfg->fc_dst);
pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
- mask = ntohl(inet_make_mask(plen));
-
- if (key & ~mask)
+ if ((plen < KEYLENGTH) && (key << plen))
return -EINVAL;
- key = key & mask;
-
fi = fib_create_info(cfg);
if (IS_ERR(fi)) {
err = PTR_ERR(fi);
goto err;
}
- l = fib_find_node(t, key);
- fa = NULL;
-
- if (l) {
- fa_head = get_fa_head(l, plen);
- fa = fib_find_alias(fa_head, tos, fi->fib_priority);
- }
+ l = fib_find_node(t, &tp, key);
+ fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority) : NULL;
/* Now fa, if non-NULL, points to the first fib alias
* with the same keys [prefix,tos,priority], if such key already
* exists or to the node before which we will insert new one.
*
* If fa is NULL, we will need to allocate a new one and
- * insert to the head of f.
- *
- * If f is NULL, no fib node matched the destination key
- * and we need to allocate a new one of those as well.
+ * insert to the tail of the section matching the suffix length
+ * of the new alias.
*/
if (fa && fa->fa_tos == tos &&
@@ -1192,9 +1117,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
*/
fa_match = NULL;
fa_first = fa;
- fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
- list_for_each_entry_continue(fa, fa_head, fa_list) {
- if (fa->fa_tos != tos)
+ hlist_for_each_entry_from(fa, fa_list) {
+ if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
@@ -1226,8 +1150,20 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_type = cfg->fc_type;
state = fa->fa_state;
new_fa->fa_state = state & ~FA_S_ACCESSED;
+ new_fa->fa_slen = fa->fa_slen;
+
+ err = netdev_switch_fib_ipv4_add(key, plen, fi,
+ new_fa->fa_tos,
+ cfg->fc_type,
+ tb->tb_id);
+ if (err) {
+ netdev_switch_fib_ipv4_abort(fi);
+ kmem_cache_free(fn_alias_kmem, new_fa);
+ goto out;
+ }
+
+ hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
- list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
alias_free_mem_rcu(fa);
fib_release_info(fi_drop);
@@ -1261,30 +1197,32 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_tos = tos;
new_fa->fa_type = cfg->fc_type;
new_fa->fa_state = 0;
- /*
- * Insert new entry to the list.
- */
-
- if (!fa_head) {
- fa_head = fib_insert_node(t, key, plen);
- if (unlikely(!fa_head)) {
- err = -ENOMEM;
- goto out_free_new_fa;
- }
+ new_fa->fa_slen = slen;
+
+ /* (Optionally) offload fib entry to switch hardware. */
+ err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
+ cfg->fc_type, tb->tb_id);
+ if (err) {
+ netdev_switch_fib_ipv4_abort(fi);
+ goto out_free_new_fa;
}
+ /* Insert new entry to the list. */
+ err = fib_insert_alias(t, tp, l, new_fa, fa, key);
+ if (err)
+ goto out_sw_fib_del;
+
if (!plen)
tb->tb_num_default++;
- list_add_tail_rcu(&new_fa->fa_list,
- (fa ? &fa->fa_list : fa_head));
-
rt_cache_flush(cfg->fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
succeeded:
return 0;
+out_sw_fib_del:
+ netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1293,7 +1231,7 @@ err:
return err;
}
-static inline t_key prefix_mismatch(t_key key, struct tnode *n)
+static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
{
t_key prefix = n->key;
@@ -1309,11 +1247,15 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
struct trie_use_stats __percpu *stats = t->stats;
#endif
const t_key key = ntohl(flp->daddr);
- struct tnode *n, *pn;
- struct leaf_info *li;
+ struct key_vector *n, *pn;
+ struct fib_alias *fa;
+ unsigned long index;
t_key cindex;
- n = rcu_dereference(t->trie);
+ pn = t->kv;
+ cindex = 0;
+
+ n = get_child_rcu(pn, cindex);
if (!n)
return -EAGAIN;
@@ -1321,24 +1263,25 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
this_cpu_inc(stats->gets);
#endif
- pn = n;
- cindex = 0;
-
/* Step 1: Travel to the longest prefix match in the trie */
for (;;) {
- unsigned long index = get_index(key, n);
+ index = get_cindex(key, n);
/* This bit of code is a bit tricky but it combines multiple
* checks into a single check. The prefix consists of the
* prefix plus zeros for the "bits" in the prefix. The index
* is the difference between the key and this value. From
* this we can actually derive several pieces of data.
- * if (index & (~0ul << bits))
+ * if (index >= (1ul << bits))
* we have a mismatch in skip bits and failed
* else
* we know the value is cindex
+ *
+ * This check is safe even if bits == KEYLENGTH due to the
+ * fact that we can only allocate a node with 32 bits if a
+ * long is greater than 32 bits.
*/
- if (index & (~0ul << n->bits))
+ if (index >= (1ul << n->bits))
break;
/* we have found a leaf. Prefixes have already been compared */
@@ -1353,7 +1296,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
cindex = index;
}
- n = tnode_get_child_rcu(n, index);
+ n = get_child_rcu(n, index);
if (unlikely(!n))
goto backtrace;
}
@@ -1361,7 +1304,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
/* Step 2: Sort out leaves and begin backtracing for longest prefix */
for (;;) {
/* record the pointer where our next node pointer is stored */
- struct tnode __rcu **cptr = n->child;
+ struct key_vector __rcu **cptr = n->tnode;
/* This test verifies that none of the bits that differ
* between the key and the prefix exist in the region of
@@ -1393,13 +1336,17 @@ backtrace:
while (!cindex) {
t_key pkey = pn->key;
- pn = node_parent_rcu(pn);
- if (unlikely(!pn))
+ /* If we don't have a parent then there is
+ * nothing for us to do as we do not have any
+ * further nodes to parse.
+ */
+ if (IS_TRIE(pn))
return -EAGAIN;
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->backtrack);
#endif
/* Get Child's index */
+ pn = node_parent_rcu(pn);
cindex = get_index(pkey, pn);
}
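
The backtracking code just below steps to the next candidate child with cindex &= cindex - 1, the classic trick that clears the lowest set bit of an integer on each pass. A trivial standalone illustration:

    #include <stdio.h>

    int main(void)
    {
        unsigned long cindex = 0x16;   /* 0b10110 */

        /* x & (x - 1) clears the lowest set bit each time:
         * 0b10110 -> 0b10100 -> 0b10000 -> 0
         */
        while (cindex) {
            printf("cindex = 0x%lx\n", cindex);
            cindex &= cindex - 1;
        }
        return 0;
    }
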
@@ -1407,138 +1354,132 @@ backtrace:
cindex &= cindex - 1;
/* grab pointer for next child node */
- cptr = &pn->child[cindex];
+ cptr = &pn->tnode[cindex];
}
}
found:
+ /* this line carries forward the xor from earlier in the function */
+ index = key ^ n->key;
+
/* Step 3: Process the leaf, if that fails fall back to backtracing */
- hlist_for_each_entry_rcu(li, &n->list, hlist) {
- struct fib_alias *fa;
+ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+ int nhsel, err;
- if ((key ^ n->key) & li->mask_plen)
+ if ((index >= (1ul << fa->fa_slen)) &&
+ ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
continue;
-
- list_for_each_entry_rcu(fa, &li->falh, fa_list) {
- struct fib_info *fi = fa->fa_info;
- int nhsel, err;
-
- if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
- continue;
- if (fi->fib_dead)
- continue;
- if (fa->fa_info->fib_scope < flp->flowi4_scope)
- continue;
- fib_alias_accessed(fa);
- err = fib_props[fa->fa_type].error;
- if (unlikely(err < 0)) {
+ if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ continue;
+ if (fi->fib_dead)
+ continue;
+ if (fa->fa_info->fib_scope < flp->flowi4_scope)
+ continue;
+ fib_alias_accessed(fa);
+ err = fib_props[fa->fa_type].error;
+ if (unlikely(err < 0)) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
- this_cpu_inc(stats->semantic_match_passed);
+ this_cpu_inc(stats->semantic_match_passed);
#endif
- return err;
- }
- if (fi->fib_flags & RTNH_F_DEAD)
+ return err;
+ }
+ if (fi->fib_flags & RTNH_F_DEAD)
+ continue;
+ for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+ const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+ if (nh->nh_flags & RTNH_F_DEAD)
+ continue;
+ if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
continue;
- for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
- const struct fib_nh *nh = &fi->fib_nh[nhsel];
-
- if (nh->nh_flags & RTNH_F_DEAD)
- continue;
- if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
- continue;
-
- if (!(fib_flags & FIB_LOOKUP_NOREF))
- atomic_inc(&fi->fib_clntref);
-
- res->prefixlen = li->plen;
- res->nh_sel = nhsel;
- res->type = fa->fa_type;
- res->scope = fi->fib_scope;
- res->fi = fi;
- res->table = tb;
- res->fa_head = &li->falh;
+
+ if (!(fib_flags & FIB_LOOKUP_NOREF))
+ atomic_inc(&fi->fib_clntref);
+
+ res->prefixlen = KEYLENGTH - fa->fa_slen;
+ res->nh_sel = nhsel;
+ res->type = fa->fa_type;
+ res->scope = fi->fib_scope;
+ res->fi = fi;
+ res->table = tb;
+ res->fa_head = &n->leaf;
#ifdef CONFIG_IP_FIB_TRIE_STATS
- this_cpu_inc(stats->semantic_match_passed);
+ this_cpu_inc(stats->semantic_match_passed);
#endif
- return err;
- }
+ return err;
}
-
+ }
#ifdef CONFIG_IP_FIB_TRIE_STATS
- this_cpu_inc(stats->semantic_match_miss);
+ this_cpu_inc(stats->semantic_match_miss);
#endif
- }
goto backtrace;
}
EXPORT_SYMBOL_GPL(fib_table_lookup);
-/*
- * Remove the leaf and return parent.
- */
-static void trie_leaf_remove(struct trie *t, struct tnode *l)
+static void fib_remove_alias(struct trie *t, struct key_vector *tp,
+ struct key_vector *l, struct fib_alias *old)
{
- struct tnode *tp = node_parent(l);
+ /* record the location of the previous fib_alias entry */
+ struct hlist_node **pprev = old->fa_list.pprev;
+ struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
- pr_debug("entering trie_leaf_remove(%p)\n", l);
+ /* remove the fib_alias from the list */
+ hlist_del_rcu(&old->fa_list);
- if (tp) {
- put_child(tp, get_index(l->key, tp), NULL);
+ /* if we emptied the list this leaf will be freed and we can sort
+ * out parent suffix lengths as a part of trie_rebalance
+ */
+ if (hlist_empty(&l->leaf)) {
+ put_child_root(tp, l->key, NULL);
+ node_free(l);
trie_rebalance(t, tp);
- } else {
- RCU_INIT_POINTER(t->trie, NULL);
+ return;
}
- node_free(l);
+ /* only access fa if it is pointing at the last valid hlist_node */
+ if (*pprev)
+ return;
+
+ /* update the trie with the latest suffix length */
+ l->slen = fa->fa_slen;
+ leaf_pull_suffix(tp, l);
}
-/*
- * Caller must hold RTNL.
- */
+/* Caller must hold RTNL. */
int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
- u32 key, mask;
- int plen = cfg->fc_dst_len;
- u8 tos = cfg->fc_tos;
struct fib_alias *fa, *fa_to_delete;
- struct list_head *fa_head;
- struct tnode *l;
- struct leaf_info *li;
+ struct key_vector *l, *tp;
+ u8 plen = cfg->fc_dst_len;
+ u8 slen = KEYLENGTH - plen;
+ u8 tos = cfg->fc_tos;
+ u32 key;
- if (plen > 32)
+ if (plen > KEYLENGTH)
return -EINVAL;
key = ntohl(cfg->fc_dst);
- mask = ntohl(inet_make_mask(plen));
- if (key & ~mask)
+ if ((plen < KEYLENGTH) && (key << plen))
return -EINVAL;
- key = key & mask;
- l = fib_find_node(t, key);
-
+ l = fib_find_node(t, &tp, key);
if (!l)
return -ESRCH;
- li = find_leaf_info(l, plen);
-
- if (!li)
- return -ESRCH;
-
- fa_head = &li->falh;
- fa = fib_find_alias(fa_head, tos, 0);
-
+ fa = fib_find_alias(&l->leaf, slen, tos, 0);
if (!fa)
return -ESRCH;
pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
fa_to_delete = NULL;
- fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
- list_for_each_entry_continue(fa, fa_head, fa_list) {
+ hlist_for_each_entry_from(fa, fa_list) {
struct fib_info *fi = fa->fa_info;
- if (fa->fa_tos != tos)
+ if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
break;
if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
@@ -1557,172 +1498,215 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if (!fa_to_delete)
return -ESRCH;
- fa = fa_to_delete;
- rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
- &cfg->fc_nlinfo, 0);
+ netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+ cfg->fc_type, tb->tb_id);
- list_del_rcu(&fa->fa_list);
+ rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
+ &cfg->fc_nlinfo, 0);
if (!plen)
tb->tb_num_default--;
- if (list_empty(fa_head)) {
- remove_leaf_info(l, li);
- free_leaf_info(li);
- }
-
- if (hlist_empty(&l->list))
- trie_leaf_remove(t, l);
+ fib_remove_alias(t, tp, l, fa_to_delete);
- if (fa->fa_state & FA_S_ACCESSED)
+ if (fa_to_delete->fa_state & FA_S_ACCESSED)
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- fib_release_info(fa->fa_info);
- alias_free_mem_rcu(fa);
+ fib_release_info(fa_to_delete->fa_info);
+ alias_free_mem_rcu(fa_to_delete);
return 0;
}
-static int trie_flush_list(struct list_head *head)
+/* Scan for the next leaf starting at the provided key value */
+static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
{
- struct fib_alias *fa, *fa_node;
- int found = 0;
+ struct key_vector *pn, *n = *tn;
+ unsigned long cindex;
- list_for_each_entry_safe(fa, fa_node, head, fa_list) {
- struct fib_info *fi = fa->fa_info;
+ /* this loop is meant to try and find the key in the trie */
+ do {
+ /* record parent and next child index */
+ pn = n;
+ cindex = get_index(key, pn);
- if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
- list_del_rcu(&fa->fa_list);
- fib_release_info(fa->fa_info);
- alias_free_mem_rcu(fa);
- found++;
- }
- }
- return found;
-}
+ if (cindex >> pn->bits)
+ break;
-static int trie_flush_leaf(struct tnode *l)
-{
- int found = 0;
- struct hlist_head *lih = &l->list;
- struct hlist_node *tmp;
- struct leaf_info *li = NULL;
- unsigned char plen = KEYLENGTH;
+ /* descend into the next child */
+ n = get_child_rcu(pn, cindex++);
+ if (!n)
+ break;
+
+ /* guarantee forward progress on the keys */
+ if (IS_LEAF(n) && (n->key >= key))
+ goto found;
+ } while (IS_TNODE(n));
- hlist_for_each_entry_safe(li, tmp, lih, hlist) {
- found += trie_flush_list(&li->falh);
+ /* this loop will search for the next leaf with a greater key */
+ while (!IS_TRIE(pn)) {
+ /* if we exhausted the parent node we will need to climb */
+ if (cindex >= (1ul << pn->bits)) {
+ t_key pkey = pn->key;
- if (list_empty(&li->falh)) {
- hlist_del_rcu(&li->hlist);
- free_leaf_info(li);
+ pn = node_parent_rcu(pn);
+ cindex = get_index(pkey, pn) + 1;
continue;
}
- plen = li->plen;
- }
+ /* grab the next available node */
+ n = get_child_rcu(pn, cindex++);
+ if (!n)
+ continue;
- l->slen = KEYLENGTH - plen;
+ /* no need to compare keys since we bumped the index */
+ if (IS_LEAF(n))
+ goto found;
- return found;
+ /* Rescan: start scanning in the new node */
+ pn = n;
+ cindex = 0;
+ }
+
+ *tn = pn;
+ return NULL; /* Root of trie */
+found:
+ /* if we are at the limit for keys just return NULL for the tnode */
+ *tn = pn;
+ return n;
}
-/*
- * Scan for the next right leaf starting at node p->child[idx]
- * Since we have back pointer, no recursion necessary.
- */
-static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
+/* Caller must hold RTNL */
+void fib_table_flush_external(struct fib_table *tb)
{
- do {
- unsigned long idx = c ? idx = get_index(c->key, p) + 1 : 0;
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct key_vector *pn = t->kv;
+ unsigned long cindex = 1;
+ struct hlist_node *tmp;
+ struct fib_alias *fa;
- while (idx < tnode_child_length(p)) {
- c = tnode_get_child_rcu(p, idx++);
- if (!c)
- continue;
+ /* walk trie in reverse order */
+ for (;;) {
+ struct key_vector *n;
+
+ if (!(cindex--)) {
+ t_key pkey = pn->key;
- if (IS_LEAF(c))
- return c;
+ /* cannot resize the trie vector */
+ if (IS_TRIE(pn))
+ break;
+
+ /* no need to resize like in flush below */
+ pn = node_parent(pn);
+ cindex = get_index(pkey, pn);
- /* Rescan start scanning in new node */
- p = c;
- idx = 0;
+ continue;
}
- /* Node empty, walk back up to parent */
- c = p;
- } while ((p = node_parent_rcu(c)) != NULL);
+ /* grab the next available node */
+ n = get_child(pn, cindex);
+ if (!n)
+ continue;
- return NULL; /* Root of trie */
-}
+ if (IS_TNODE(n)) {
+ /* record pn and cindex for leaf walking */
+ pn = n;
+ cindex = 1ul << n->bits;
-static struct tnode *trie_firstleaf(struct trie *t)
-{
- struct tnode *n = rcu_dereference_rtnl(t->trie);
+ continue;
+ }
- if (!n)
- return NULL;
+ hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
+ struct fib_info *fi = fa->fa_info;
- if (IS_LEAF(n)) /* trie is just a leaf */
- return n;
+ if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL))
+ continue;
- return leaf_walk_rcu(n, NULL);
+ netdev_switch_fib_ipv4_del(n->key,
+ KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos,
+ fa->fa_type, tb->tb_id);
+ }
+ }
}
-static struct tnode *trie_nextleaf(struct tnode *l)
+/* Caller must hold RTNL. */
+int fib_table_flush(struct fib_table *tb)
{
- struct tnode *p = node_parent_rcu(l);
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct key_vector *pn = t->kv;
+ unsigned long cindex = 1;
+ struct hlist_node *tmp;
+ struct fib_alias *fa;
+ int found = 0;
- if (!p)
- return NULL; /* trie with just one leaf */
+ /* walk trie in reverse order */
+ for (;;) {
+ unsigned char slen = 0;
+ struct key_vector *n;
- return leaf_walk_rcu(p, l);
-}
+ if (!(cindex--)) {
+ t_key pkey = pn->key;
-static struct tnode *trie_leafindex(struct trie *t, int index)
-{
- struct tnode *l = trie_firstleaf(t);
+ /* cannot resize the trie vector */
+ if (IS_TRIE(pn))
+ break;
- while (l && index-- > 0)
- l = trie_nextleaf(l);
+ /* resize completed node */
+ pn = resize(t, pn);
+ cindex = get_index(pkey, pn);
- return l;
-}
+ continue;
+ }
+ /* grab the next available node */
+ n = get_child(pn, cindex);
+ if (!n)
+ continue;
-/*
- * Caller must hold RTNL.
- */
-int fib_table_flush(struct fib_table *tb)
-{
- struct trie *t = (struct trie *) tb->tb_data;
- struct tnode *l, *ll = NULL;
- int found = 0;
+ if (IS_TNODE(n)) {
+ /* record pn and cindex for leaf walking */
+ pn = n;
+ cindex = 1ul << n->bits;
- for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
- found += trie_flush_leaf(l);
+ continue;
+ }
- if (ll) {
- if (hlist_empty(&ll->list))
- trie_leaf_remove(t, ll);
- else
- leaf_pull_suffix(ll);
+ hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+ if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) {
+ slen = fa->fa_slen;
+ continue;
+ }
+
+ netdev_switch_fib_ipv4_del(n->key,
+ KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos,
+ fa->fa_type, tb->tb_id);
+ hlist_del_rcu(&fa->fa_list);
+ fib_release_info(fa->fa_info);
+ alias_free_mem_rcu(fa);
+ found++;
}
- ll = l;
- }
+ /* update leaf slen */
+ n->slen = slen;
- if (ll) {
- if (hlist_empty(&ll->list))
- trie_leaf_remove(t, ll);
- else
- leaf_pull_suffix(ll);
+ if (hlist_empty(&n->leaf)) {
+ put_child_root(pn, n->key, NULL);
+ node_free(n);
+ } else {
+ leaf_pull_suffix(pn, n);
+ }
}
pr_debug("trie_flush found=%d\n", found);
return found;
}
-void fib_free_table(struct fib_table *tb)
+static void __trie_free_rcu(struct rcu_head *head)
{
+ struct fib_table *tb = container_of(head, struct fib_table, rcu);
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie *t = (struct trie *)tb->tb_data;
@@ -1731,20 +1715,23 @@ void fib_free_table(struct fib_table *tb)
kfree(tb);
}
-static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
- struct fib_table *tb,
- struct sk_buff *skb, struct netlink_callback *cb)
+void fib_free_table(struct fib_table *tb)
+{
+ call_rcu(&tb->rcu, __trie_free_rcu);
+}
+
+static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
+ struct sk_buff *skb, struct netlink_callback *cb)
{
- int i, s_i;
+ __be32 xkey = htonl(l->key);
struct fib_alias *fa;
- __be32 xkey = htonl(key);
+ int i, s_i;
- s_i = cb->args[5];
+ s_i = cb->args[4];
i = 0;
/* rcu_read_lock is hold by caller */
-
- list_for_each_entry_rcu(fa, fah, fa_list) {
+ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (i < s_i) {
i++;
continue;
@@ -1756,41 +1743,9 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
tb->tb_id,
fa->fa_type,
xkey,
- plen,
+ KEYLENGTH - fa->fa_slen,
fa->fa_tos,
fa->fa_info, NLM_F_MULTI) < 0) {
- cb->args[5] = i;
- return -1;
- }
- i++;
- }
- cb->args[5] = i;
- return skb->len;
-}
-
-static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
- struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct leaf_info *li;
- int i, s_i;
-
- s_i = cb->args[4];
- i = 0;
-
- /* rcu_read_lock is hold by caller */
- hlist_for_each_entry_rcu(li, &l->list, hlist) {
- if (i < s_i) {
- i++;
- continue;
- }
-
- if (i > s_i)
- cb->args[5] = 0;
-
- if (list_empty(&li->falh))
- continue;
-
- if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
cb->args[4] = i;
return -1;
}
@@ -1801,44 +1756,38 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
return skb->len;
}
+/* rcu_read_lock needs to be held by the caller from the read side */
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct tnode *l;
- struct trie *t = (struct trie *) tb->tb_data;
- t_key key = cb->args[2];
- int count = cb->args[3];
-
- rcu_read_lock();
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct key_vector *l, *tp = t->kv;
/* Dump starting at last key.
* Note: 0.0.0.0/0 (ie default) is first key.
*/
- if (count == 0)
- l = trie_firstleaf(t);
- else {
- /* Normally, continue from last key, but if that is missing
- * fallback to using slow rescan
- */
- l = fib_find_node(t, key);
- if (!l)
- l = trie_leafindex(t, count);
- }
+ int count = cb->args[2];
+ t_key key = cb->args[3];
- while (l) {
- cb->args[2] = l->key;
+ while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
- cb->args[3] = count;
- rcu_read_unlock();
+ cb->args[3] = key;
+ cb->args[2] = count;
return -1;
}
++count;
- l = trie_nextleaf(l);
+ key = l->key + 1;
+
memset(&cb->args[4], 0,
sizeof(cb->args) - 4*sizeof(cb->args[0]));
+
+ /* stop loop if key wrapped back to 0 */
+ if (key < l->key)
+ break;
}
- cb->args[3] = count;
- rcu_read_unlock();
+
+ cb->args[3] = key;
+ cb->args[2] = count;
return skb->len;
}
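
The dump loop above resumes from cb->args[3] and advances with key = l->key + 1, so a leaf at 255.255.255.255 wraps the 32-bit key back to 0; the `if (key < l->key)` test catches that and ends the walk instead of restarting it. A minimal standalone sketch of the same wrap check in plain C, with a hypothetical next_key() standing in for leaf_walk_rcu():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for leaf_walk_rcu(): return 1 and store the next
 * key >= start, or return 0 when no key remains. */
static int next_key(uint32_t start, uint32_t *out)
{
        static const uint32_t keys[] = { 0, 0x0a000000, 0xffffffff };
        size_t i;

        for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
                if (keys[i] >= start) {
                        *out = keys[i];
                        return 1;
                }
        }
        return 0;
}

int main(void)
{
        uint32_t key = 0, cur;

        while (next_key(key, &cur)) {
                printf("visit 0x%08x\n", cur);
                key = cur + 1;          /* advance past the current leaf */
                if (key < cur)          /* 32-bit wrap: cur was 0xffffffff */
                        break;          /* stop instead of looping forever */
        }
        return 0;
}
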
@@ -1850,19 +1799,16 @@ void __init fib_trie_init(void)
0, SLAB_PANIC, NULL);
trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
- max(sizeof(struct tnode),
- sizeof(struct leaf_info)),
+ LEAF_SIZE,
0, SLAB_PANIC, NULL);
}
-
struct fib_table *fib_trie_table(u32 id)
{
struct fib_table *tb;
struct trie *t;
- tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
- GFP_KERNEL);
+ tb = kzalloc(sizeof(*tb) + sizeof(struct trie), GFP_KERNEL);
if (tb == NULL)
return NULL;
@@ -1871,7 +1817,8 @@ struct fib_table *fib_trie_table(u32 id)
tb->tb_num_default = 0;
t = (struct trie *) tb->tb_data;
- RCU_INIT_POINTER(t->trie, NULL);
+ t->kv[0].pos = KEYLENGTH;
+ t->kv[0].slen = KEYLENGTH;
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats = alloc_percpu(struct trie_use_stats);
if (!t->stats) {
@@ -1888,65 +1835,63 @@ struct fib_table *fib_trie_table(u32 id)
struct fib_trie_iter {
struct seq_net_private p;
struct fib_table *tb;
- struct tnode *tnode;
+ struct key_vector *tnode;
unsigned int index;
unsigned int depth;
};
-static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter)
+static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
{
unsigned long cindex = iter->index;
- struct tnode *tn = iter->tnode;
- struct tnode *p;
-
- /* A single entry routing table */
- if (!tn)
- return NULL;
+ struct key_vector *pn = iter->tnode;
+ t_key pkey;
pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
iter->tnode, iter->index, iter->depth);
-rescan:
- while (cindex < tnode_child_length(tn)) {
- struct tnode *n = tnode_get_child_rcu(tn, cindex);
- if (n) {
+ while (!IS_TRIE(pn)) {
+ while (cindex < child_length(pn)) {
+ struct key_vector *n = get_child_rcu(pn, cindex++);
+
+ if (!n)
+ continue;
+
if (IS_LEAF(n)) {
- iter->tnode = tn;
- iter->index = cindex + 1;
+ iter->tnode = pn;
+ iter->index = cindex;
} else {
/* push down one level */
iter->tnode = n;
iter->index = 0;
++iter->depth;
}
+
return n;
}
- ++cindex;
- }
-
- /* Current node exhausted, pop back up */
- p = node_parent_rcu(tn);
- if (p) {
- cindex = get_index(tn->key, p) + 1;
- tn = p;
+ /* Current node exhausted, pop back up */
+ pkey = pn->key;
+ pn = node_parent_rcu(pn);
+ cindex = get_index(pkey, pn) + 1;
--iter->depth;
- goto rescan;
}
- /* got root? */
+ /* record root node so further searches know we are done */
+ iter->tnode = pn;
+ iter->index = 0;
+
return NULL;
}
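
fib_trie_get_next() now walks the trie iteratively: scan the children of the current internal node, descend when one is found, and pop back to the parent (recomputing the child index from the key) once the node is exhausted, stopping at the trie root. A rough userspace sketch of that parent-pointer walk on a toy tree; the structure and names are illustrative, not kernel types:

#include <stdio.h>

struct node {
        const char *name;
        struct node *parent;
        unsigned int slot;        /* index of this node in parent->child[] */
        unsigned int nchildren;   /* 0 for leaves */
        struct node *child[4];
};

/* Pre-order walk without recursion, keeping only the current parent and
 * the next child index, the way fib_trie_get_next() does (minus RCU). */
static void walk(struct node *root)
{
        struct node *pn = root;
        unsigned int cindex = 0;

        while (pn) {
                if (cindex < pn->nchildren) {
                        struct node *n = pn->child[cindex++];

                        if (!n)
                                continue;
                        printf("%s\n", n->name);
                        if (n->nchildren) {     /* internal node: push down */
                                pn = n;
                                cindex = 0;
                        }
                        continue;
                }
                /* current node exhausted: pop back up one level */
                cindex = pn->slot + 1;
                pn = pn->parent;
        }
}

int main(void)
{
        struct node root = { .name = "root", .nchildren = 2 };
        struct node a = { .name = "a", .parent = &root, .slot = 0, .nchildren = 2 };
        struct node a0 = { .name = "a0", .parent = &a, .slot = 0 };
        struct node a1 = { .name = "a1", .parent = &a, .slot = 1 };
        struct node b = { .name = "b", .parent = &root, .slot = 1 };

        root.child[0] = &a; root.child[1] = &b;
        a.child[0] = &a0; a.child[1] = &a1;

        walk(&root);    /* prints: a, a0, a1, b */
        return 0;
}
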
-static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter,
- struct trie *t)
+static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
+ struct trie *t)
{
- struct tnode *n;
+ struct key_vector *n, *pn = t->kv;
if (!t)
return NULL;
- n = rcu_dereference(t->trie);
+ n = rcu_dereference(pn->tnode[0]);
if (!n)
return NULL;
@@ -1955,7 +1900,7 @@ static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter,
iter->index = 0;
iter->depth = 1;
} else {
- iter->tnode = NULL;
+ iter->tnode = pn;
iter->index = 0;
iter->depth = 0;
}
@@ -1965,7 +1910,7 @@ static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter,
static void trie_collect_stats(struct trie *t, struct trie_stat *s)
{
- struct tnode *n;
+ struct key_vector *n;
struct fib_trie_iter iter;
memset(s, 0, sizeof(*s));
@@ -1973,20 +1918,20 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
rcu_read_lock();
for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
if (IS_LEAF(n)) {
- struct leaf_info *li;
+ struct fib_alias *fa;
s->leaves++;
s->totdepth += iter.depth;
if (iter.depth > s->maxdepth)
s->maxdepth = iter.depth;
- hlist_for_each_entry_rcu(li, &n->list, hlist)
+ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
++s->prefixes;
} else {
s->tnodes++;
if (n->bits < MAX_STAT_DEPTH)
s->nodesizes[n->bits]++;
- s->nullpointers += n->empty_children;
+ s->nullpointers += tn_info(n)->empty_children;
}
}
rcu_read_unlock();
@@ -2009,13 +1954,13 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
- bytes = sizeof(struct tnode) * stat->leaves;
+ bytes = LEAF_SIZE * stat->leaves;
seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
- bytes += sizeof(struct leaf_info) * stat->prefixes;
+ bytes += sizeof(struct fib_alias) * stat->prefixes;
seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
- bytes += sizeof(struct tnode) * stat->tnodes;
+ bytes += TNODE_SIZE(0) * stat->tnodes;
max = MAX_STAT_DEPTH;
while (max > 0 && stat->nodesizes[max-1] == 0)
@@ -2030,7 +1975,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
seq_putc(seq, '\n');
seq_printf(seq, "\tPointers: %u\n", pointers);
- bytes += sizeof(struct tnode *) * pointers;
+ bytes += sizeof(struct key_vector *) * pointers;
seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
}
@@ -2084,7 +2029,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"Basic info: size of leaf:"
" %Zd bytes, size of tnode: %Zd bytes.\n",
- sizeof(struct tnode), sizeof(struct tnode));
+ LEAF_SIZE, TNODE_SIZE(0));
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv4.fib_table_hash[h];
@@ -2123,7 +2068,7 @@ static const struct file_operations fib_triestat_fops = {
.release = single_release_net,
};
-static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
+static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
{
struct fib_trie_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
@@ -2135,7 +2080,7 @@ static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
struct fib_table *tb;
hlist_for_each_entry_rcu(tb, head, tb_hlist) {
- struct tnode *n;
+ struct key_vector *n;
for (n = fib_trie_get_first(iter,
(struct trie *) tb->tb_data);
@@ -2164,7 +2109,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
struct fib_table *tb = iter->tb;
struct hlist_node *tb_node;
unsigned int h;
- struct tnode *n;
+ struct key_vector *n;
++*pos;
/* next node in same table */
@@ -2250,9 +2195,9 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
static int fib_trie_seq_show(struct seq_file *seq, void *v)
{
const struct fib_trie_iter *iter = seq->private;
- struct tnode *n = v;
+ struct key_vector *n = v;
- if (!node_parent_rcu(n))
+ if (IS_TRIE(node_parent_rcu(n)))
fib_table_print(seq, iter->tb);
if (IS_TNODE(n)) {
@@ -2261,30 +2206,28 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
seq_indent(seq, iter->depth-1);
seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
&prf, KEYLENGTH - n->pos - n->bits, n->bits,
- n->full_children, n->empty_children);
+ tn_info(n)->full_children,
+ tn_info(n)->empty_children);
} else {
- struct leaf_info *li;
__be32 val = htonl(n->key);
+ struct fib_alias *fa;
seq_indent(seq, iter->depth);
seq_printf(seq, " |-- %pI4\n", &val);
- hlist_for_each_entry_rcu(li, &n->list, hlist) {
- struct fib_alias *fa;
-
- list_for_each_entry_rcu(fa, &li->falh, fa_list) {
- char buf1[32], buf2[32];
-
- seq_indent(seq, iter->depth+1);
- seq_printf(seq, " /%d %s %s", li->plen,
- rtn_scope(buf1, sizeof(buf1),
- fa->fa_info->fib_scope),
- rtn_type(buf2, sizeof(buf2),
- fa->fa_type));
- if (fa->fa_tos)
- seq_printf(seq, " tos=%d", fa->fa_tos);
- seq_putc(seq, '\n');
- }
+ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
+ char buf1[32], buf2[32];
+
+ seq_indent(seq, iter->depth + 1);
+ seq_printf(seq, " /%zu %s %s",
+ KEYLENGTH - fa->fa_slen,
+ rtn_scope(buf1, sizeof(buf1),
+ fa->fa_info->fib_scope),
+ rtn_type(buf2, sizeof(buf2),
+ fa->fa_type));
+ if (fa->fa_tos)
+ seq_printf(seq, " tos=%d", fa->fa_tos);
+ seq_putc(seq, '\n');
}
}
@@ -2314,31 +2257,47 @@ static const struct file_operations fib_trie_fops = {
struct fib_route_iter {
struct seq_net_private p;
- struct trie *main_trie;
+ struct fib_table *main_tb;
+ struct key_vector *tnode;
loff_t pos;
t_key key;
};
-static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
+static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
+ loff_t pos)
{
- struct tnode *l = NULL;
- struct trie *t = iter->main_trie;
+ struct fib_table *tb = iter->main_tb;
+ struct key_vector *l, **tp = &iter->tnode;
+ struct trie *t;
+ t_key key;
- /* use cache location of last found key */
- if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
+ /* use cache location of next-to-find key */
+ if (iter->pos > 0 && pos >= iter->pos) {
pos -= iter->pos;
- else {
+ key = iter->key;
+ } else {
+ t = (struct trie *)tb->tb_data;
+ iter->tnode = t->kv;
iter->pos = 0;
- l = trie_firstleaf(t);
+ key = 0;
}
- while (l && pos-- > 0) {
+ while ((l = leaf_walk_rcu(tp, key)) != NULL) {
+ key = l->key + 1;
iter->pos++;
- l = trie_nextleaf(l);
+
+ if (pos-- <= 0)
+ break;
+
+ l = NULL;
+
+ /* handle unlikely case of a key wrap */
+ if (!key)
+ break;
}
if (l)
- iter->key = pos; /* remember it */
+ iter->key = key; /* remember it */
else
iter->pos = 0; /* forget it */
@@ -2350,37 +2309,46 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
{
struct fib_route_iter *iter = seq->private;
struct fib_table *tb;
+ struct trie *t;
rcu_read_lock();
+
tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
if (!tb)
return NULL;
- iter->main_trie = (struct trie *) tb->tb_data;
- if (*pos == 0)
- return SEQ_START_TOKEN;
- else
- return fib_route_get_idx(iter, *pos - 1);
+ iter->main_tb = tb;
+
+ if (*pos != 0)
+ return fib_route_get_idx(iter, *pos);
+
+ t = (struct trie *)tb->tb_data;
+ iter->tnode = t->kv;
+ iter->pos = 0;
+ iter->key = 0;
+
+ return SEQ_START_TOKEN;
}
static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct fib_route_iter *iter = seq->private;
- struct tnode *l = v;
+ struct key_vector *l = NULL;
+ t_key key = iter->key;
++*pos;
- if (v == SEQ_START_TOKEN) {
- iter->pos = 0;
- l = trie_firstleaf(iter->main_trie);
- } else {
+
+ /* only allow key of 0 for start of sequence */
+ if ((v == SEQ_START_TOKEN) || key)
+ l = leaf_walk_rcu(&iter->tnode, key);
+
+ if (l) {
+ iter->key = l->key + 1;
iter->pos++;
- l = trie_nextleaf(l);
+ } else {
+ iter->pos = 0;
}
- if (l)
- iter->key = l->key;
- else
- iter->pos = 0;
return l;
}
@@ -2412,8 +2380,9 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info
*/
static int fib_route_seq_show(struct seq_file *seq, void *v)
{
- struct tnode *l = v;
- struct leaf_info *li;
+ struct fib_alias *fa;
+ struct key_vector *l = v;
+ __be32 prefix;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
@@ -2422,45 +2391,40 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
return 0;
}
- hlist_for_each_entry_rcu(li, &l->list, hlist) {
- struct fib_alias *fa;
- __be32 mask, prefix;
+ prefix = htonl(l->key);
- mask = inet_make_mask(li->plen);
- prefix = htonl(l->key);
+ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ const struct fib_info *fi = fa->fa_info;
+ __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
+ unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
- list_for_each_entry_rcu(fa, &li->falh, fa_list) {
- const struct fib_info *fi = fa->fa_info;
- unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
+ if ((fa->fa_type == RTN_BROADCAST) ||
+ (fa->fa_type == RTN_MULTICAST))
+ continue;
- if (fa->fa_type == RTN_BROADCAST
- || fa->fa_type == RTN_MULTICAST)
- continue;
+ seq_setwidth(seq, 127);
+
+ if (fi)
+ seq_printf(seq,
+ "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
+ "%d\t%08X\t%d\t%u\t%u",
+ fi->fib_dev ? fi->fib_dev->name : "*",
+ prefix,
+ fi->fib_nh->nh_gw, flags, 0, 0,
+ fi->fib_priority,
+ mask,
+ (fi->fib_advmss ?
+ fi->fib_advmss + 40 : 0),
+ fi->fib_window,
+ fi->fib_rtt >> 3);
+ else
+ seq_printf(seq,
+ "*\t%08X\t%08X\t%04X\t%d\t%u\t"
+ "%d\t%08X\t%d\t%u\t%u",
+ prefix, 0, flags, 0, 0, 0,
+ mask, 0, 0, 0);
- seq_setwidth(seq, 127);
-
- if (fi)
- seq_printf(seq,
- "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
- "%d\t%08X\t%d\t%u\t%u",
- fi->fib_dev ? fi->fib_dev->name : "*",
- prefix,
- fi->fib_nh->nh_gw, flags, 0, 0,
- fi->fib_priority,
- mask,
- (fi->fib_advmss ?
- fi->fib_advmss + 40 : 0),
- fi->fib_window,
- fi->fib_rtt >> 3);
- else
- seq_printf(seq,
- "*\t%08X\t%08X\t%04X\t%d\t%u\t"
- "%d\t%08X\t%d\t%u\t%u",
- prefix, 0, flags, 0, 0, 0,
- mask, 0, 0, 0);
-
- seq_pad(seq, '\n');
- }
+ seq_pad(seq, '\n');
}
return 0;
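
fib_route_seq_show() now derives the netmask directly from fa_slen via inet_make_mask(KEYLENGTH - fa->fa_slen) and prints destination, gateway and mask as raw 32-bit hex, which is why the hex columns of /proc/net/route appear byte-swapped on little-endian hosts. A small sketch of the assumed mask construction (userspace C, not the kernel helper itself):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Roughly what inet_make_mask() does: build a network-byte-order netmask
 * from a prefix length (0..32).  Assumed equivalence, not the kernel code. */
static uint32_t make_mask(unsigned int plen)
{
        return plen ? htonl(~0u << (32 - plen)) : 0;
}

int main(void)
{
        unsigned int slen = 8;              /* suffix length, as in fa_slen */
        unsigned int plen = 32 - slen;      /* KEYLENGTH - fa_slen */
        uint32_t dst = htonl(0x0a010000);   /* 10.1.0.0, like htonl(l->key) */

        /* /proc/net/route prints the raw 32-bit values with %08X, so on a
         * little-endian machine the bytes appear swapped (0000010A etc.). */
        printf("dst=%08X mask=%08X (prefix /%u)\n", dst, make_mask(plen), plen);
        return 0;
}
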
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 666cf364df86..5cb1ef4ce292 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -97,6 +97,7 @@
#include <net/route.h>
#include <net/sock.h>
#include <net/checksum.h>
+#include <net/inet_common.h>
#include <linux/netfilter_ipv4.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
@@ -1849,30 +1850,25 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
pmc->sfcount[MCAST_EXCLUDE] = 1;
}
-
-/*
- * Join a multicast group
- */
-int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
+int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
{
- int err;
__be32 addr = imr->imr_multiaddr.s_addr;
- struct ip_mc_socklist *iml = NULL, *i;
+ struct ip_mc_socklist *iml, *i;
struct in_device *in_dev;
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
int ifindex;
int count = 0;
+ int err;
+
+ ASSERT_RTNL();
if (!ipv4_is_multicast(addr))
return -EINVAL;
- rtnl_lock();
-
in_dev = ip_mc_find_dev(net, imr);
if (!in_dev) {
- iml = NULL;
err = -ENODEV;
goto done;
}
@@ -1900,9 +1896,22 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
ip_mc_inc_group(in_dev, addr);
err = 0;
done:
- rtnl_unlock();
return err;
}
+EXPORT_SYMBOL(__ip_mc_join_group);
+
+/* Join a multicast group
+ */
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = __ip_mc_join_group(sk, imr);
+ rtnl_unlock();
+
+ return ret;
+}
EXPORT_SYMBOL(ip_mc_join_group);
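
The join path is split into __ip_mc_join_group(), which only asserts that the caller already holds the RTNL, and an ip_mc_join_group() wrapper that takes and releases the lock around it; callers that already run under RTNL (such as the autojoin paths added below) can use the __ variant without deadlocking. A generic userspace sketch of the same locked/unlocked split with a pthread mutex; all names are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;
static int cfg_value;

/* Core operation: caller must already hold cfg_lock (the __ variant). */
static int __cfg_set(int v)
{
        /* In the kernel this spot would be ASSERT_RTNL(). */
        cfg_value = v;
        return 0;
}

/* Public wrapper: takes the lock around the core (the plain variant). */
static int cfg_set(int v)
{
        int ret;

        pthread_mutex_lock(&cfg_lock);
        ret = __cfg_set(v);
        pthread_mutex_unlock(&cfg_lock);
        return ret;
}

int main(void)
{
        /* A caller that does not hold the lock uses the wrapper... */
        cfg_set(1);

        /* ...while a caller already inside a locked section calls the
         * __ variant directly, avoiding a recursive-lock deadlock. */
        pthread_mutex_lock(&cfg_lock);
        __cfg_set(2);
        pthread_mutex_unlock(&cfg_lock);

        printf("cfg_value=%d\n", cfg_value);
        return 0;
}
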
static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
@@ -1925,11 +1934,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
return err;
}
-/*
- * Ask a socket to leave a group.
- */
-
-int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
+int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *iml;
@@ -1940,7 +1945,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
u32 ifindex;
int ret = -EADDRNOTAVAIL;
- rtnl_lock();
+ ASSERT_RTNL();
+
in_dev = ip_mc_find_dev(net, imr);
if (!in_dev) {
ret = -ENODEV;
@@ -1964,14 +1970,25 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
ip_mc_dec_group(in_dev, group);
- rtnl_unlock();
+
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
kfree_rcu(iml, rcu);
return 0;
}
out:
+ return ret;
+}
+EXPORT_SYMBOL(__ip_mc_leave_group);
+
+int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = __ip_mc_leave_group(sk, imr);
rtnl_unlock();
+
return ret;
}
EXPORT_SYMBOL(ip_mc_leave_group);
@@ -2724,6 +2741,7 @@ static const struct file_operations igmp_mcf_seq_fops = {
static int __net_init igmp_net_init(struct net *net)
{
struct proc_dir_entry *pde;
+ int err;
pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
if (!pde)
@@ -2732,8 +2750,18 @@ static int __net_init igmp_net_init(struct net *net)
&igmp_mcf_seq_fops);
if (!pde)
goto out_mcfilter;
+ err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET,
+ SOCK_DGRAM, 0, net);
+ if (err < 0) {
+ pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n",
+ err);
+ goto out_sock;
+ }
+
return 0;
+out_sock:
+ remove_proc_entry("mcfilter", net->proc_net);
out_mcfilter:
remove_proc_entry("igmp", net->proc_net);
out_igmp:
@@ -2744,6 +2772,7 @@ static void __net_exit igmp_net_exit(struct net *net)
{
remove_proc_entry("mcfilter", net->proc_net);
remove_proc_entry("igmp", net->proc_net);
+ inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk);
}
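
igmp_net_init() now creates a third resource (the autojoin control socket) and extends the existing goto unwinding: each failure label releases only what was acquired before it, in reverse order. A small standalone illustration of that error-unwind shape with hypothetical resources:

#include <stdio.h>
#include <stdlib.h>

static int acquire(const char *what, int fail)
{
        if (fail) {
                fprintf(stderr, "failed to acquire %s\n", what);
                return -1;
        }
        printf("acquired %s\n", what);
        return 0;
}

static void release(const char *what)
{
        printf("released %s\n", what);
}

/* Acquire A, then B, then C; on failure unwind only what was taken. */
static int net_init(int fail_at)
{
        int err;

        err = acquire("A (proc entry)", fail_at == 1);
        if (err)
                goto out;
        err = acquire("B (second proc entry)", fail_at == 2);
        if (err)
                goto out_a;
        err = acquire("C (control socket)", fail_at == 3);
        if (err)
                goto out_b;
        return 0;

out_b:
        release("B (second proc entry)");
out_a:
        release("A (proc entry)");
out:
        return err;
}

int main(void)
{
        return net_init(3) ? EXIT_FAILURE : EXIT_SUCCESS;
}
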
static struct pernet_operations igmp_net_ops = {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 81751f12645f..0c974d3499ed 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -508,7 +508,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
}
entry.sport = inet->inet_num;
entry.dport = ntohs(inet->inet_dport);
- entry.userlocks = sk->sk_userlocks;
+ entry.userlocks = (sk->sk_state != TCP_TIME_WAIT) ? sk->sk_userlocks : 0;
return inet_diag_bc_run(bc, &entry);
}
@@ -642,37 +642,44 @@ static int inet_csk_diag_dump(struct sock *sk,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
+static void twsk_build_assert(void)
+{
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
+ offsetof(struct sock, sk_family));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
+ offsetof(struct inet_sock, inet_num));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
+ offsetof(struct inet_sock, inet_dport));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
+ offsetof(struct inet_sock, inet_rcv_saddr));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
+ offsetof(struct inet_sock, inet_daddr));
+
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
+ offsetof(struct sock, sk_v6_rcv_saddr));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
+ offsetof(struct sock, sk_v6_daddr));
+#endif
+}
+
static int inet_twsk_diag_dump(struct sock *sk,
struct sk_buff *skb,
struct netlink_callback *cb,
struct inet_diag_req_v2 *r,
const struct nlattr *bc)
{
- struct inet_timewait_sock *tw = inet_twsk(sk);
+ twsk_build_assert();
- if (bc != NULL) {
- struct inet_diag_entry entry;
-
- entry.family = tw->tw_family;
-#if IS_ENABLED(CONFIG_IPV6)
- if (tw->tw_family == AF_INET6) {
- entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32;
- entry.daddr = tw->tw_v6_daddr.s6_addr32;
- } else
-#endif
- {
- entry.saddr = &tw->tw_rcv_saddr;
- entry.daddr = &tw->tw_daddr;
- }
- entry.sport = tw->tw_num;
- entry.dport = ntohs(tw->tw_dport);
- entry.userlocks = 0;
-
- if (!inet_diag_bc_run(bc, &entry))
- return 0;
- }
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
- return inet_twsk_diag_fill(tw, skb, r,
+ return inet_twsk_diag_fill(inet_twsk(sk), skb, r,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
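
twsk_build_assert() replaces the hand-rolled timewait branch by proving at compile time that the address and port fields of struct inet_timewait_sock sit at the same offsets as their struct sock / struct inet_sock counterparts, which is what lets inet_diag_bc_sk() be reused for timewait sockets. The same idea in standalone C11 with _Static_assert, using toy structs rather than the kernel ones:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Two unrelated structs that are only interchangeable if the shared
 * fields sit at identical offsets. */
struct full_sock {
        uint16_t family;
        uint16_t num;
        uint32_t daddr;
        uint32_t extra_state;
};

struct timewait_sock {
        uint16_t tw_family;
        uint16_t tw_num;
        uint32_t tw_daddr;
};

/* Compile-time layout checks, in the spirit of BUILD_BUG_ON(offsetof()). */
_Static_assert(offsetof(struct timewait_sock, tw_family) ==
               offsetof(struct full_sock, family), "family offset mismatch");
_Static_assert(offsetof(struct timewait_sock, tw_num) ==
               offsetof(struct full_sock, num), "num offset mismatch");
_Static_assert(offsetof(struct timewait_sock, tw_daddr) ==
               offsetof(struct full_sock, daddr), "daddr offset mismatch");

/* With the layout proven, one matcher can read either type through the
 * "full" view (a shortcut the kernel takes; sketch only). */
static int match_port(const struct full_sock *sk, uint16_t port)
{
        return sk->num == port;
}

int main(void)
{
        struct timewait_sock tw = { .tw_family = 2, .tw_num = 80, .tw_daddr = 0 };

        printf("match=%d\n", match_port((const struct full_sock *)&tw, 80));
        return 0;
}
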
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 59f883d9cadf..fb20f363151f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,24 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT
If unsure, say Y.
-config NF_LOG_ARP
- tristate "ARP packet logging"
- default m if NETFILTER_ADVANCED=n
- select NF_LOG_COMMON
-
-config NF_LOG_IPV4
- tristate "IPv4 packet logging"
- default m if NETFILTER_ADVANCED=n
- select NF_LOG_COMMON
+if NF_TABLES
config NF_TABLES_IPV4
- depends on NF_TABLES
tristate "IPv4 nf_tables support"
help
This option enables the IPv4 support for nf_tables.
+if NF_TABLES_IPV4
+
config NFT_CHAIN_ROUTE_IPV4
- depends on NF_TABLES_IPV4
tristate "IPv4 nf_tables route chain support"
help
This option enables the "route" chain for IPv4 in nf_tables. This
@@ -61,22 +53,34 @@ config NFT_CHAIN_ROUTE_IPV4
fields such as the source, destination, type of service and
the packet mark.
-config NF_REJECT_IPV4
- tristate "IPv4 packet rejection"
- default m if NETFILTER_ADVANCED=n
-
config NFT_REJECT_IPV4
- depends on NF_TABLES_IPV4
select NF_REJECT_IPV4
default NFT_REJECT
tristate
+endif # NF_TABLES_IPV4
+
config NF_TABLES_ARP
- depends on NF_TABLES
tristate "ARP nf_tables support"
help
This option enables the ARP support for nf_tables.
+endif # NF_TABLES
+
+config NF_LOG_ARP
+ tristate "ARP packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
+config NF_LOG_IPV4
+ tristate "IPv4 packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
+config NF_REJECT_IPV4
+ tristate "IPv4 packet rejection"
+ default m if NETFILTER_ADVANCED=n
+
config NF_NAT_IPV4
tristate "IPv4 NAT"
depends on NF_CONNTRACK_IPV4
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e90f83a3415b..f75e9df5e017 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -418,6 +418,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
+
+ if (!par->net->xt.clusterip_deprecated_warning) {
+ pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
+ "use xt_cluster instead\n");
+ par->net->xt.clusterip_deprecated_warning = true;
+ }
+
return ret;
}
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 8f48f5517e33..87907d4bd259 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -34,31 +34,32 @@ static unsigned int
reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipt_reject_info *reject = par->targinfo;
+ int hook = par->hooknum;
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
- nf_send_unreach(skb, ICMP_NET_UNREACH);
+ nf_send_unreach(skb, ICMP_NET_UNREACH, hook);
break;
case IPT_ICMP_HOST_UNREACHABLE:
- nf_send_unreach(skb, ICMP_HOST_UNREACH);
+ nf_send_unreach(skb, ICMP_HOST_UNREACH, hook);
break;
case IPT_ICMP_PROT_UNREACHABLE:
- nf_send_unreach(skb, ICMP_PROT_UNREACH);
+ nf_send_unreach(skb, ICMP_PROT_UNREACH, hook);
break;
case IPT_ICMP_PORT_UNREACHABLE:
- nf_send_unreach(skb, ICMP_PORT_UNREACH);
+ nf_send_unreach(skb, ICMP_PORT_UNREACH, hook);
break;
case IPT_ICMP_NET_PROHIBITED:
- nf_send_unreach(skb, ICMP_NET_ANO);
+ nf_send_unreach(skb, ICMP_NET_ANO, hook);
break;
case IPT_ICMP_HOST_PROHIBITED:
- nf_send_unreach(skb, ICMP_HOST_ANO);
+ nf_send_unreach(skb, ICMP_HOST_ANO, hook);
break;
case IPT_ICMP_ADMIN_PROHIBITED:
- nf_send_unreach(skb, ICMP_PKT_FILTERED);
+ nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
break;
case IPT_TCP_RESET:
- nf_send_reset(skb, par->hooknum);
+ nf_send_reset(skb, hook);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 536da7bc598a..b7405eb7f1ef 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -164,4 +164,27 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
}
EXPORT_SYMBOL_GPL(nf_send_reset);
+void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
+{
+ struct iphdr *iph = ip_hdr(skb_in);
+ u8 proto;
+
+ if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+ return;
+
+ if (skb_csum_unnecessary(skb_in)) {
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+ return;
+ }
+
+ if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
+ proto = iph->protocol;
+ else
+ proto = 0;
+
+ if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach);
+
MODULE_LICENSE("GPL");
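
nf_send_unreach() refuses to generate an ICMP error when the checksum is already known bad or when the packet is a non-first fragment, which it detects with iph->frag_off & htons(IP_OFFSET) (the 13-bit fragment offset). A minimal userspace version of that fragment test, assuming the glibc struct iphdr and IP_OFFMASK definitions from <netinet/ip.h>:

#include <netinet/ip.h>   /* struct iphdr, IP_OFFMASK, IP_MF */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

/* Return 1 if this header describes a non-first fragment, i.e. the
 * 13-bit fragment offset is non-zero (IP_OFFSET in the kernel,
 * IP_OFFMASK == 0x1fff in netinet/ip.h). */
static int is_later_fragment(const struct iphdr *iph)
{
        return (iph->frag_off & htons(IP_OFFMASK)) != 0;
}

int main(void)
{
        struct iphdr iph;

        memset(&iph, 0, sizeof(iph));

        iph.frag_off = htons(IP_MF | 185);  /* MF set, offset 185*8 bytes */
        printf("later fragment: %d\n", is_later_fragment(&iph));

        iph.frag_off = htons(IP_MF);        /* MF set, offset 0: first frag */
        printf("later fragment: %d\n", is_later_fragment(&iph));
        return 0;
}
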
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index d729542bd1b7..16a5d4d73d75 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,7 +27,8 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code);
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
break;
case NFT_REJECT_TCP_RST:
nf_send_reset(pkt->skb, pkt->ops->hooknum);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 208d5439e59b..fd88f868776f 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -692,8 +692,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
}
EXPORT_SYMBOL_GPL(ping_common_sendmsg);
-static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
+static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct net *net = sock_net(sk);
struct flowi4 fl4;
@@ -849,8 +848,8 @@ do_confirm:
goto out;
}
-int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+ int flags, int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
int family = sk->sk_family;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f027a708b7e0..923cf538fce1 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -481,8 +481,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
}
-static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
+static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct ipcm_cookie ipc;
@@ -709,8 +708,8 @@ out: return ret;
* we return it, otherwise we block.
*/
-static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ad5064362c5c..649c8a3f0189 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -152,7 +152,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
static struct dst_ops ipv4_dst_ops = {
.family = AF_INET,
- .protocol = cpu_to_be16(ETH_P_IP),
.check = ipv4_dst_check,
.default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu,
@@ -2225,7 +2224,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
static struct dst_ops ipv4_dst_blackhole_ops = {
.family = AF_INET,
- .protocol = cpu_to_be16(ETH_P_IP),
.check = ipv4_blackhole_dst_check,
.mtu = ipv4_blackhole_mtu,
.default_advmss = ipv4_default_advmss,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d151539da8e6..fdf899163d44 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -883,6 +883,20 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "tcp_probe_threshold",
+ .data = &init_net.ipv4.sysctl_tcp_probe_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_probe_interval",
+ .data = &init_net.ipv4.sysctl_tcp_probe_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
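
Both knobs are plain integers exposed through proc_dointvec, so on a kernel carrying this patch they appear as /proc/sys/net/ipv4/tcp_probe_threshold and /proc/sys/net/ipv4/tcp_probe_interval. A trivial reader:

#include <stdio.h>

static int read_sysctl_int(const char *path, int *val)
{
        FILE *f = fopen(path, "r");

        if (!f)
                return -1;
        if (fscanf(f, "%d", val) != 1) {
                fclose(f);
                return -1;
        }
        fclose(f);
        return 0;
}

int main(void)
{
        int threshold, interval;

        if (!read_sysctl_int("/proc/sys/net/ipv4/tcp_probe_threshold", &threshold))
                printf("tcp_probe_threshold = %d\n", threshold);
        if (!read_sysctl_int("/proc/sys/net/ipv4/tcp_probe_interval", &interval))
                printf("tcp_probe_interval = %d\n", interval);
        return 0;
}
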
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 995a2259bcfc..62f38428279a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1060,8 +1060,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
return err;
}
-int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t size)
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -1539,8 +1538,8 @@ EXPORT_SYMBOL(tcp_read_sock);
* Probably, code can be easily improved even more.
*/
-int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags, int *addr_len)
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+ int flags, int *addr_len)
{
struct tcp_sock *tp = tcp_sk(sk);
int copied = 0;
@@ -3001,12 +3000,11 @@ static void __init tcp_init_mem(void)
void __init tcp_init(void)
{
- struct sk_buff *skb = NULL;
unsigned long limit;
int max_rshare, max_wshare, cnt;
unsigned int i;
- BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
+ sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d694088214cd..d4c3a5e66380 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -83,7 +83,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
ret = -EEXIST;
} else {
list_add_tail_rcu(&ca->list, &tcp_cong_list);
- pr_info("%s registered\n", ca->name);
+ pr_debug("%s registered\n", ca->name);
}
spin_unlock(&tcp_cong_list_lock);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ea82fd492c1b..fe77417fc137 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -221,7 +221,6 @@ static bool tcp_fastopen_create_child(struct sock *sk,
WARN_ON(req->sk == NULL);
return true;
}
-EXPORT_SYMBOL(tcp_fastopen_create_child);
static bool tcp_fastopen_queue_check(struct sock *sk)
{
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a2dfed4783b..f0c6fc32bfa8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2460,6 +2460,8 @@ static int __net_init tcp_sk_init(struct net *net)
}
net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+ net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
+ net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
return 0;
fail:
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 9d7930ba8e0f..3f7c2fca5431 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -29,8 +29,8 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
}
}
-struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..5a73ad5afaf7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1354,6 +1354,8 @@ void tcp_mtup_init(struct sock *sk)
icsk->icsk_af_ops->net_header_len;
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
icsk->icsk_mtup.probe_size = 0;
+ if (icsk->icsk_mtup.enabled)
+ icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
}
EXPORT_SYMBOL(tcp_mtup_init);
@@ -1752,20 +1754,23 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
bool *is_cwnd_limited, u32 max_segs)
{
- struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 send_win, cong_win, limit, in_flight;
+ u32 age, send_win, cong_win, limit, in_flight;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct skb_mstamp now;
+ struct sk_buff *head;
int win_divisor;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto send_now;
- if (icsk->icsk_ca_state != TCP_CA_Open)
+ if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR)))
goto send_now;
- /* Defer for less than two clock ticks. */
- if (tp->tso_deferred &&
- (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+ /* Avoid bursty behavior by allowing defer
+ * only if the last write was recent.
+ */
+ if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
@@ -1807,11 +1812,14 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
goto send_now;
}
- /* Ok, it looks like it is advisable to defer.
- * Do not rearm the timer if already set to not break TCP ACK clocking.
- */
- if (!tp->tso_deferred)
- tp->tso_deferred = 1 | (jiffies << 1);
+ head = tcp_write_queue_head(sk);
+ skb_mstamp_get(&now);
+ age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
+ /* If next ACK is likely to come too late (half srtt), do not defer */
+ if (age < (tp->srtt_us >> 4))
+ goto send_now;
+
+ /* Ok, it looks like it is advisable to defer. */
if (cong_win < send_win && cong_win < skb->len)
*is_cwnd_limited = true;
@@ -1819,10 +1827,34 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
return true;
send_now:
- tp->tso_deferred = 0;
return false;
}
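
The new deferral test compares the age of the head of the write queue with half the smoothed RTT: tp->srtt_us holds 8x the smoothed RTT in microseconds, so srtt_us >> 4 is RTT/2. If the head was sent less than that long ago, the ACK that would release the deferred data is still at least half an RTT away, so deferral is abandoned and the skb is sent immediately. A small arithmetic sketch of that check in plain C (no kernel types):

#include <stdint.h>
#include <stdio.h>

/* srtt_us_x8 holds 8 * smoothed RTT in usec, so >> 4 is RTT/2.  The skb
 * is only worth deferring if the oldest in-flight data is at least that
 * old, i.e. the next ACK is expected soon enough to matter. */
static int defer_is_worthwhile(uint32_t head_age_us, uint32_t srtt_us_x8)
{
        uint32_t half_rtt_us = srtt_us_x8 >> 4;

        return head_age_us >= half_rtt_us;  /* kernel: age < srtt>>4 => send now */
}

int main(void)
{
        uint32_t srtt_us_x8 = 8 * 20000;    /* smoothed RTT of 20 ms */

        printf("head sent  5ms ago: defer=%d\n", defer_is_worthwhile(5000, srtt_us_x8));
        printf("head sent 15ms ago: defer=%d\n", defer_is_worthwhile(15000, srtt_us_x8));
        return 0;
}
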
+static inline void tcp_mtu_check_reprobe(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
+ u32 interval;
+ s32 delta;
+
+ interval = net->ipv4.sysctl_tcp_probe_interval;
+ delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+ if (unlikely(delta >= interval * HZ)) {
+ int mss = tcp_current_mss(sk);
+
+ /* Update current search range */
+ icsk->icsk_mtup.probe_size = 0;
+ icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
+ sizeof(struct tcphdr) +
+ icsk->icsk_af_ops->net_header_len;
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+
+ /* Update probe time stamp */
+ icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ }
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
@@ -1837,11 +1869,13 @@ static int tcp_mtu_probe(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb, *nskb, *next;
+ struct net *net = sock_net(sk);
int len;
int probe_size;
int size_needed;
int copy;
int mss_now;
+ int interval;
/* Not currently probing/verifying,
* not in recovery,
@@ -1854,12 +1888,25 @@ static int tcp_mtu_probe(struct sock *sk)
tp->rx_opt.num_sacks || tp->rx_opt.dsack)
return -1;
- /* Very simple search strategy: just double the MSS. */
+	/* Use binary search for probe_size between tcp_base_mss and the
+	 * current mss_clamp. If (search_high - search_low) is smaller
+	 * than a threshold, back off from probing.
+	 */
mss_now = tcp_current_mss(sk);
- probe_size = 2 * tp->mss_cache;
+ probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
+ icsk->icsk_mtup.search_low) >> 1);
size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
- if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
- /* TODO: set timer for probe_converge_event */
+ interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+	/* When misfortune happens, we are reprobing actively, and the
+	 * reprobe timer has expired. We stick with the current probing
+	 * process by not resetting the search range to its original value.
+	 */
+ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+ interval < net->ipv4.sysctl_tcp_probe_threshold) {
+		/* Check whether enough time has elapsed for
+ * another round of probing.
+ */
+ tcp_mtu_check_reprobe(sk);
return -1;
}
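
Rather than doubling the MSS, the probe size is now the MSS equivalent of the midpoint of [search_low, search_high], and probing backs off once that window drops below tcp_probe_threshold, so repeated probes converge on the path MTU like a binary search. A deliberately simplified simulation of that convergence; it assumes a fixed 40-byte IPv4+TCP header for the MTU/MSS conversion and glosses over how the kernel actually detects probe success or loss:

#include <stdio.h>

#define HDR_LEN 40   /* assumed IPv4 + TCP header, no options */

static int mtu_to_mss(int mtu) { return mtu - HDR_LEN; }

int main(void)
{
        int search_low = 512 + HDR_LEN;     /* current verified MTU */
        int search_high = 1500 + HDR_LEN;   /* upper bound from mss_clamp */
        int path_mtu = 1400;                /* what the network really passes */
        int threshold = 8;                  /* like sysctl tcp_probe_threshold */

        while (search_high - search_low >= threshold) {
                int probe_mtu = (search_high + search_low) / 2;
                int probe_size = mtu_to_mss(probe_mtu);

                if (probe_mtu <= path_mtu) {
                        /* probe would be ACKed: raise the verified floor */
                        search_low = probe_mtu;
                        printf("probe %4d bytes ok,   low=%d\n", probe_size, search_low);
                } else {
                        /* probe would be lost: lower the ceiling */
                        search_high = probe_mtu;
                        printf("probe %4d bytes lost, high=%d\n", probe_size, search_high);
                }
        }
        printf("converged: path MTU between %d and %d\n", search_low, search_high);
        return 0;
}
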
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0732b787904e..15505936511d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -107,6 +107,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
if (net->ipv4.sysctl_tcp_mtu_probing) {
if (!icsk->icsk_mtup.enabled) {
icsk->icsk_mtup.enabled = 1;
+ icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
struct net *net = sock_net(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 97ef1f8b7be8..f27556e2158b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -873,8 +873,7 @@ out:
}
EXPORT_SYMBOL(udp_push_pending_frames);
-int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
+int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct udp_sock *up = udp_sk(sk);
@@ -1136,7 +1135,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
* sendpage interface can't pass.
* This will succeed only when the socket is connected.
*/
- ret = udp_sendmsg(NULL, sk, &msg, 0);
+ ret = udp_sendmsg(sk, &msg, 0);
if (ret < 0)
return ret;
}
@@ -1254,8 +1253,8 @@ EXPORT_SYMBOL(udp_ioctl);
* return it, otherwise we block.
*/
-int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+ int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
@@ -2525,6 +2524,16 @@ void __init udp_table_init(struct udp_table *table, const char *name)
}
}
+u32 udp_flow_hashrnd(void)
+{
+ static u32 hashrnd __read_mostly;
+
+ net_get_random_once(&hashrnd, sizeof(hashrnd));
+
+ return hashrnd;
+}
+EXPORT_SYMBOL(udp_flow_hashrnd);
+
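
udp_flow_hashrnd() hands out the UDP flow-hash seed, initialized on first use through net_get_random_once() so the random bytes are not drawn before the entropy pool is usable. A userspace analogue of that lazy one-time initialization with pthread_once(); getrandom() is assumed available (glibc >= 2.25):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/random.h>

static uint32_t hashrnd;
static pthread_once_t hashrnd_once = PTHREAD_ONCE_INIT;

static void hashrnd_init(void)
{
        /* Drawn lazily, the first time any caller asks for the seed. */
        if (getrandom(&hashrnd, sizeof(hashrnd), 0) != (ssize_t)sizeof(hashrnd))
                hashrnd = 0x9e3779b9;   /* fallback constant, illustrative only */
}

static uint32_t flow_hashrnd(void)
{
        pthread_once(&hashrnd_once, hashrnd_init);
        return hashrnd;
}

int main(void)
{
        printf("seed=%08x (stable: %d)\n", flow_hashrnd(),
               flow_hashrnd() == flow_hashrnd());
        return 0;
}
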
void __init udp_init(void)
{
unsigned long limit;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index f3c27899f62b..7e0fe4bdd967 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -21,8 +21,8 @@ int compat_udp_setsockopt(struct sock *sk, int level, int optname,
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#endif
-int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len);
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+ int flags, int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6156f68a1e90..c224c856247b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -232,7 +232,6 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm4_dst_ops = {
.family = AF_INET,
- .protocol = cpu_to_be16(ETH_P_IP),
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b6030025f411..88d2cf0cae52 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2464,6 +2464,23 @@ err_exit:
return err;
}
+static int ipv6_mc_config(struct sock *sk, bool join,
+ const struct in6_addr *addr, int ifindex)
+{
+ int ret;
+
+ ASSERT_RTNL();
+
+ lock_sock(sk);
+ if (join)
+ ret = __ipv6_sock_mc_join(sk, ifindex, addr);
+ else
+ ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
+ release_sock(sk);
+
+ return ret;
+}
+
/*
* Manual configuration of address on an interface
*/
@@ -2476,10 +2493,10 @@ static int inet6_addr_add(struct net *net, int ifindex,
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
struct net_device *dev;
+ unsigned long timeout;
+ clock_t expires;
int scope;
u32 flags;
- clock_t expires;
- unsigned long timeout;
ASSERT_RTNL();
@@ -2501,6 +2518,14 @@ static int inet6_addr_add(struct net *net, int ifindex,
if (IS_ERR(idev))
return PTR_ERR(idev);
+ if (ifa_flags & IFA_F_MCAUTOJOIN) {
+ int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+ true, pfx, ifindex);
+
+ if (ret < 0)
+ return ret;
+ }
+
scope = ipv6_addr_scope(pfx);
timeout = addrconf_timeout_fixup(valid_lft, HZ);
@@ -2542,6 +2567,9 @@ static int inet6_addr_add(struct net *net, int ifindex,
in6_ifa_put(ifp);
addrconf_verify_rtnl();
return 0;
+ } else if (ifa_flags & IFA_F_MCAUTOJOIN) {
+ ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+ false, pfx, ifindex);
}
return PTR_ERR(ifp);
@@ -2578,6 +2606,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
jiffies);
ipv6_del_addr(ifp);
addrconf_verify_rtnl();
+ if (ipv6_addr_is_multicast(pfx)) {
+ ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+ false, pfx, dev->ifindex);
+ }
return 0;
}
}
@@ -3945,7 +3977,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (ifa == NULL) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e8c4400f23e9..6bafcc2c79e3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -824,7 +824,7 @@ static int __init inet6_init(void)
struct list_head *r;
int err = 0;
- BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
+ sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
/* Register the socket-side information for inet6_create. */
for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 266a264ec212..88300d42fc95 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -64,12 +64,6 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
-#ifdef IP6_TNL_DEBUG
-#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__)
-#else
-#define IP6_TNL_TRACE(x...) do {;} while(0)
-#endif
-
#define HASH_SIZE_SHIFT 5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce107c8aab3..1dd1fedff9f4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -132,7 +132,7 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
return iv > 0 ? iv : 1;
}
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct net_device *dev = NULL;
struct ipv6_mc_socklist *mc_lst;
@@ -140,6 +140,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct net *net = sock_net(sk);
int err;
+ ASSERT_RTNL();
+
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
@@ -161,7 +163,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
mc_lst->next = NULL;
mc_lst->addr = *addr;
- rtnl_lock();
if (ifindex == 0) {
struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0);
@@ -173,7 +174,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_index(net, ifindex);
if (dev == NULL) {
- rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return -ENODEV;
}
@@ -190,7 +190,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
err = ipv6_dev_mc_inc(dev, addr);
if (err) {
- rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return err;
}
@@ -198,25 +197,37 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
mc_lst->next = np->ipv6_mc_list;
rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
+ return 0;
+}
+EXPORT_SYMBOL(__ipv6_sock_mc_join);
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = __ipv6_sock_mc_join(sk, ifindex, addr);
rtnl_unlock();
- return 0;
+ return ret;
}
+EXPORT_SYMBOL(ipv6_sock_mc_join);
/*
* socket leave on multicast group
*/
-int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc_lst;
struct ipv6_mc_socklist __rcu **lnk;
struct net *net = sock_net(sk);
+ ASSERT_RTNL();
+
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
- rtnl_lock();
for (lnk = &np->ipv6_mc_list;
(mc_lst = rtnl_dereference(*lnk)) != NULL;
lnk = &mc_lst->next) {
@@ -235,17 +246,28 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
- rtnl_unlock();
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
return 0;
}
}
- rtnl_unlock();
return -EADDRNOTAVAIL;
}
+EXPORT_SYMBOL(__ipv6_sock_mc_drop);
+
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
+ rtnl_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(ipv6_sock_mc_drop);
/* called with rcu_read_lock() */
static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
@@ -2907,20 +2929,32 @@ static int __net_init igmp6_net_init(struct net *net)
inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
+ err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_ICMPV6, net);
+ if (err < 0) {
+ pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n",
+ err);
+ goto out_sock_create;
+ }
+
err = igmp6_proc_init(net);
if (err)
- goto out_sock_create;
-out:
- return err;
+ goto out_sock_create_autojoin;
+ return 0;
+
+out_sock_create_autojoin:
+ inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk);
out_sock_create:
inet_ctl_sock_destroy(net->ipv6.igmp_sk);
- goto out;
+out:
+ return err;
}
static void __net_exit igmp6_net_exit(struct net *net)
{
inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+ inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk);
igmp6_proc_exit(net);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 471ed24aabae..247ad7c298f7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -84,6 +84,7 @@ do { \
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
+static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -117,7 +118,9 @@ static const struct neigh_ops ndisc_direct_ops = {
struct neigh_table nd_tbl = {
.family = AF_INET6,
.key_len = sizeof(struct in6_addr),
+ .protocol = cpu_to_be16(ETH_P_IPV6),
.hash = ndisc_hash,
+ .key_eq = ndisc_key_eq,
.constructor = ndisc_constructor,
.pconstructor = pndisc_constructor,
.pdestructor = pndisc_destructor,
@@ -294,6 +297,11 @@ static u32 ndisc_hash(const void *pkey,
return ndisc_hashfn(pkey, dev, hash_rnd);
}
+static bool ndisc_key_eq(const struct neighbour *n, const void *pkey)
+{
+ return neigh_key_eq128(n, pkey);
+}
+
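
nd_tbl now supplies a key_eq callback so the generic neighbour code can compare full 128-bit IPv6 keys without knowing the key length; neigh_key_eq128() does this as four 32-bit word comparisons. A standalone sketch of that comparison (it assumes 4-byte-aligned keys, as the kernel helper does):

#include <stdint.h>
#include <stdio.h>

/* Compare two 128-bit keys as four 32-bit words, in the spirit of
 * neigh_key_eq128(); memcmp() would also work, this just mirrors the
 * word-wise kernel helper. */
static int key_eq128(const void *a, const void *b)
{
        const uint32_t *pa = a, *pb = b;

        return ((pa[0] ^ pb[0]) | (pa[1] ^ pb[1]) |
                (pa[2] ^ pb[2]) | (pa[3] ^ pb[3])) == 0;
}

int main(void)
{
        uint32_t k1[4] = { 0xfe800000, 0, 0, 1 };
        uint32_t k2[4] = { 0xfe800000, 0, 0, 1 };
        uint32_t k3[4] = { 0xfe800000, 0, 0, 2 };

        printf("k1==k2: %d, k1==k3: %d\n", key_eq128(k1, k2), key_eq128(k1, k3));
        return 0;
}
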
static int ndisc_constructor(struct neighbour *neigh)
{
struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a069822936e6..ca6998345b42 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,14 +25,16 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
+if NF_TABLES
+
config NF_TABLES_IPV6
- depends on NF_TABLES
tristate "IPv6 nf_tables support"
help
This option enables the IPv6 support for nf_tables.
+if NF_TABLES_IPV6
+
config NFT_CHAIN_ROUTE_IPV6
- depends on NF_TABLES_IPV6
tristate "IPv6 nf_tables route chain support"
help
This option enables the "route" chain for IPv6 in nf_tables. This
@@ -40,16 +42,18 @@ config NFT_CHAIN_ROUTE_IPV6
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
-config NF_REJECT_IPV6
- tristate "IPv6 packet rejection"
- default m if NETFILTER_ADVANCED=n
-
config NFT_REJECT_IPV6
- depends on NF_TABLES_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
tristate
+endif # NF_TABLES_IPV6
+endif # NF_TABLES
+
+config NF_REJECT_IPV6
+ tristate "IPv6 packet rejection"
+ default m if NETFILTER_ADVANCED=n
+
config NF_LOG_IPV6
tristate "IPv6 packet logging"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index d05b36440e8b..68e0bb4db1bf 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -208,4 +208,39 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
}
EXPORT_SYMBOL_GPL(nf_send_reset6);
+static bool reject6_csum_ok(struct sk_buff *skb, int hook)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int thoff;
+ __be16 fo;
+ u8 proto;
+
+ if (skb->csum_bad)
+ return false;
+
+ if (skb_csum_unnecessary(skb))
+ return true;
+
+ proto = ip6h->nexthdr;
+ thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+
+ if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+ return false;
+
+ return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
+}
+
+void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
+ unsigned char code, unsigned int hooknum)
+{
+ if (!reject6_csum_ok(skb_in, hooknum))
+ return;
+
+ if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
+ skb_in->dev = net->loopback_dev;
+
+ icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach6);
+
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index a2dfff6ff227..263a5164a6f5 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -77,8 +77,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
return 0;
}
-int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
+int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dae7f1a1e464..a5287b3582a4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -32,7 +32,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/skbuff.h>
#include <linux/compat.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/net_namespace.h>
@@ -456,9 +456,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
* we return it, otherwise we block.
*/
-static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
@@ -730,8 +729,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
}
-static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len)
+static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4688bd4d7f59..06fa819c43c9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -194,7 +194,6 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
static struct dst_ops ip6_dst_ops_template = {
.family = AF_INET6,
- .protocol = cpu_to_be16(ETH_P_IPV6),
.gc = ip6_dst_gc,
.gc_thresh = 1024,
.check = ip6_dst_check,
@@ -236,7 +235,6 @@ static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
static struct dst_ops ip6_dst_blackhole_ops = {
.family = AF_INET6,
- .protocol = cpu_to_be16(ETH_P_IPV6),
.destroy = ip6_dst_destroy,
.check = ip6_dst_check,
.mtu = ip6_blackhole_mtu,
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index c1ab77105b4c..d883c9204c01 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -41,8 +41,8 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
return tcp_gro_complete(skb);
}
-struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct tcphdr *th;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d048d46779fc..70568a4548e4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -391,8 +391,7 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup);
* return it, otherwise we block.
*/
-int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len,
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int noblock, int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1101,8 +1100,7 @@ out:
return err;
}
-int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len)
+int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct ipv6_txoptions opt_space;
struct udp_sock *up = udp_sk(sk);
@@ -1164,12 +1162,12 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
do_udp_sendmsg:
if (__ipv6_only_sock(sk))
return -ENETUNREACH;
- return udp_sendmsg(iocb, sk, msg, len);
+ return udp_sendmsg(sk, msg, len);
}
}
if (up->pending == AF_INET)
- return udp_sendmsg(iocb, sk, msg, len);
+ return udp_sendmsg(sk, msg, len);
/* Rough check on arithmetic overflow,
better check is made in ip6_append_data().
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index c779c3c90b9d..0682c031ccdc 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -23,10 +23,9 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#endif
-int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len);
-int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len);
+int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+ int flags, int *addr_len);
int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udpv6_destroy_sock(struct sock *sk);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 48bf5a06847b..8ddf2b545151 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -292,7 +292,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm6_dst_ops = {
.family = AF_INET6,
- .protocol = cpu_to_be16(ETH_P_IPV6),
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
.redirect = xfrm6_redirect,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index f11ad1d95e0e..4ea5d7497b5f 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1688,8 +1688,7 @@ out:
return rc;
}
-static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct ipx_sock *ipxs = ipx_sk(sk);
@@ -1754,8 +1753,8 @@ out:
}
-static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct ipx_sock *ipxs = ipx_sk(sk);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 568edc72d737..ee0ea25c8e7a 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1256,14 +1256,13 @@ static int irda_release(struct socket *sock)
}
/*
- * Function irda_sendmsg (iocb, sock, msg, len)
+ * Function irda_sendmsg (sock, msg, len)
*
* Send message down to TinyTP. This function is used for both STREAM and
* SEQPACK services. This is possible since it forces the client to
* fragment the message if necessary
*/
-static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct irda_sock *self;
@@ -1348,13 +1347,13 @@ out:
}
/*
- * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags)
+ * Function irda_recvmsg_dgram (sock, msg, size, flags)
*
* Try to receive message and copy it to user. The frame is discarded
* after being read, regardless of how much the user actually read
*/
-static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
struct irda_sock *self = irda_sk(sk);
@@ -1398,10 +1397,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
}
/*
- * Function irda_recvmsg_stream (iocb, sock, msg, size, flags)
+ * Function irda_recvmsg_stream (sock, msg, size, flags)
*/
-static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
struct irda_sock *self = irda_sk(sk);
@@ -1515,14 +1514,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
}
/*
- * Function irda_sendmsg_dgram (iocb, sock, msg, len)
+ * Function irda_sendmsg_dgram (sock, msg, len)
*
* Send message down to TinyTP for the unreliable sequenced
* packet service...
*
*/
-static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct irda_sock *self;
@@ -1594,14 +1593,14 @@ out:
}
/*
- * Function irda_sendmsg_ultra (iocb, sock, msg, len)
+ * Function irda_sendmsg_ultra (sock, msg, len)
*
* Send message down to IrLMP for the unreliable Ultra
* packet service...
*/
#ifdef CONFIG_IRDA_ULTRA
-static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct irda_sock *self;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 2e9953b2db84..94b4c898a116 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1026,8 +1026,8 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg,
(void *) prmdata, 8);
}
-static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
@@ -1317,8 +1317,8 @@ static void iucv_process_message_q(struct sock *sk)
}
}
-static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f8ac939d52b4..9255fd9d94bc 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3588,8 +3588,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
#endif
-static int pfkey_sendmsg(struct kiocb *kiocb,
- struct socket *sock, struct msghdr *msg, size_t len)
+static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct sk_buff *skb = NULL;
@@ -3630,8 +3629,7 @@ out:
return err ? : len;
}
-static int pfkey_recvmsg(struct kiocb *kiocb,
- struct socket *sock, struct msghdr *msg, size_t len,
+static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
struct sock *sk = sock->sk;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 781b3a226ba7..4b552873b556 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -74,7 +74,7 @@ static int l2tp_eth_dev_init(struct net_device *dev)
priv->dev = dev;
eth_hw_addr_random(dev);
- memset(&dev->broadcast[0], 0xff, 6);
+ eth_broadcast_addr(dev->broadcast);
dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock;
return 0;
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 05dfc8aa36af..79649937ec71 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -385,7 +385,7 @@ drop:
/* Userspace will call sendmsg() on the tunnel socket to send L2TP
* control frames.
*/
-static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len)
+static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct sk_buff *skb;
int rc;
@@ -506,7 +506,7 @@ no_route:
goto out;
}
-static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 8611f1b63141..d1ded3777815 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -480,8 +480,7 @@ out:
/* Userspace will call sendmsg() on the tunnel socket to send L2TP
* control frames.
*/
-static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len)
+static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
@@ -643,9 +642,8 @@ do_confirm:
goto done;
}
-static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index cc7a828fc914..e9b0dec56b8e 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -185,9 +185,8 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
/* Receive message. This is the recvmsg for the PPPoL2TP socket.
*/
-static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len,
- int flags)
+static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
{
int err;
struct sk_buff *skb;
@@ -295,7 +294,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
* when a user application does a sendmsg() on the session socket. L2TP and
* PPP headers must be inserted into the user's data.
*/
-static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
{
static const unsigned char ppph[2] = { 0xff, 0x03 };
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 2c0b83ce43bd..17a8dff06090 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -704,8 +704,8 @@ out:
* Copy received data to the socket user.
* Returns non-negative upon success, negative otherwise.
*/
-static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name);
const int nonblock = flags & MSG_DONTWAIT;
@@ -878,8 +878,7 @@ copy_uaddr:
* Transmit data provided by the socket user.
* Returns non-negative upon success, negative otherwise.
*/
-static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index dd4ff36c557a..74f509c500f2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1488,7 +1488,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
if (next_hop_sta)
memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN);
else
- memset(next_hop, 0, ETH_ALEN);
+ eth_zero_addr(next_hop);
memset(pinfo, 0, sizeof(*pinfo));
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index b606b53a49a7..f9b07588baf5 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1742,7 +1742,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
ieee80211_ibss_disconnect(sdata);
ifibss->ssid_len = 0;
- memset(ifibss->bssid, 0, ETH_ALEN);
+ eth_zero_addr(ifibss->bssid);
/* remove beacon */
kfree(sdata->u.ibss.ie);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 0c8b2a77d312..49a44bcd8aba 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -520,7 +520,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
} else {
*fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
/* RA TA DA SA */
- memset(hdr->addr1, 0, ETH_ALEN); /* RA is resolved later */
+ eth_zero_addr(hdr->addr1); /* RA is resolved later */
memcpy(hdr->addr2, meshsa, ETH_ALEN);
memcpy(hdr->addr3, meshda, ETH_ALEN);
memcpy(hdr->addr4, meshsa, ETH_ALEN);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 10ac6324c1d0..9f6f3562396a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2033,7 +2033,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_flush_queues(local, sdata, false);
/* clear bssid only after building the needed mgmt frames */
- memset(ifmgd->bssid, 0, ETH_ALEN);
+ eth_zero_addr(ifmgd->bssid);
/* remove AP and TDLS peers */
sta_info_flush(sdata);
@@ -2464,7 +2464,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
del_timer_sync(&sdata->u.mgd.timer);
sta_info_destroy_addr(sdata, auth_data->bss->bssid);
- memset(sdata->u.mgd.bssid, 0, ETH_ALEN);
+ eth_zero_addr(sdata->u.mgd.bssid);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
sdata->u.mgd.flags = 0;
mutex_lock(&sdata->local->mtx);
@@ -2777,7 +2777,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
del_timer_sync(&sdata->u.mgd.timer);
sta_info_destroy_addr(sdata, assoc_data->bss->bssid);
- memset(sdata->u.mgd.bssid, 0, ETH_ALEN);
+ eth_zero_addr(sdata->u.mgd.bssid);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
sdata->u.mgd.flags = 0;
mutex_lock(&sdata->local->mtx);
@@ -4474,7 +4474,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
return 0;
err_clear:
- memset(ifmgd->bssid, 0, ETH_ALEN);
+ eth_zero_addr(ifmgd->bssid);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
ifmgd->auth_data = NULL;
err_free:
@@ -4817,7 +4817,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
return 0;
err_clear:
- memset(ifmgd->bssid, 0, ETH_ALEN);
+ eth_zero_addr(ifmgd->bssid);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
ifmgd->assoc_data = NULL;
err_free:
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 37421db88965..dfca485863e9 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -1,9 +1,30 @@
#
# MPLS configuration
#
+
+menuconfig MPLS
+ tristate "MultiProtocol Label Switching"
+ default n
+ ---help---
+ MultiProtocol Label Switching routes packets through logical
+ circuits. Originally conceived as a way of routing packets at
+	  hardware speeds (before hardware was capable of routing IPv4 packets),
+ MPLS remains a simple way of making tunnels.
+
+	  If you have not heard of MPLS, you probably want to say N here.
+
+if MPLS
+
config NET_MPLS_GSO
- tristate "MPLS: GSO support"
+ bool "MPLS: GSO support"
help
This is helper module to allow segmentation of non-MPLS GSO packets
that have had MPLS stack entries pushed onto them and thus
become MPLS GSO packets.
+
+config MPLS_ROUTING
+ bool "MPLS: routing support"
+ help
+	  Add support for forwarding of MPLS packets.
+
+endif # MPLS
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
index 6dec088c2d0f..60af15f1960e 100644
--- a/net/mpls/Makefile
+++ b/net/mpls/Makefile
@@ -2,3 +2,4 @@
# Makefile for MPLS.
#
obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o
+obj-$(CONFIG_MPLS_ROUTING) += af_mpls.o
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
new file mode 100644
index 000000000000..0ad8f7141be2
--- /dev/null
+++ b/net/mpls/af_mpls.c
@@ -0,0 +1,1008 @@
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/sysctl.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/mpls.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#include <net/ip_fib.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include "internal.h"
+
+#define LABEL_NOT_SPECIFIED (1<<20)
+#define MAX_NEW_LABELS 2
+
+/* This maximum ha length is copied from the definition of struct neighbour */
+#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
+
+struct mpls_route { /* next hop label forwarding entry */
+ struct net_device __rcu *rt_dev;
+ struct rcu_head rt_rcu;
+ u32 rt_label[MAX_NEW_LABELS];
+ u8 rt_protocol; /* routing protocol that set this entry */
+ u8 rt_labels;
+ u8 rt_via_alen;
+ u8 rt_via_table;
+ u8 rt_via[0];
+};
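+
+/* Illustrative example (not part of the original patch): a route that
+ * swaps incoming label 100 for outgoing label 200 towards an IPv4 next
+ * hop would be stored roughly as
+ *
+ *	rt_label[0]  = 200
+ *	rt_labels    = 1
+ *	rt_via_table = NEIGH_ARP_TABLE
+ *	rt_via_alen  = 4	(the IPv4 next-hop address in rt_via)
+ *	rt_dev       = the egress net_device
+ *
+ * with the entry itself installed at platform_label[100].
+ */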
+
+static int zero = 0;
+static int label_limit = (1 << 20) - 1;
+
+static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
+ struct nlmsghdr *nlh, struct net *net, u32 portid,
+ unsigned int nlm_flags);
+
+static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
+{
+ struct mpls_route *rt = NULL;
+
+ if (index < net->mpls.platform_labels) {
+ struct mpls_route __rcu **platform_label =
+ rcu_dereference(net->mpls.platform_label);
+ rt = rcu_dereference(platform_label[index]);
+ }
+ return rt;
+}
+
+static bool mpls_output_possible(const struct net_device *dev)
+{
+ return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
+}
+
+static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
+{
+ /* The size of the layer 2.5 labels to be added for this route */
+ return rt->rt_labels * sizeof(struct mpls_shim_hdr);
+}
+
+static unsigned int mpls_dev_mtu(const struct net_device *dev)
+{
+ /* The amount of data the layer 2 frame can hold */
+ return dev->mtu;
+}
+
+static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
+static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
+ struct mpls_entry_decoded dec)
+{
+	/* RFC4385 and RFC5586 encode other packets in MPLS such that
+	 * they don't conflict with the IP version number, making
+	 * decoding by examining the IP version correct in everything
+	 * except for the strangest cases.
+	 *
+	 * The strange cases, if we choose to support them, will require
+	 * manual configuration.
+	 */
+ struct iphdr *hdr4 = ip_hdr(skb);
+ bool success = true;
+
+ if (hdr4->version == 4) {
+ skb->protocol = htons(ETH_P_IP);
+ csum_replace2(&hdr4->check,
+ htons(hdr4->ttl << 8),
+ htons(dec.ttl << 8));
+ hdr4->ttl = dec.ttl;
+ }
+ else if (hdr4->version == 6) {
+ struct ipv6hdr *hdr6 = ipv6_hdr(skb);
+ skb->protocol = htons(ETH_P_IPV6);
+ hdr6->hop_limit = dec.ttl;
+ }
+ else
+ /* version 0 and version 1 are used by pseudo wires */
+ success = false;
+ return success;
+}
+
+static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct net *net = dev_net(dev);
+ struct mpls_shim_hdr *hdr;
+ struct mpls_route *rt;
+ struct mpls_entry_decoded dec;
+ struct net_device *out_dev;
+ unsigned int hh_len;
+ unsigned int new_header_size;
+ unsigned int mtu;
+ int err;
+
+	/* Careful: this entire function runs inside an RCU critical section */
+
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(*hdr)))
+ goto drop;
+
+ /* Read and decode the label */
+ hdr = mpls_hdr(skb);
+ dec = mpls_entry_decode(hdr);
+
+ /* Pop the label */
+ skb_pull(skb, sizeof(*hdr));
+ skb_reset_network_header(skb);
+
+ skb_orphan(skb);
+
+ rt = mpls_route_input_rcu(net, dec.label);
+ if (!rt)
+ goto drop;
+
+ /* Find the output device */
+ out_dev = rcu_dereference(rt->rt_dev);
+ if (!mpls_output_possible(out_dev))
+ goto drop;
+
+ if (skb_warn_if_lro(skb))
+ goto drop;
+
+ skb_forward_csum(skb);
+
+ /* Verify ttl is valid */
+ if (dec.ttl <= 1)
+ goto drop;
+ dec.ttl -= 1;
+
+ /* Verify the destination can hold the packet */
+ new_header_size = mpls_rt_header_size(rt);
+ mtu = mpls_dev_mtu(out_dev);
+ if (mpls_pkt_too_big(skb, mtu - new_header_size))
+ goto drop;
+
+ hh_len = LL_RESERVED_SPACE(out_dev);
+ if (!out_dev->header_ops)
+ hh_len = 0;
+
+ /* Ensure there is enough space for the headers in the skb */
+ if (skb_cow(skb, hh_len + new_header_size))
+ goto drop;
+
+ skb->dev = out_dev;
+ skb->protocol = htons(ETH_P_MPLS_UC);
+
+ if (unlikely(!new_header_size && dec.bos)) {
+ /* Penultimate hop popping */
+ if (!mpls_egress(rt, skb, dec))
+ goto drop;
+ } else {
+ bool bos;
+ int i;
+ skb_push(skb, new_header_size);
+ skb_reset_network_header(skb);
+ /* Push the new labels */
+ hdr = mpls_hdr(skb);
+ bos = dec.bos;
+ for (i = rt->rt_labels - 1; i >= 0; i--) {
+ hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos);
+ bos = false;
+ }
+ }
+
+ err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb);
+ if (err)
+ net_dbg_ratelimited("%s: packet transmission failed: %d\n",
+ __func__, err);
+ return 0;
+
+drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
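+
+/* Summary of the label operations above (descriptive comment, added for
+ * clarity): exactly one label is popped from every packet and rt_labels
+ * new labels are pushed from the matching route.  When the route pushes
+ * no labels and the popped label carried the bottom-of-stack bit, the
+ * packet is handed to mpls_egress() as plain IPv4/IPv6 (penultimate hop
+ * popping); in both cases the decremented TTL is propagated into the
+ * outgoing header.
+ */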
+
+static struct packet_type mpls_packet_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_MPLS_UC),
+ .func = mpls_forward,
+};
+
+static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
+ [RTA_DST] = { .type = NLA_U32 },
+ [RTA_OIF] = { .type = NLA_U32 },
+};
+
+struct mpls_route_config {
+ u32 rc_protocol;
+ u32 rc_ifindex;
+ u16 rc_via_table;
+ u16 rc_via_alen;
+ u8 rc_via[MAX_VIA_ALEN];
+ u32 rc_label;
+ u32 rc_output_labels;
+ u32 rc_output_label[MAX_NEW_LABELS];
+ u32 rc_nlflags;
+ struct nl_info rc_nlinfo;
+};
+
+static struct mpls_route *mpls_rt_alloc(size_t alen)
+{
+ struct mpls_route *rt;
+
+ rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
+ if (rt)
+ rt->rt_via_alen = alen;
+ return rt;
+}
+
+static void mpls_rt_free(struct mpls_route *rt)
+{
+ if (rt)
+ kfree_rcu(rt, rt_rcu);
+}
+
+static void mpls_notify_route(struct net *net, unsigned index,
+ struct mpls_route *old, struct mpls_route *new,
+ const struct nl_info *info)
+{
+ struct nlmsghdr *nlh = info ? info->nlh : NULL;
+ unsigned portid = info ? info->portid : 0;
+ int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
+ struct mpls_route *rt = new ? new : old;
+ unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
+ /* Ignore reserved labels for now */
+ if (rt && (index >= 16))
+ rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
+}
+
+static void mpls_route_update(struct net *net, unsigned index,
+ struct net_device *dev, struct mpls_route *new,
+ const struct nl_info *info)
+{
+ struct mpls_route __rcu **platform_label;
+ struct mpls_route *rt, *old = NULL;
+
+ ASSERT_RTNL();
+
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ rt = rtnl_dereference(platform_label[index]);
+ if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
+ rcu_assign_pointer(platform_label[index], new);
+ old = rt;
+ }
+
+ mpls_notify_route(net, index, old, new, info);
+
+ /* If we removed a route free it now */
+ mpls_rt_free(old);
+}
+
+static unsigned find_free_label(struct net *net)
+{
+ struct mpls_route __rcu **platform_label;
+ size_t platform_labels;
+ unsigned index;
+
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ platform_labels = net->mpls.platform_labels;
+ for (index = 16; index < platform_labels; index++) {
+ if (!rtnl_dereference(platform_label[index]))
+ return index;
+ }
+ return LABEL_NOT_SPECIFIED;
+}
+
+static int mpls_route_add(struct mpls_route_config *cfg)
+{
+ struct mpls_route __rcu **platform_label;
+ struct net *net = cfg->rc_nlinfo.nl_net;
+ struct net_device *dev = NULL;
+ struct mpls_route *rt, *old;
+ unsigned index;
+ int i;
+ int err = -EINVAL;
+
+ index = cfg->rc_label;
+
+	/* If a label was not specified during insert, pick one */
+ if ((index == LABEL_NOT_SPECIFIED) &&
+ (cfg->rc_nlflags & NLM_F_CREATE)) {
+ index = find_free_label(net);
+ }
+
+ /* The first 16 labels are reserved, and may not be set */
+ if (index < 16)
+ goto errout;
+
+ /* The full 20 bit range may not be supported. */
+ if (index >= net->mpls.platform_labels)
+ goto errout;
+
+ /* Ensure only a supported number of labels are present */
+ if (cfg->rc_output_labels > MAX_NEW_LABELS)
+ goto errout;
+
+ err = -ENODEV;
+ dev = dev_get_by_index(net, cfg->rc_ifindex);
+ if (!dev)
+ goto errout;
+
+ /* For now just support ethernet devices */
+ err = -EINVAL;
+ if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
+ goto errout;
+
+ err = -EINVAL;
+ if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
+ (dev->addr_len != cfg->rc_via_alen))
+ goto errout;
+
+	/* Append makes no sense with MPLS */
+ err = -EOPNOTSUPP;
+ if (cfg->rc_nlflags & NLM_F_APPEND)
+ goto errout;
+
+ err = -EEXIST;
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ old = rtnl_dereference(platform_label[index]);
+ if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
+ goto errout;
+
+ err = -EEXIST;
+ if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
+ goto errout;
+
+ err = -ENOENT;
+ if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
+ goto errout;
+
+ err = -ENOMEM;
+ rt = mpls_rt_alloc(cfg->rc_via_alen);
+ if (!rt)
+ goto errout;
+
+ rt->rt_labels = cfg->rc_output_labels;
+ for (i = 0; i < rt->rt_labels; i++)
+ rt->rt_label[i] = cfg->rc_output_label[i];
+ rt->rt_protocol = cfg->rc_protocol;
+ RCU_INIT_POINTER(rt->rt_dev, dev);
+ rt->rt_via_table = cfg->rc_via_table;
+ memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
+
+ mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo);
+
+ dev_put(dev);
+ return 0;
+
+errout:
+ if (dev)
+ dev_put(dev);
+ return err;
+}
+
+static int mpls_route_del(struct mpls_route_config *cfg)
+{
+ struct net *net = cfg->rc_nlinfo.nl_net;
+ unsigned index;
+ int err = -EINVAL;
+
+ index = cfg->rc_label;
+
+ /* The first 16 labels are reserved, and may not be removed */
+ if (index < 16)
+ goto errout;
+
+ /* The full 20 bit range may not be supported */
+ if (index >= net->mpls.platform_labels)
+ goto errout;
+
+ mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo);
+
+ err = 0;
+errout:
+ return err;
+}
+
+static void mpls_ifdown(struct net_device *dev)
+{
+ struct mpls_route __rcu **platform_label;
+ struct net *net = dev_net(dev);
+ unsigned index;
+
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ for (index = 0; index < net->mpls.platform_labels; index++) {
+ struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+ if (!rt)
+ continue;
+ if (rtnl_dereference(rt->rt_dev) != dev)
+ continue;
+ rt->rt_dev = NULL;
+ }
+}
+
+static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+ case NETDEV_UNREGISTER:
+ mpls_ifdown(dev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block mpls_dev_notifier = {
+ .notifier_call = mpls_dev_notify,
+};
+
+static int nla_put_via(struct sk_buff *skb,
+ u8 table, const void *addr, int alen)
+{
+ static const int table_to_family[NEIGH_NR_TABLES + 1] = {
+ AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
+ };
+ struct nlattr *nla;
+ struct rtvia *via;
+ int family = AF_UNSPEC;
+
+ nla = nla_reserve(skb, RTA_VIA, alen + 2);
+ if (!nla)
+ return -EMSGSIZE;
+
+ if (table <= NEIGH_NR_TABLES)
+ family = table_to_family[table];
+
+ via = nla_data(nla);
+ via->rtvia_family = family;
+ memcpy(via->rtvia_addr, addr, alen);
+ return 0;
+}
+
+int nla_put_labels(struct sk_buff *skb, int attrtype,
+ u8 labels, const u32 label[])
+{
+ struct nlattr *nla;
+ struct mpls_shim_hdr *nla_label;
+ bool bos;
+ int i;
+ nla = nla_reserve(skb, attrtype, labels*4);
+ if (!nla)
+ return -EMSGSIZE;
+
+ nla_label = nla_data(nla);
+ bos = true;
+ for (i = labels - 1; i >= 0; i--) {
+ nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
+ bos = false;
+ }
+
+ return 0;
+}
+
+int nla_get_labels(const struct nlattr *nla,
+ u32 max_labels, u32 *labels, u32 label[])
+{
+ unsigned len = nla_len(nla);
+ unsigned nla_labels;
+ struct mpls_shim_hdr *nla_label;
+ bool bos;
+ int i;
+
+	/* len needs to be a multiple of 4 (the label size) */
+ if (len & 3)
+ return -EINVAL;
+
+ /* Limit the number of new labels allowed */
+ nla_labels = len/4;
+ if (nla_labels > max_labels)
+ return -EINVAL;
+
+ nla_label = nla_data(nla);
+ bos = true;
+ for (i = nla_labels - 1; i >= 0; i--, bos = false) {
+ struct mpls_entry_decoded dec;
+ dec = mpls_entry_decode(nla_label + i);
+
+ /* Ensure the bottom of stack flag is properly set
+ * and ttl and tc are both clear.
+ */
+ if ((dec.bos != bos) || dec.ttl || dec.tc)
+ return -EINVAL;
+
+ label[i] = dec.label;
+ }
+ *labels = nla_labels;
+ return 0;
+}
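+
+/* Worked example (illustrative, not part of the original patch): a
+ * two-entry RTA_NEWDST attribute for labels {100, 200} is encoded by
+ * nla_put_labels() as two 32-bit big-endian words with ttl and tc zero
+ * and the S (bottom of stack) bit set only on the last entry:
+ *
+ *	label 100, S=0  ->  0x00064000
+ *	label 200, S=1  ->  0x000c8100
+ *
+ * nla_get_labels() accepts exactly this layout and rejects attributes
+ * whose ttl or tc fields are non-zero or whose S bit is misplaced.
+ */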
+
+static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct mpls_route_config *cfg)
+{
+ struct rtmsg *rtm;
+ struct nlattr *tb[RTA_MAX+1];
+ int index;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
+ if (err < 0)
+ goto errout;
+
+ err = -EINVAL;
+ rtm = nlmsg_data(nlh);
+ memset(cfg, 0, sizeof(*cfg));
+
+ if (rtm->rtm_family != AF_MPLS)
+ goto errout;
+ if (rtm->rtm_dst_len != 20)
+ goto errout;
+ if (rtm->rtm_src_len != 0)
+ goto errout;
+ if (rtm->rtm_tos != 0)
+ goto errout;
+ if (rtm->rtm_table != RT_TABLE_MAIN)
+ goto errout;
+ /* Any value is acceptable for rtm_protocol */
+
+	/* As MPLS uses destination-specific addresses
+	 * (or source-specific addresses in the case of multicast),
+	 * all addresses have universal scope.
+	 */
+ if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
+ goto errout;
+ if (rtm->rtm_type != RTN_UNICAST)
+ goto errout;
+ if (rtm->rtm_flags != 0)
+ goto errout;
+
+ cfg->rc_label = LABEL_NOT_SPECIFIED;
+ cfg->rc_protocol = rtm->rtm_protocol;
+ cfg->rc_nlflags = nlh->nlmsg_flags;
+ cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
+ cfg->rc_nlinfo.nlh = nlh;
+ cfg->rc_nlinfo.nl_net = sock_net(skb->sk);
+
+ for (index = 0; index <= RTA_MAX; index++) {
+ struct nlattr *nla = tb[index];
+ if (!nla)
+ continue;
+
+		switch (index) {
+ case RTA_OIF:
+ cfg->rc_ifindex = nla_get_u32(nla);
+ break;
+ case RTA_NEWDST:
+ if (nla_get_labels(nla, MAX_NEW_LABELS,
+ &cfg->rc_output_labels,
+ cfg->rc_output_label))
+ goto errout;
+ break;
+ case RTA_DST:
+ {
+ u32 label_count;
+ if (nla_get_labels(nla, 1, &label_count,
+ &cfg->rc_label))
+ goto errout;
+
+ /* The first 16 labels are reserved, and may not be set */
+ if (cfg->rc_label < 16)
+ goto errout;
+
+ break;
+ }
+ case RTA_VIA:
+ {
+ struct rtvia *via = nla_data(nla);
+ if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
+ goto errout;
+ cfg->rc_via_alen = nla_len(nla) -
+ offsetof(struct rtvia, rtvia_addr);
+ if (cfg->rc_via_alen > MAX_VIA_ALEN)
+ goto errout;
+
+ /* Validate the address family */
+			switch (via->rtvia_family) {
+ case AF_PACKET:
+ cfg->rc_via_table = NEIGH_LINK_TABLE;
+ break;
+ case AF_INET:
+ cfg->rc_via_table = NEIGH_ARP_TABLE;
+ if (cfg->rc_via_alen != 4)
+ goto errout;
+ break;
+ case AF_INET6:
+ cfg->rc_via_table = NEIGH_ND_TABLE;
+ if (cfg->rc_via_alen != 16)
+ goto errout;
+ break;
+ default:
+ /* Unsupported address family */
+ goto errout;
+ }
+
+ memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
+ break;
+ }
+ default:
+ /* Unsupported attribute */
+ goto errout;
+ }
+ }
+
+ err = 0;
+errout:
+ return err;
+}
+
+static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct mpls_route_config cfg;
+ int err;
+
+ err = rtm_to_route_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
+
+ return mpls_route_del(&cfg);
+}
+
+
+static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct mpls_route_config cfg;
+ int err;
+
+ err = rtm_to_route_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
+
+ return mpls_route_add(&cfg);
+}
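+
+/* Usage sketch (assumption: an iproute2 build with MPLS support; the
+ * exact userspace syntax is not defined by this patch): a label-swap
+ * route (100 -> 200) could be requested over rtnetlink with something
+ * like
+ *
+ *	ip -f mpls route add 100 as 200 via inet 192.0.2.1 dev eth0
+ *
+ * which arrives here as an RTM_NEWROUTE message carrying RTA_DST (100),
+ * RTA_NEWDST (200), RTA_VIA (AF_INET, 192.0.2.1) and RTA_OIF attributes.
+ */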
+
+static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
+ u32 label, struct mpls_route *rt, int flags)
+{
+ struct net_device *dev;
+ struct nlmsghdr *nlh;
+ struct rtmsg *rtm;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ rtm = nlmsg_data(nlh);
+ rtm->rtm_family = AF_MPLS;
+ rtm->rtm_dst_len = 20;
+ rtm->rtm_src_len = 0;
+ rtm->rtm_tos = 0;
+ rtm->rtm_table = RT_TABLE_MAIN;
+ rtm->rtm_protocol = rt->rt_protocol;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_type = RTN_UNICAST;
+ rtm->rtm_flags = 0;
+
+ if (rt->rt_labels &&
+ nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
+ goto nla_put_failure;
+ if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen))
+ goto nla_put_failure;
+ dev = rtnl_dereference(rt->rt_dev);
+ if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
+ goto nla_put_failure;
+ if (nla_put_labels(skb, RTA_DST, 1, &label))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct mpls_route __rcu **platform_label;
+ size_t platform_labels;
+ unsigned int index;
+
+ ASSERT_RTNL();
+
+ index = cb->args[0];
+ if (index < 16)
+ index = 16;
+
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ platform_labels = net->mpls.platform_labels;
+ for (; index < platform_labels; index++) {
+ struct mpls_route *rt;
+ rt = rtnl_dereference(platform_label[index]);
+ if (!rt)
+ continue;
+
+ if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+ index, rt, NLM_F_MULTI) < 0)
+ break;
+ }
+ cb->args[0] = index;
+
+ return skb->len;
+}
+
+static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
+{
+ size_t payload =
+ NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */
+ + nla_total_size(4); /* RTA_DST */
+ if (rt->rt_labels) /* RTA_NEWDST */
+ payload += nla_total_size(rt->rt_labels * 4);
+ if (rt->rt_dev) /* RTA_OIF */
+ payload += nla_total_size(4);
+ return payload;
+}
+
+static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
+ struct nlmsghdr *nlh, struct net *net, u32 portid,
+ unsigned int nlm_flags)
+{
+ struct sk_buff *skb;
+ u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
+
+ err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in lfib_nlmsg_size */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);
+
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
+}
+
+static int resize_platform_label_table(struct net *net, size_t limit)
+{
+ size_t size = sizeof(struct mpls_route *) * limit;
+ size_t old_limit;
+ size_t cp_size;
+ struct mpls_route __rcu **labels = NULL, **old;
+ struct mpls_route *rt0 = NULL, *rt2 = NULL;
+ unsigned index;
+
+ if (size) {
+ labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+ if (!labels)
+ labels = vzalloc(size);
+
+ if (!labels)
+ goto nolabels;
+ }
+
+ /* In case the predefined labels need to be populated */
+ if (limit > LABEL_IPV4_EXPLICIT_NULL) {
+ struct net_device *lo = net->loopback_dev;
+ rt0 = mpls_rt_alloc(lo->addr_len);
+ if (!rt0)
+ goto nort0;
+ RCU_INIT_POINTER(rt0->rt_dev, lo);
+ rt0->rt_protocol = RTPROT_KERNEL;
+ rt0->rt_via_table = NEIGH_LINK_TABLE;
+ memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
+ }
+ if (limit > LABEL_IPV6_EXPLICIT_NULL) {
+ struct net_device *lo = net->loopback_dev;
+ rt2 = mpls_rt_alloc(lo->addr_len);
+ if (!rt2)
+ goto nort2;
+ RCU_INIT_POINTER(rt2->rt_dev, lo);
+ rt2->rt_protocol = RTPROT_KERNEL;
+ rt2->rt_via_table = NEIGH_LINK_TABLE;
+ memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
+ }
+
+ rtnl_lock();
+ /* Remember the original table */
+ old = rtnl_dereference(net->mpls.platform_label);
+ old_limit = net->mpls.platform_labels;
+
+ /* Free any labels beyond the new table */
+ for (index = limit; index < old_limit; index++)
+ mpls_route_update(net, index, NULL, NULL, NULL);
+
+ /* Copy over the old labels */
+ cp_size = size;
+ if (old_limit < limit)
+ cp_size = old_limit * sizeof(struct mpls_route *);
+
+ memcpy(labels, old, cp_size);
+
+ /* If needed set the predefined labels */
+ if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) &&
+ (limit > LABEL_IPV6_EXPLICIT_NULL)) {
+ RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2);
+ rt2 = NULL;
+ }
+
+ if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) &&
+ (limit > LABEL_IPV4_EXPLICIT_NULL)) {
+ RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0);
+ rt0 = NULL;
+ }
+
+ /* Update the global pointers */
+ net->mpls.platform_labels = limit;
+ rcu_assign_pointer(net->mpls.platform_label, labels);
+
+ rtnl_unlock();
+
+ mpls_rt_free(rt2);
+ mpls_rt_free(rt0);
+
+ if (old) {
+ synchronize_rcu();
+ kvfree(old);
+ }
+ return 0;
+
+nort2:
+ mpls_rt_free(rt0);
+nort0:
+ kvfree(labels);
+nolabels:
+ return -ENOMEM;
+}
+
+static int mpls_platform_labels(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = table->data;
+ int platform_labels = net->mpls.platform_labels;
+ int ret;
+ struct ctl_table tmp = {
+ .procname = table->procname,
+ .data = &platform_labels,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ .extra1 = &zero,
+ .extra2 = &label_limit,
+ };
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0)
+ ret = resize_platform_label_table(net, platform_labels);
+
+ return ret;
+}
+
+static struct ctl_table mpls_table[] = {
+ {
+ .procname = "platform_labels",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = mpls_platform_labels,
+ },
+ { }
+};
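+
+/* Administrative note (derived from the code above): the per-namespace
+ * label table starts out empty (platform_labels = 0 in mpls_net_init),
+ * so no routes can be installed until the table is sized, e.g.
+ *
+ *	sysctl -w net.mpls.platform_labels=1024
+ *
+ * (equivalently, writing to /proc/sys/net/mpls/platform_labels), which
+ * invokes resize_platform_label_table() via mpls_platform_labels().
+ */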
+
+static int mpls_net_init(struct net *net)
+{
+ struct ctl_table *table;
+
+ net->mpls.platform_labels = 0;
+ net->mpls.platform_label = NULL;
+
+ table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
+ if (table == NULL)
+ return -ENOMEM;
+
+ table[0].data = net;
+ net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
+ if (net->mpls.ctl == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mpls_net_exit(struct net *net)
+{
+ struct mpls_route __rcu **platform_label;
+ size_t platform_labels;
+ struct ctl_table *table;
+ unsigned int index;
+
+ table = net->mpls.ctl->ctl_table_arg;
+ unregister_net_sysctl_table(net->mpls.ctl);
+ kfree(table);
+
+	/* An RCU grace period has passed since the last device in this
+	 * network namespace (and thus the last in-flight packet) left
+	 * the namespace.  This is because unregister_netdevice_many and
+	 * netdev_run_todo have completed for each network device that
+	 * was in this network namespace.
+	 *
+	 * As such, no additional RCU synchronization is necessary when
+	 * freeing the platform_label table.
+	 */
+ rtnl_lock();
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ platform_labels = net->mpls.platform_labels;
+ for (index = 0; index < platform_labels; index++) {
+ struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+ RCU_INIT_POINTER(platform_label[index], NULL);
+ mpls_rt_free(rt);
+ }
+ rtnl_unlock();
+
+ kvfree(platform_label);
+}
+
+static struct pernet_operations mpls_net_ops = {
+ .init = mpls_net_init,
+ .exit = mpls_net_exit,
+};
+
+static int __init mpls_init(void)
+{
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);
+
+ err = register_pernet_subsys(&mpls_net_ops);
+ if (err)
+ goto out;
+
+ err = register_netdevice_notifier(&mpls_dev_notifier);
+ if (err)
+ goto out_unregister_pernet;
+
+ dev_add_pack(&mpls_packet_type);
+
+ rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
+ rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
+ err = 0;
+out:
+ return err;
+
+out_unregister_pernet:
+ unregister_pernet_subsys(&mpls_net_ops);
+ goto out;
+}
+module_init(mpls_init);
+
+static void __exit mpls_exit(void)
+{
+ rtnl_unregister_all(PF_MPLS);
+ dev_remove_pack(&mpls_packet_type);
+ unregister_netdevice_notifier(&mpls_dev_notifier);
+ unregister_pernet_subsys(&mpls_net_ops);
+}
+module_exit(mpls_exit);
+
+MODULE_DESCRIPTION("MultiProtocol Label Switching");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_NETPROTO(PF_MPLS);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
new file mode 100644
index 000000000000..fb6de92052c4
--- /dev/null
+++ b/net/mpls/internal.h
@@ -0,0 +1,59 @@
+#ifndef MPLS_INTERNAL_H
+#define MPLS_INTERNAL_H
+
+#define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */
+#define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */
+#define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */
+#define LABEL_IMPLICIT_NULL 3 /* RFC3032 */
+#define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */
+#define LABEL_GAL 13 /* RFC5586 */
+#define LABEL_OAM_ALERT 14 /* RFC3429 */
+#define LABEL_EXTENSION 15 /* RFC7274 */
+
+
+struct mpls_shim_hdr {
+ __be32 label_stack_entry;
+};
+
+struct mpls_entry_decoded {
+ u32 label;
+ u8 ttl;
+ u8 tc;
+ u8 bos;
+};
+
+struct sk_buff;
+
+static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
+{
+ return (struct mpls_shim_hdr *)skb_network_header(skb);
+}
+
+static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
+{
+ struct mpls_shim_hdr result;
+ result.label_stack_entry =
+ cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
+ (tc << MPLS_LS_TC_SHIFT) |
+ (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
+ (ttl << MPLS_LS_TTL_SHIFT));
+ return result;
+}
+
+static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
+{
+ struct mpls_entry_decoded result;
+ unsigned entry = be32_to_cpu(hdr->label_stack_entry);
+
+ result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+ result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+ result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
+
+ return result;
+}
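+
+/* Worked example (illustrative): mpls_entry_encode(100, 64, 0, true)
+ * yields a label stack entry of htonl(0x00064140), and feeding that
+ * value back through mpls_entry_decode() returns label 100, tc 0,
+ * bos 1, ttl 64.
+ */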
+
+int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]);
+int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, u32 label[]);
+
+#endif /* MPLS_INTERNAL_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index b02660fa9eb0..971cd7526f4b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -438,8 +438,10 @@ config NF_TABLES
To compile it as a module, choose M here.
+if NF_TABLES
+
config NF_TABLES_INET
- depends on NF_TABLES && IPV6
+ depends on IPV6
select NF_TABLES_IPV4
select NF_TABLES_IPV6
tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support"
@@ -447,21 +449,18 @@ config NF_TABLES_INET
This option enables support for a mixed IPv4/IPv6 "inet" table.
config NFT_EXTHDR
- depends on NF_TABLES
tristate "Netfilter nf_tables IPv6 exthdr module"
help
This option adds the "exthdr" expression that you can use to match
IPv6 extension headers.
config NFT_META
- depends on NF_TABLES
tristate "Netfilter nf_tables meta module"
help
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
config NFT_CT
- depends on NF_TABLES
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
@@ -469,42 +468,36 @@ config NFT_CT
connection tracking information such as the flow state.
config NFT_RBTREE
- depends on NF_TABLES
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
config NFT_HASH
- depends on NF_TABLES
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
config NFT_COUNTER
- depends on NF_TABLES
tristate "Netfilter nf_tables counter module"
help
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
config NFT_LOG
- depends on NF_TABLES
tristate "Netfilter nf_tables log module"
help
This option adds the "log" expression that you can use to log
packets matching some criteria.
config NFT_LIMIT
- depends on NF_TABLES
tristate "Netfilter nf_tables limit module"
help
This option adds the "limit" expression that you can use to
ratelimit rule matchings.
config NFT_MASQ
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables masquerade support"
@@ -513,7 +506,6 @@ config NFT_MASQ
to perform NAT in the masquerade flavour.
config NFT_REDIR
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables redirect support"
@@ -522,7 +514,6 @@ config NFT_REDIR
to perform NAT in the redirect flavour.
config NFT_NAT
- depends on NF_TABLES
depends on NF_CONNTRACK
select NF_NAT
tristate "Netfilter nf_tables nat module"
@@ -531,7 +522,6 @@ config NFT_NAT
typical Network Address Translation (NAT) packet transformations.
config NFT_QUEUE
- depends on NF_TABLES
depends on NETFILTER_XTABLES
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
@@ -540,7 +530,6 @@ config NFT_QUEUE
infrastructure (also known as NFQUEUE) from nftables.
config NFT_REJECT
- depends on NF_TABLES
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
help
@@ -554,7 +543,6 @@ config NFT_REJECT_INET
tristate
config NFT_COMPAT
- depends on NF_TABLES
depends on NETFILTER_XTABLES
tristate "Netfilter x_tables over nf_tables module"
help
@@ -562,6 +550,8 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+endif # NF_TABLES
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
@@ -951,7 +941,7 @@ comment "Xtables matches"
config NETFILTER_XT_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
- depends on NETFILTER_ADVANCED
+ default m if NETFILTER_ADVANCED=n
---help---
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b87ca32efa0b..04dbd9c7213f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
}
@@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
}
}
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+ int conn_reuse_mode)
+{
+ /* Controlled (FTP DATA or persistence)? */
+ if (cp->control)
+ return false;
+
+ switch (cp->protocol) {
+ case IPPROTO_TCP:
+ return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+ ((conn_reuse_mode & 2) &&
+ (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+ (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+ case IPPROTO_SCTP:
+ return cp->state == IP_VS_SCTP_S_CLOSED;
+ default:
+ return false;
+ }
+}
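+
+/* Descriptive note (added for clarity, semantics as implemented here
+ * and in ip_vs_in() below): with a non-zero conn_reuse_mode, a SYN/INIT
+ * that matches an existing uncontrolled connection in TCP TIME_WAIT or
+ * SCTP CLOSED state (or in TCP FIN_WAIT with no output packets seen,
+ * when (conn_reuse_mode & 2) is set) is treated as a new connection:
+ * the old entry is expired and the packet is scheduled afresh.
+ */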
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@@ -1585,6 +1611,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_conn *cp;
int ret, pkts;
struct netns_ipvs *ipvs;
+ int conn_reuse_mode;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1653,10 +1680,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
- if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
- is_new_conn(skb, &iph)) {
- ip_vs_conn_expire_now(cp);
+ conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+ if (conn_reuse_mode && !iph.fragoffs &&
+ is_new_conn(skb, &iph) && cp &&
+ ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) ||
+ unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
cp = NULL;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ed99448671c3..49532672f66d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net)
}
static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
-#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
spin_lock_bh(&src->lock);
@@ -747,13 +747,28 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
}
static void
+ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
+{
+ dst->conns = (u32)src->conns;
+ dst->inpkts = (u32)src->inpkts;
+ dst->outpkts = (u32)src->outpkts;
+ dst->inbytes = src->inbytes;
+ dst->outbytes = src->outbytes;
+ dst->cps = (u32)src->cps;
+ dst->inpps = (u32)src->inpps;
+ dst->outpps = (u32)src->outpps;
+ dst->inbps = (u32)src->inbps;
+ dst->outbps = (u32)src->outbps;
+}
+
+static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
/* get current counters as zero point, rates are zeroed */
-#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
IP_VS_ZERO_STATS_COUNTER(conns);
IP_VS_ZERO_STATS_COUNTER(inpkts);
@@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_reuse_mode",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = {
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
- struct ip_vs_stats_user show;
+ struct ip_vs_kstats show;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
" Conns Packets Packets Bytes Bytes\n");
ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
- show.inpkts, show.outpkts,
- (unsigned long long) show.inbytes,
- (unsigned long long) show.outbytes);
-
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)show.conns,
+ (unsigned long long)show.inpkts,
+ (unsigned long long)show.outpkts,
+ (unsigned long long)show.inbytes,
+ (unsigned long long)show.outbytes);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, "%8X %8X %8X %16X %16X\n",
- show.cps, show.inpps, show.outpps,
- show.inbps, show.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
+ (unsigned long long)show.cps,
+ (unsigned long long)show.inpps,
+ (unsigned long long)show.outpps,
+ (unsigned long long)show.inbps,
+ (unsigned long long)show.outbps);
return 0;
}
@@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
- struct ip_vs_stats_user rates;
+ struct ip_vs_kstats kstats;
int i;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
do {
start = u64_stats_fetch_begin_irq(&u->syncp);
- inbytes = u->ustats.inbytes;
- outbytes = u->ustats.outbytes;
+ conns = u->cnt.conns;
+ inpkts = u->cnt.inpkts;
+ outpkts = u->cnt.outpkts;
+ inbytes = u->cnt.inbytes;
+ outbytes = u->cnt.outbytes;
} while (u64_stats_fetch_retry_irq(&u->syncp, start));
- seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
- i, u->ustats.conns, u->ustats.inpkts,
- u->ustats.outpkts, (__u64)inbytes,
- (__u64)outbytes);
+ seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
+ i, (u64)conns, (u64)inpkts,
+ (u64)outpkts, (u64)inbytes,
+ (u64)outbytes);
}
- spin_lock_bh(&tot_stats->lock);
-
- seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
- tot_stats->ustats.conns, tot_stats->ustats.inpkts,
- tot_stats->ustats.outpkts,
- (unsigned long long) tot_stats->ustats.inbytes,
- (unsigned long long) tot_stats->ustats.outbytes);
-
- ip_vs_read_estimator(&rates, tot_stats);
+ ip_vs_copy_stats(&kstats, tot_stats);
- spin_unlock_bh(&tot_stats->lock);
+ seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)kstats.conns,
+ (unsigned long long)kstats.inpkts,
+ (unsigned long long)kstats.outpkts,
+ (unsigned long long)kstats.inbytes,
+ (unsigned long long)kstats.outbytes);
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, " %8X %8X %8X %16X %16X\n",
- rates.cps,
- rates.inpps,
- rates.outpps,
- rates.inbps,
- rates.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
+ kstats.cps,
+ kstats.inpps,
+ kstats.outpps,
+ kstats.inbps,
+ kstats.outbps);
return 0;
}
@@ -2400,6 +2426,7 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
+ struct ip_vs_kstats kstats;
sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol;
@@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
dst->num_dests = src->num_dests;
- ip_vs_copy_stats(&dst->stats, &src->stats);
+ ip_vs_copy_stats(&kstats, &src->stats);
+ ip_vs_export_stats_user(&dst->stats, &kstats);
}
static inline int
@@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
int count = 0;
struct ip_vs_dest *dest;
struct ip_vs_dest_entry entry;
+ struct ip_vs_kstats kstats;
memset(&entry, 0, sizeof(entry));
list_for_each_entry(dest, &svc->destinations, n_list) {
@@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
entry.activeconns = atomic_read(&dest->activeconns);
entry.inactconns = atomic_read(&dest->inactconns);
entry.persistconns = atomic_read(&dest->persistconns);
- ip_vs_copy_stats(&entry.stats, &dest->stats);
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ ip_vs_export_stats_user(&entry.stats, &kstats);
if (copy_to_user(&uptr->entrytable[count],
&entry, sizeof(entry))) {
ret = -EFAULT;
@@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
- struct ip_vs_stats *stats)
+ struct ip_vs_kstats *kstats)
{
- struct ip_vs_stats_user ustats;
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
if (!nl_stats)
return -EMSGSIZE;
- ip_vs_copy_stats(&ustats, stats);
-
- if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
+ if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
+ goto nla_put_failure;
+ nla_nest_end(skb, nl_stats);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_stats);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
+ struct ip_vs_kstats *kstats)
+{
+ struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
goto nla_put_failure;
nla_nest_end(skb, nl_stats);
@@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
+ struct ip_vs_kstats kstats;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+ ip_vs_copy_stats(&kstats, &svc->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_service);
@@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
struct nlattr *nl_dest;
+ struct ip_vs_kstats kstats;
nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
if (!nl_dest)
@@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
atomic_read(&dest->persistconns)) ||
nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_dest);
@@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_pmtu_disc = 1;
tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
tbl[idx++].data = &ipvs->sysctl_backup_only;
+ ipvs->sysctl_conn_reuse_mode = 1;
+ tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 1425e9a924c4..ef0eb0a8d552 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -45,17 +45,19 @@
NOTES.
- * The stored value for average bps is scaled by 2^5, so that maximal
- rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+ * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
- * A lot code is taken from net/sched/estimator.c
+ * Netlink users can see 64-bit values but sockopt users are restricted
+ to 32-bit values for conns, packets, bps, cps and pps.
+
+ * A lot of code is taken from net/core/gen_estimator.c
*/
/*
* Make a summary from each cpu
*/
-static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
@@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
+
if (add) {
- sum->conns += s->ustats.conns;
- sum->inpkts += s->ustats.inpkts;
- sum->outpkts += s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- inbytes = s->ustats.inbytes;
- outbytes = s->ustats.outbytes;
+ conns = s->cnt.conns;
+ inpkts = s->cnt.inpkts;
+ outpkts = s->cnt.outpkts;
+ inbytes = s->cnt.inbytes;
+ outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
+ sum->conns += conns;
+ sum->inpkts += inpkts;
+ sum->outpkts += outpkts;
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
add = true;
- sum->conns = s->ustats.conns;
- sum->inpkts = s->ustats.inpkts;
- sum->outpkts = s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- sum->inbytes = s->ustats.inbytes;
- sum->outbytes = s->ustats.outbytes;
+ sum->conns = s->cnt.conns;
+ sum->inpkts = s->cnt.inpkts;
+ sum->outpkts = s->cnt.outpkts;
+ sum->inbytes = s->cnt.inbytes;
+ sum->outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
}
}
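
The summation loop above reads each CPU's 64-bit counters inside a u64_stats retry loop, so a 32-bit host never observes a torn value. A minimal userspace sketch of the same writer/reader sequence-counter pattern, using C11 atomics instead of the kernel's u64_stats helpers (all names here are illustrative, not the kernel's):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct cpu_stats {
	atomic_uint seq;		/* even = stable, odd = writer active */
	uint64_t inpkts, inbytes;
};

/* Writer: make seq odd, update, make it even again. */
static void stats_add(struct cpu_stats *s, uint64_t pkts, uint64_t bytes)
{
	unsigned int seq = atomic_load_explicit(&s->seq, memory_order_relaxed);

	atomic_store_explicit(&s->seq, seq + 1, memory_order_release);
	s->inpkts  += pkts;
	s->inbytes += bytes;
	atomic_store_explicit(&s->seq, seq + 2, memory_order_release);
}

/* Reader: retry until an even, unchanged sequence brackets the copy. */
static void stats_snapshot(struct cpu_stats *s, struct cpu_stats *out)
{
	unsigned int start;

	do {
		start = atomic_load_explicit(&s->seq, memory_order_acquire);
		out->inpkts  = s->inpkts;
		out->inbytes = s->inbytes;
	} while ((start & 1) ||
		 start != atomic_load_explicit(&s->seq, memory_order_acquire));
}

int main(void)
{
	struct cpu_stats s = { 0 }, snap = { 0 };

	stats_add(&s, 3, 4096);
	stats_snapshot(&s, &snap);
	printf("pkts=%llu bytes=%llu\n",
	       (unsigned long long)snap.inpkts,
	       (unsigned long long)snap.inbytes);
	return 0;
}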
@@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg)
{
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
- u32 n_conns;
- u32 n_inpkts, n_outpkts;
- u64 n_inbytes, n_outbytes;
- u32 rate;
+ u64 rate;
struct net *net = (struct net *)arg;
struct netns_ipvs *ipvs;
@@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg)
s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
- ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
- n_conns = s->ustats.conns;
- n_inpkts = s->ustats.inpkts;
- n_outpkts = s->ustats.outpkts;
- n_inbytes = s->ustats.inbytes;
- n_outbytes = s->ustats.outbytes;
+ ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
/* scaled by 2^10, but divided 2 seconds */
- rate = (n_conns - e->last_conns) << 9;
- e->last_conns = n_conns;
- e->cps += ((long)rate - (long)e->cps) >> 2;
-
- rate = (n_inpkts - e->last_inpkts) << 9;
- e->last_inpkts = n_inpkts;
- e->inpps += ((long)rate - (long)e->inpps) >> 2;
-
- rate = (n_outpkts - e->last_outpkts) << 9;
- e->last_outpkts = n_outpkts;
- e->outpps += ((long)rate - (long)e->outpps) >> 2;
-
- rate = (n_inbytes - e->last_inbytes) << 4;
- e->last_inbytes = n_inbytes;
- e->inbps += ((long)rate - (long)e->inbps) >> 2;
-
- rate = (n_outbytes - e->last_outbytes) << 4;
- e->last_outbytes = n_outbytes;
- e->outbps += ((long)rate - (long)e->outbps) >> 2;
+ rate = (s->kstats.conns - e->last_conns) << 9;
+ e->last_conns = s->kstats.conns;
+ e->cps += ((s64)rate - (s64)e->cps) >> 2;
+
+ rate = (s->kstats.inpkts - e->last_inpkts) << 9;
+ e->last_inpkts = s->kstats.inpkts;
+ e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
+
+ rate = (s->kstats.outpkts - e->last_outpkts) << 9;
+ e->last_outpkts = s->kstats.outpkts;
+ e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
+
+ /* scaled by 2^5, but divided 2 seconds */
+ rate = (s->kstats.inbytes - e->last_inbytes) << 4;
+ e->last_inbytes = s->kstats.inbytes;
+ e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
+
+ rate = (s->kstats.outbytes - e->last_outbytes) << 4;
+ e->last_outbytes = s->kstats.outbytes;
+ e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
@@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
- struct ip_vs_stats_user *u = &stats->ustats;
+ struct ip_vs_kstats *k = &stats->kstats;
/* reset counters, caller must hold the stats->lock lock */
- est->last_inbytes = u->inbytes;
- est->last_outbytes = u->outbytes;
- est->last_conns = u->conns;
- est->last_inpkts = u->inpkts;
- est->last_outpkts = u->outpkts;
+ est->last_inbytes = k->inbytes;
+ est->last_outbytes = k->outbytes;
+ est->last_conns = k->conns;
+ est->last_inpkts = k->inpkts;
+ est->last_outpkts = k->outpkts;
est->cps = 0;
est->inpps = 0;
est->outpps = 0;
@@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
}
/* Get decoded rates */
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
- struct ip_vs_stats *stats)
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
{
struct ip_vs_estimator *e = &stats->est;
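
For reference, the estimator above is a fixed-point EWMA: the timer fires every 2 seconds, so (delta << 9) is the per-second packet/connection rate scaled by 2^10 (bytes use << 4, i.e. 2^5), and each tick moves the estimate a quarter of the way toward the new sample. A standalone sketch of that arithmetic, with illustrative values and names:

#include <stdint.h>
#include <stdio.h>

/* One estimator tick, as in estimation_timer(): rate is the per-second
 * count scaled by 2^10, and the estimate tracks it with weight 1/4. */
static uint64_t ewma_step(uint64_t est, uint64_t last, uint64_t now)
{
	uint64_t rate = (now - last) << 9;	/* counted over 2 s */

	return est + (((int64_t)rate - (int64_t)est) >> 2);
}

int main(void)
{
	uint64_t est = 0, last = 0, pkts = 0;

	for (int tick = 1; tick <= 5; tick++) {
		pkts += 2000;			/* 1000 packets/s for 2 s */
		est = ewma_step(est, last, pkts);
		last = pkts;
		printf("tick %d: ~%llu pkt/s\n", tick,
		       (unsigned long long)(est >> 10));
	}
	return 0;
}

Run, the printed rate climbs toward 1000 pkt/s; shifting the stored value right by 10 (or 5 for bytes) is exactly how ip_vs_read_estimator() decodes it.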
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d93ceeb3ef04..08d95559b6f7 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
- else
+ if (cp && ((cp->dport != dport) ||
+ !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+ if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ } else {
+ /* This is the expiration message for the
+ * connection that was already replaced, so we
+ * just ignore it.
+ */
+ __ip_vs_conn_put(cp);
+ kfree(param->pe_data);
+ return;
+ }
+ }
+ } else {
cp = ip_vs_ct_in_get(param);
+ }
if (cp) {
/* Free pe_data */
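
The new branch in ip_vs_proc_conn() covers a backup that learns a synced connection was rescheduled to a different real server: a normal update expires the stale entry so it can be recreated with the new destination, while an expiration message (IP_VS_CONN_F_INACTIVE) for the already-replaced entry is simply dropped. A compact restatement of that decision as an illustrative helper, not kernel code:

#include <stdio.h>

enum sync_action { SYNC_UPDATE, SYNC_REPLACE, SYNC_IGNORE };

/* Illustrative only: mirrors the decision taken in ip_vs_proc_conn(). */
static enum sync_action classify_sync(int dest_matches, int inactive_flag)
{
	if (dest_matches)
		return SYNC_UPDATE;	/* same real server: just refresh state */
	if (!inactive_flag)
		return SYNC_REPLACE;	/* rescheduled: expire old, create anew */
	return SYNC_IGNORE;		/* late expire for the replaced conn */
}

int main(void)
{
	printf("%d %d %d\n",
	       classify_sync(1, 0), classify_sync(0, 0), classify_sync(0, 1));
	return 0;
}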
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index b8b95f4027ca..57a26cc90c9f 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -88,7 +88,6 @@ static int amanda_help(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- struct ts_state ts;
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple *tuple;
unsigned int dataoff, start, stop, off, i;
@@ -113,23 +112,20 @@ static int amanda_help(struct sk_buff *skb,
return NF_ACCEPT;
}
- memset(&ts, 0, sizeof(ts));
start = skb_find_text(skb, dataoff, skb->len,
- search[SEARCH_CONNECT].ts, &ts);
+ search[SEARCH_CONNECT].ts);
if (start == UINT_MAX)
goto out;
start += dataoff + search[SEARCH_CONNECT].len;
- memset(&ts, 0, sizeof(ts));
stop = skb_find_text(skb, start, skb->len,
- search[SEARCH_NEWLINE].ts, &ts);
+ search[SEARCH_NEWLINE].ts);
if (stop == UINT_MAX)
goto out;
stop += start;
for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
- memset(&ts, 0, sizeof(ts));
- off = skb_find_text(skb, start, stop, search[i].ts, &ts);
+ off = skb_find_text(skb, start, stop, search[i].ts);
if (off == UINT_MAX)
continue;
off += start + search[i].len;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6ab777912237..ea51833c8f5a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -401,7 +401,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
}
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
- [NFTA_TABLE_NAME] = { .type = NLA_STRING },
+ [NFTA_TABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
@@ -686,13 +687,13 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
if (!try_module_get(afi->owner))
return -EAFNOSUPPORT;
- table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
if (table == NULL) {
module_put(afi->owner);
return -ENOMEM;
}
- nla_strlcpy(table->name, name, nla_len(name));
+ nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
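
The nf_tables change above bounds NFTA_TABLE_NAME in the attribute policy and copies it into a fixed NFT_TABLE_MAXNAMELEN field instead of sizing the allocation by the attribute. A small userspace sketch of the same "validate the length up front, copy into a fixed buffer" pattern (the constant and helpers are illustrative, not the kernel's):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TABLE_MAXNAMELEN 32		/* illustrative bound */

struct table {
	char name[TABLE_MAXNAMELEN];
};

/* Reject over-long names first, then copy with guaranteed termination. */
static struct table *table_new(const char *name)
{
	struct table *t;

	if (strlen(name) >= TABLE_MAXNAMELEN)
		return NULL;		/* policy-style length check */

	t = calloc(1, sizeof(*t));
	if (!t)
		return NULL;

	snprintf(t->name, sizeof(t->name), "%s", name);
	return t;
}

int main(void)
{
	struct table *t = table_new("filter");

	if (t)
		printf("table %s\n", t->name);
	free(t);
	return 0;
}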
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3b90eb2b2c55..77165bf023f3 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -21,6 +21,48 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
+enum nft_trace {
+ NFT_TRACE_RULE,
+ NFT_TRACE_RETURN,
+ NFT_TRACE_POLICY,
+};
+
+static const char *const comments[] = {
+ [NFT_TRACE_RULE] = "rule",
+ [NFT_TRACE_RETURN] = "return",
+ [NFT_TRACE_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+static void __nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+ nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
+}
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ if (unlikely(pkt->skb->nf_trace))
+ __nft_trace_packet(pkt, chain, rulenum, type);
+}
+
static void nft_cmp_fast_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1])
{
@@ -66,40 +108,6 @@ struct nft_jumpstack {
int rulenum;
};
-enum nft_trace {
- NFT_TRACE_RULE,
- NFT_TRACE_RETURN,
- NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
- [NFT_TRACE_RULE] = "rule",
- [NFT_TRACE_RETURN] = "return",
- [NFT_TRACE_POLICY] = "policy",
-};
-
-static struct nf_loginfo trace_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 4,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static void nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
-{
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
-
- nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
- pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
- chain->table->name, chain->name, comments[type],
- rulenum);
-}
-
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
@@ -146,8 +154,7 @@ next_rule:
data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
continue;
}
break;
@@ -157,37 +164,28 @@ next_rule:
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
return data[NFT_REG_VERDICT].verdict;
}
switch (data[NFT_REG_VERDICT].verdict) {
case NFT_JUMP:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rule = rule;
jumpstack[stackptr].rulenum = rulenum;
stackptr++;
- chain = data[NFT_REG_VERDICT].chain;
- goto do_chain;
+ /* fall through */
case NFT_GOTO:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
chain = data[NFT_REG_VERDICT].chain;
goto do_chain;
- case NFT_RETURN:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
- break;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN)))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+ rulenum++;
+ /* fall through */
+ case NFT_RETURN:
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
break;
default:
WARN_ON(1);
@@ -201,8 +199,7 @@ next_rule:
goto next_rule;
}
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+ nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 213584cf04b3..0b0fd4e36294 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -20,6 +20,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -42,6 +43,7 @@ union nft_entry {
struct ipt_entry e4;
struct ip6t_entry e6;
struct ebt_entry ebt;
+ struct arpt_entry arp;
};
static inline void
@@ -140,6 +142,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
entry->ebt.ethproto = (__force __be16)proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
+ case NFPROTO_ARP:
+ break;
}
par->entryinfo = entry;
par->target = target;
@@ -351,6 +355,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
entry->ebt.ethproto = (__force __be16)proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
+ case NFPROTO_ARP:
+ break;
}
par->entryinfo = entry;
par->match = match;
@@ -537,6 +543,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
case NFPROTO_BRIDGE:
fmt = "ebt_%s";
break;
+ case NFPROTO_ARP:
+ fmt = "arpt_%s";
+ break;
default:
pr_err("nft_compat: unsupported protocol %d\n",
nfmsg->nfgen_family);
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 7b5f9d58680a..92877114aff4 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
case NFPROTO_IPV4:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code);
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
break;
case NFT_REJECT_TCP_RST:
nf_send_reset(pkt->skb, pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
nf_send_unreach(pkt->skb,
- nft_reject_icmp_code(priv->icmp_code));
+ nft_reject_icmp_code(priv->icmp_code),
+ pkt->ops->hooknum);
break;
}
break;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 0d47afea9682..89045982ec94 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -193,7 +193,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter0(opt.ext.packets, &info->packets))
- return 0;
+ return false;
return match_counter0(opt.ext.bytes, &info->bytes);
}
@@ -239,7 +239,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter(opt.ext.packets, &info->packets))
- return 0;
+ return false;
return match_counter(opt.ext.bytes, &info->bytes);
}
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 5699adb97652..0bc3460319c8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -26,13 +26,12 @@ static bool
string_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_string_info *conf = par->matchinfo;
- struct ts_state state;
bool invert;
invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
- conf->to_offset, conf->config, &state)
+ conf->to_offset, conf->config)
!= UINT_MAX) ^ invert;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 05919bf3f670..6b0f21950e09 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2256,8 +2256,7 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
-static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -2346,8 +2345,7 @@ out:
return err;
}
-static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len,
+static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
struct scm_cookie scm;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 69f1d5e9959f..b987fd56c3c5 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1023,8 +1023,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
return 1;
}
-static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
@@ -1133,8 +1132,8 @@ out:
return err;
}
-static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 6ae063cebf7d..988f542481a8 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -65,36 +65,6 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
return 1;
}
-#ifdef CONFIG_INET
-
-static int nr_rebuild_header(struct sk_buff *skb)
-{
- unsigned char *bp = skb->data;
-
- if (arp_find(bp + 7, skb))
- return 1;
-
- bp[6] &= ~AX25_CBIT;
- bp[6] &= ~AX25_EBIT;
- bp[6] |= AX25_SSSID_SPARE;
- bp += AX25_ADDR_LEN;
-
- bp[6] &= ~AX25_CBIT;
- bp[6] |= AX25_EBIT;
- bp[6] |= AX25_SSSID_SPARE;
-
- return 0;
-}
-
-#else
-
-static int nr_rebuild_header(struct sk_buff *skb)
-{
- return 1;
-}
-
-#endif
-
static int nr_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
const void *daddr, const void *saddr, unsigned int len)
@@ -188,7 +158,6 @@ static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev)
static const struct header_ops nr_header_ops = {
.create = nr_header,
- .rebuild= nr_rebuild_header,
};
static const struct net_device_ops nr_netdev_ops = {
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index e181e290427c..9578bd6a4f3e 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -750,8 +750,8 @@ error:
return ret;
}
-static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -793,8 +793,8 @@ static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
return nfc_llcp_send_i_frame(llcp_sock, msg, len);
}
-static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 373e138c0ab6..82b4e8024778 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -211,8 +211,7 @@ static void rawsock_tx_work(struct work_struct *work)
}
}
-static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct nfc_dev *dev = nfc_rawsock(sk)->dev;
@@ -248,8 +247,8 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock,
return len;
}
-static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index b7d818c59423..ed6b0f8dd1bb 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -6,6 +6,7 @@ config OPENVSWITCH
tristate "Open vSwitch"
depends on INET
select LIBCRC32C
+ select MPLS
select NET_MPLS_GSO
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f8db7064d81c..b91ac5946ad1 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -216,10 +216,16 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
static void packet_flush_mclist(struct sock *sk);
struct packet_skb_cb {
- unsigned int origlen;
union {
struct sockaddr_pkt pkt;
- struct sockaddr_ll ll;
+ union {
+ /* Trick: alias skb original length with
+ * ll.sll_family and ll.protocol in order
+ * to save room.
+ */
+ unsigned int origlen;
+ struct sockaddr_ll ll;
+ };
} sa;
};
@@ -1608,8 +1614,8 @@ oom:
* protocol layers and you must therefore supply it with a complete frame
*/
-static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
@@ -1818,13 +1824,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
skb = nskb;
}
- BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
- sizeof(skb->cb));
+ sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
sll = &PACKET_SKB_CB(skb)->sa.ll;
- sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
- sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
if (unlikely(po->origdev))
sll->sll_ifindex = orig_dev->ifindex;
@@ -1833,7 +1836,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
- PACKET_SKB_CB(skb)->origlen = skb->len;
+ /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
+ * Use their space for storing the original skb length.
+ */
+ PACKET_SKB_CB(skb)->sa.origlen = skb->len;
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
@@ -1847,7 +1853,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
- skb->dropcount = atomic_read(&sk->sk_drops);
+ sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
@@ -2603,8 +2609,7 @@ out:
return err;
}
-static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
@@ -2884,13 +2889,14 @@ out:
* If necessary we block.
*/
-static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
int vnet_hdr_len = 0;
+ unsigned int origlen = 0;
err = -EINVAL;
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
@@ -2990,6 +2996,15 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
if (err)
goto out_free;
+ if (sock->type != SOCK_PACKET) {
+ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+
+ /* Original length was stored in sockaddr_ll fields */
+ origlen = PACKET_SKB_CB(skb)->sa.origlen;
+ sll->sll_family = AF_PACKET;
+ sll->sll_protocol = skb->protocol;
+ }
+
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
@@ -3001,6 +3016,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_namelen = sizeof(struct sockaddr_pkt);
} else {
struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+
msg->msg_namelen = sll->sll_halen +
offsetof(struct sockaddr_ll, sll_addr);
}
@@ -3014,7 +3030,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
aux.tp_status = TP_STATUS_USER;
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
- aux.tp_len = PACKET_SKB_CB(skb)->origlen;
+ aux.tp_len = origlen;
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
aux.tp_net = skb_network_offset(skb);
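
The af_packet change aliases the stored pre-trim skb length with the leading sockaddr_ll fields (sll_family/sll_protocol), which are only filled in at recvmsg time, so packet_skb_cb keeps fitting in skb->cb. A small standalone sketch of that "borrow unused header fields via a union, read the length out before overwriting them" trick (struct names are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct sockaddr_llish {			/* stand-in for struct sockaddr_ll */
	uint16_t family;
	uint16_t protocol;
	uint8_t  addr[8];
};

struct cb {
	union {
		unsigned int origlen;		/* valid while queued */
		struct sockaddr_llish ll;	/* valid once recvmsg fills it */
	};
};

int main(void)
{
	struct cb cb;

	/* receive path: stash the pre-trim length in the aliased space */
	memset(&cb, 0, sizeof(cb));
	cb.origlen = 1514;

	/* recvmsg path: read origlen out first, then build the address */
	unsigned int origlen = cb.origlen;
	cb.ll.family = 17;			/* AF_PACKET */
	cb.ll.protocol = 0x0800;		/* illustrative protocol */

	printf("origlen=%u family=%u proto=0x%04x\n",
	       origlen, (unsigned)cb.ll.family, (unsigned)cb.ll.protocol);
	return 0;
}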
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 26054b4b467c..5e710435ffa9 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -83,8 +83,7 @@ static int pn_init(struct sock *sk)
return 0;
}
-static int pn_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len)
+static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name);
struct sk_buff *skb;
@@ -125,9 +124,8 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk,
return (err >= 0) ? len : err;
}
-static int pn_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
struct sk_buff *skb = NULL;
struct sockaddr_pn sa;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 5d3f2b7507d4..6de2aeb98a1f 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1118,8 +1118,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
}
-static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len)
+static int pep_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct pep_sock *pn = pep_sk(sk);
struct sk_buff *skb;
@@ -1246,9 +1245,8 @@ struct sk_buff *pep_read(struct sock *sk)
return skb;
}
-static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
struct sk_buff *skb;
int err;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 008214a3d5eb..d575ef4e9aa6 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -425,15 +425,15 @@ out:
return err;
}
-static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int pn_socket_sendmsg(struct socket *sock, struct msghdr *m,
+ size_t total_len)
{
struct sock *sk = sock->sk;
if (pn_socket_autobind(sock))
return -EAGAIN;
- return sk->sk_prot->sendmsg(iocb, sk, m, total_len);
+ return sk->sk_prot->sendmsg(sk, m, total_len);
}
const struct proto_ops phonet_dgram_ops = {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c2a5eef41343..c3f2855c3d84 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -702,8 +702,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
void rds_inc_put(struct rds_incoming *inc);
void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
struct rds_incoming *inc, gfp_t gfp);
-int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t size, int msg_flags);
+int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int msg_flags);
void rds_clear_recv_queue(struct rds_sock *rs);
int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
void rds_inc_info_copy(struct rds_incoming *inc,
@@ -711,8 +711,7 @@ void rds_inc_info_copy(struct rds_incoming *inc,
__be32 saddr, __be32 daddr, int flip);
/* send.c */
-int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t payload_len);
+int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
void rds_send_reset(struct rds_connection *conn);
int rds_send_xmit(struct rds_connection *conn);
struct sockaddr_in;
diff --git a/net/rds/recv.c b/net/rds/recv.c
index f9ec1acd801c..a00462b0d01d 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -395,8 +395,8 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg)
return 0;
}
-int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t size, int msg_flags)
+int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int msg_flags)
{
struct sock *sk = sock->sk;
struct rds_sock *rs = rds_sk_to_rs(sk);
diff --git a/net/rds/send.c b/net/rds/send.c
index 42f65d4305c8..44672befc0ee 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -920,8 +920,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
return ret;
}
-int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- size_t payload_len)
+int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
{
struct sock *sk = sock->sk;
struct rds_sock *rs = rds_sk_to_rs(sk);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 43bac7c4dd9e..8ae603069a1a 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1046,8 +1046,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
return 1;
}
-static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct rose_sock *rose = rose_sk(sk);
@@ -1211,8 +1210,8 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
}
-static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
struct rose_sock *rose = rose_sk(sk);
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 50005888be57..369ca81a8c5d 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -41,6 +41,9 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev,
{
unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2);
+ if (daddr)
+ memcpy(buff + 7, daddr, dev->addr_len);
+
*buff++ = ROSE_GFI | ROSE_Q_BIT;
*buff++ = 0x00;
*buff++ = ROSE_DATA;
@@ -53,43 +56,6 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev,
return -37;
}
-static int rose_rebuild_header(struct sk_buff *skb)
-{
-#ifdef CONFIG_INET
- struct net_device *dev = skb->dev;
- struct net_device_stats *stats = &dev->stats;
- unsigned char *bp = (unsigned char *)skb->data;
- struct sk_buff *skbn;
- unsigned int len;
-
- if (arp_find(bp + 7, skb)) {
- return 1;
- }
-
- if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- kfree_skb(skb);
- return 1;
- }
-
- if (skb->sk != NULL)
- skb_set_owner_w(skbn, skb->sk);
-
- kfree_skb(skb);
-
- len = skbn->len;
-
- if (!rose_route_frame(skbn, NULL)) {
- kfree_skb(skbn);
- stats->tx_errors++;
- return 1;
- }
-
- stats->tx_packets++;
- stats->tx_bytes += len;
-#endif
- return 1;
-}
-
static int rose_set_mac_address(struct net_device *dev, void *addr)
{
struct sockaddr *sa = addr;
@@ -134,19 +100,26 @@ static int rose_close(struct net_device *dev)
static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net_device_stats *stats = &dev->stats;
+ unsigned int len = skb->len;
if (!netif_running(dev)) {
printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n");
return NETDEV_TX_BUSY;
}
- dev_kfree_skb(skb);
- stats->tx_errors++;
+
+ if (!rose_route_frame(skb, NULL)) {
+ dev_kfree_skb(skb);
+ stats->tx_errors++;
+ return NETDEV_TX_OK;
+ }
+
+ stats->tx_packets++;
+ stats->tx_bytes += len;
return NETDEV_TX_OK;
}
static const struct header_ops rose_header_ops = {
.create = rose_header,
- .rebuild = rose_rebuild_header,
};
static const struct net_device_ops rose_netdev_ops = {
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7b1670489638..0095b9a0b779 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -441,8 +441,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
* - sends a call data packet
* - may send an abort (abort code in control data)
*/
-static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t len)
+static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
struct rxrpc_transport *trans;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
@@ -482,7 +481,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
switch (rx->sk.sk_state) {
case RXRPC_SERVER_LISTENING:
if (!m->msg_name) {
- ret = rxrpc_server_sendmsg(iocb, rx, m, len);
+ ret = rxrpc_server_sendmsg(rx, m, len);
break;
}
case RXRPC_SERVER_BOUND:
@@ -492,7 +491,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
break;
}
case RXRPC_CLIENT_CONNECTED:
- ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len);
+ ret = rxrpc_client_sendmsg(rx, trans, m, len);
break;
default:
ret = -ENOTCONN;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ba9fd36d3f15..2fc1e659e5c9 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -548,10 +548,9 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
extern unsigned rxrpc_resend_timeout;
int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
-int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
- struct rxrpc_transport *, struct msghdr *, size_t);
-int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *,
- size_t);
+int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *,
+ struct msghdr *, size_t);
+int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
/*
* ar-peer.c
@@ -572,8 +571,7 @@ extern const struct file_operations rxrpc_connection_seq_fops;
* ar-recvmsg.c
*/
void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
-int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t,
- int);
+int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
* ar-security.c
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 8331c95e1522..09f584566e23 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -23,8 +23,7 @@
*/
unsigned rxrpc_resend_timeout = 4 * HZ;
-static int rxrpc_send_data(struct kiocb *iocb,
- struct rxrpc_sock *rx,
+static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len);
@@ -129,9 +128,8 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
* - caller holds the socket locked
* - the socket may be either a client socket or a server socket
*/
-int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
- struct rxrpc_transport *trans, struct msghdr *msg,
- size_t len)
+int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
+ struct msghdr *msg, size_t len)
{
struct rxrpc_conn_bundle *bundle;
enum rxrpc_command cmd;
@@ -191,7 +189,7 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
/* request phase complete for this client call */
ret = -EPROTO;
} else {
- ret = rxrpc_send_data(iocb, rx, call, msg, len);
+ ret = rxrpc_send_data(rx, call, msg, len);
}
rxrpc_put_call(call);
@@ -232,7 +230,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
ret = -EPROTO; /* request phase complete for this client call */
} else {
- ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
+ ret = rxrpc_send_data(call->socket, call, msg, len);
}
release_sock(&call->socket->sk);
@@ -271,8 +269,7 @@ EXPORT_SYMBOL(rxrpc_kernel_abort_call);
* send a message through a server socket
* - caller holds the socket locked
*/
-int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
- struct msghdr *msg, size_t len)
+int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
{
enum rxrpc_command cmd;
struct rxrpc_call *call;
@@ -313,7 +310,7 @@ int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
break;
}
- ret = rxrpc_send_data(iocb, rx, call, msg, len);
+ ret = rxrpc_send_data(rx, call, msg, len);
break;
case RXRPC_CMD_SEND_ABORT:
@@ -520,8 +517,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
* - must be called in process context
* - caller holds the socket locked
*/
-static int rxrpc_send_data(struct kiocb *iocb,
- struct rxrpc_sock *rx,
+static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len)
{
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 4575485ad1b4..a4f883e2d66f 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -43,8 +43,8 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
* - we need to be careful about two or more threads calling recvmsg
* simultaneously
*/
-int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
struct rxrpc_skb_priv *sp;
struct rxrpc_call *call = NULL, *continue_call = NULL;
@@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
&call->conn->trans->peer->srx, len);
msg->msg_namelen = len;
}
- sock_recv_ts_and_drops(msg, &rx->sk, skb);
+ sock_recv_timestamp(msg, &rx->sk, skb);
}
/* receive the message */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index baef987fe2c0..8b0470e418dc 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -286,7 +286,7 @@ replay:
RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
err = 0;
goto errout;
}
@@ -301,14 +301,20 @@ replay:
err = -EEXIST;
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
goto errout;
}
break;
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
- if (err == 0)
+ if (err == 0) {
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+ if (tcf_destroy(tp, false)) {
+ struct tcf_proto *next = rtnl_dereference(tp->next);
+
+ RCU_INIT_POINTER(*back, next);
+ }
+ }
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
@@ -329,7 +335,7 @@ replay:
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
} else {
if (tp_created)
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index fc399db86f11..0b8c3ace671f 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -96,11 +96,14 @@ static void basic_delete_filter(struct rcu_head *head)
kfree(f);
}
-static void basic_destroy(struct tcf_proto *tp)
+static bool basic_destroy(struct tcf_proto *tp, bool force)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
+ if (!force && !list_empty(&head->flist))
+ return false;
+
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
@@ -108,6 +111,7 @@ static void basic_destroy(struct tcf_proto *tp)
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5f3ee9e4b5bf..243c9f225a73 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -16,6 +16,8 @@
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
+#include <linux/bpf.h>
+
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
@@ -24,6 +26,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
+#define CLS_BPF_NAME_LEN 256
+
struct cls_bpf_head {
struct list_head plist;
u32 hgen;
@@ -32,18 +36,24 @@ struct cls_bpf_head {
struct cls_bpf_prog {
struct bpf_prog *filter;
- struct sock_filter *bpf_ops;
- struct tcf_exts exts;
- struct tcf_result res;
struct list_head link;
+ struct tcf_result res;
+ struct tcf_exts exts;
u32 handle;
- u16 bpf_num_ops;
+ union {
+ u32 bpf_fd;
+ u16 bpf_num_ops;
+ };
+ struct sock_filter *bpf_ops;
+ const char *bpf_name;
struct tcf_proto *tp;
struct rcu_head rcu;
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_FD] = { .type = NLA_U32 },
+ [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
[TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
[TCA_BPF_OPS] = { .type = NLA_BINARY,
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
@@ -76,6 +86,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
+static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
+{
+ return !prog->bpf_ops;
+}
+
static int cls_bpf_init(struct tcf_proto *tp)
{
struct cls_bpf_head *head;
@@ -94,8 +109,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
- bpf_prog_destroy(prog->filter);
+ if (cls_bpf_is_ebpf(prog))
+ bpf_prog_put(prog->filter);
+ else
+ bpf_prog_destroy(prog->filter);
+ kfree(prog->bpf_name);
kfree(prog->bpf_ops);
kfree(prog);
}
@@ -114,14 +133,18 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
+ if (!force && !list_empty(&head->plist))
+ return false;
+
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
@@ -130,6 +153,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
@@ -151,69 +175,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
return ret;
}
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+static int cls_bpf_prog_from_ops(struct nlattr **tb,
+ struct cls_bpf_prog *prog, u32 classid)
{
struct sock_filter *bpf_ops;
- struct tcf_exts exts;
- struct sock_fprog_kern tmp;
+ struct sock_fprog_kern fprog_tmp;
struct bpf_prog *fp;
u16 bpf_size, bpf_num_ops;
- u32 classid;
int ret;
- if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
- return -EINVAL;
-
- tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
- ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
- if (ret < 0)
- return ret;
-
- classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
- if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
+ return -EINVAL;
bpf_size = bpf_num_ops * sizeof(*bpf_ops);
- if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
+ return -EINVAL;
bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
- if (bpf_ops == NULL) {
- ret = -ENOMEM;
- goto errout;
- }
+ if (bpf_ops == NULL)
+ return -ENOMEM;
memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
- tmp.len = bpf_num_ops;
- tmp.filter = bpf_ops;
+ fprog_tmp.len = bpf_num_ops;
+ fprog_tmp.filter = bpf_ops;
- ret = bpf_prog_create(&fp, &tmp);
- if (ret)
- goto errout_free;
+ ret = bpf_prog_create(&fp, &fprog_tmp);
+ if (ret < 0) {
+ kfree(bpf_ops);
+ return ret;
+ }
- prog->bpf_num_ops = bpf_num_ops;
prog->bpf_ops = bpf_ops;
+ prog->bpf_num_ops = bpf_num_ops;
+ prog->bpf_name = NULL;
+
+ prog->filter = fp;
+ prog->res.classid = classid;
+
+ return 0;
+}
+
+static int cls_bpf_prog_from_efd(struct nlattr **tb,
+ struct cls_bpf_prog *prog, u32 classid)
+{
+ struct bpf_prog *fp;
+ char *name = NULL;
+ u32 bpf_fd;
+
+ bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
+
+ fp = bpf_prog_get(bpf_fd);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
+ if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
+ bpf_prog_put(fp);
+ return -EINVAL;
+ }
+
+ if (tb[TCA_BPF_NAME]) {
+ name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
+ nla_len(tb[TCA_BPF_NAME]),
+ GFP_KERNEL);
+ if (!name) {
+ bpf_prog_put(fp);
+ return -ENOMEM;
+ }
+ }
+
+ prog->bpf_ops = NULL;
+ prog->bpf_fd = bpf_fd;
+ prog->bpf_name = name;
+
prog->filter = fp;
prog->res.classid = classid;
+ return 0;
+}
+
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+ struct cls_bpf_prog *prog,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct tcf_exts exts;
+ bool is_bpf, is_ebpf;
+ u32 classid;
+ int ret;
+
+ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+ is_ebpf = tb[TCA_BPF_FD];
+
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+ !tb[TCA_BPF_CLASSID])
+ return -EINVAL;
+
+ tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+ if (ret < 0)
+ return ret;
+
+ classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
+ cls_bpf_prog_from_efd(tb, prog, classid);
+ if (ret < 0) {
+ tcf_exts_destroy(&exts);
+ return ret;
+ }
+
tcf_bind_filter(tp, &prog->res, base);
tcf_exts_change(tp, &prog->exts, &exts);
return 0;
-errout_free:
- kfree(bpf_ops);
-errout:
- tcf_exts_destroy(&exts);
- return ret;
}
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -297,11 +373,43 @@ errout:
return ret;
}
+static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
+ struct sk_buff *skb)
+{
+ struct nlattr *nla;
+
+ if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
+ return -EMSGSIZE;
+
+ nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
+ sizeof(struct sock_filter));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+ return 0;
+}
+
+static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
+ struct sk_buff *skb)
+{
+ if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
+ return -EMSGSIZE;
+
+ if (prog->bpf_name &&
+ nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *tm)
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
- struct nlattr *nest, *nla;
+ struct nlattr *nest;
+ int ret;
if (prog == NULL)
return skb->len;
@@ -314,16 +422,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
goto nla_put_failure;
- if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
- goto nla_put_failure;
- nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
- sizeof(struct sock_filter));
- if (nla == NULL)
+ if (cls_bpf_is_ebpf(prog))
+ ret = cls_bpf_dump_ebpf_info(prog, skb);
+ else
+ ret = cls_bpf_dump_bpf_info(prog, skb);
+ if (ret)
goto nla_put_failure;
- memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
-
if (tcf_exts_dump(skb, &prog->exts) < 0)
goto nla_put_failure;
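
cls_bpf now accepts either a classic instruction array (TCA_BPF_OPS/TCA_BPF_OPS_LEN) or an already-loaded eBPF program fd (TCA_BPF_FD), never both, and cls_bpf_is_ebpf() tells the two apart by whether bpf_ops was stored. A tiny sketch of that exactly-one-of-two validation (attribute spelling follows the diff; everything else is illustrative):

#include <stdbool.h>
#include <stdio.h>

struct cfg {
	bool has_ops;		/* TCA_BPF_OPS + TCA_BPF_OPS_LEN present */
	bool has_fd;		/* TCA_BPF_FD present */
	bool has_classid;	/* TCA_BPF_CLASSID present */
};

/* Exactly one of the two program sources must be given. */
static int validate(const struct cfg *c)
{
	bool is_bpf  = c->has_ops;
	bool is_ebpf = c->has_fd;

	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || !c->has_classid)
		return -1;		/* -EINVAL in the kernel */
	return 0;
}

int main(void)
{
	struct cfg ebpf = { .has_fd = true, .has_classid = true };
	struct cfg both = { .has_ops = true, .has_fd = true, .has_classid = true };

	printf("ebpf-only: %d, both: %d\n", validate(&ebpf), validate(&both));
	return 0;
}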
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221697ab0247..ea611b216412 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -143,14 +143,18 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
+ if (!force)
+ return false;
+
if (head) {
RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
}
+ return true;
}
static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 461410394d08..a620c4e288a5 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -557,17 +557,21 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp)
+static bool flow_destroy(struct tcf_proto *tp, bool force)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
+ if (!force && !list_empty(&head->filters))
+ return false;
+
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index a5269f76004c..715e01e5910a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -33,6 +33,7 @@
struct fw_head {
u32 mask;
+ bool mask_set;
struct fw_filter __rcu *ht[HTSIZE];
struct rcu_head rcu;
};
@@ -113,6 +114,14 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
static int fw_init(struct tcf_proto *tp)
{
+ struct fw_head *head;
+
+ head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+
+ head->mask_set = false;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -124,14 +133,20 @@ static void fw_delete_filter(struct rcu_head *head)
kfree(f);
}
-static void fw_destroy(struct tcf_proto *tp)
+static bool fw_destroy(struct tcf_proto *tp, bool force)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
if (head == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h = 0; h < HTSIZE; h++)
+ if (rcu_access_pointer(head->ht[h]))
+ return false;
+ }
for (h = 0; h < HTSIZE; h++) {
while ((f = rtnl_dereference(head->ht[h])) != NULL) {
@@ -143,6 +158,7 @@ static void fw_destroy(struct tcf_proto *tp)
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
@@ -286,17 +302,11 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (!handle)
return -EINVAL;
- if (head == NULL) {
- u32 mask = 0xFFFFFFFF;
+ if (!head->mask_set) {
+ head->mask = 0xFFFFFFFF;
if (tb[TCA_FW_MASK])
- mask = nla_get_u32(tb[TCA_FW_MASK]);
-
- head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
- if (head == NULL)
- return -ENOBUFS;
- head->mask = mask;
-
- rcu_assign_pointer(tp->root, head);
+ head->mask = nla_get_u32(tb[TCA_FW_MASK]);
+ head->mask_set = true;
}
f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 2ecd24688554..08a3b0a6f5ab 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -258,6 +258,13 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
static int route4_init(struct tcf_proto *tp)
{
+ struct route4_head *head;
+
+ head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+
+ rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -270,13 +277,20 @@ route4_delete_filter(struct rcu_head *head)
kfree(f);
}
-static void route4_destroy(struct tcf_proto *tp)
+static bool route4_destroy(struct tcf_proto *tp, bool force)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h1 = 0; h1 <= 256; h1++) {
+ if (rcu_access_pointer(head->table[h1]))
+ return false;
+ }
+ }
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
@@ -301,6 +315,7 @@ static void route4_destroy(struct tcf_proto *tp)
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
@@ -484,13 +499,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
err = -ENOBUFS;
- if (head == NULL) {
- head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
- if (head == NULL)
- goto errout;
- rcu_assign_pointer(tp->root, head);
- }
-
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
if (!f)
goto errout;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index edd8ade3fbc1..02fa82792dab 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -291,13 +291,20 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
kfree_rcu(f, rcu);
}
-static void rsvp_destroy(struct tcf_proto *tp)
+static bool rsvp_destroy(struct tcf_proto *tp, bool force)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h1 = 0; h1 < 256; h1++) {
+ if (rcu_access_pointer(data->ht[h1]))
+ return false;
+ }
+ }
RCU_INIT_POINTER(tp->root, NULL);
@@ -319,6 +326,7 @@ static void rsvp_destroy(struct tcf_proto *tp)
}
}
kfree_rcu(data, rcu);
+ return true;
}
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index bd49bf547a47..a557dbaf5afe 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -468,11 +468,14 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp)
+static bool tcindex_destroy(struct tcf_proto *tp, bool force)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
+ if (!force)
+ return false;
+
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
walker.count = 0;
walker.skip = 0;
@@ -481,6 +484,7 @@ static void tcindex_destroy(struct tcf_proto *tp)
RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&p->rcu, __tcindex_destroy);
+ return true;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 09487afbfd51..375e51b71c80 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -460,13 +460,35 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
return -ENOENT;
}
-static void u32_destroy(struct tcf_proto *tp)
+static bool ht_empty(struct tc_u_hnode *ht)
+{
+ unsigned int h;
+
+ for (h = 0; h <= ht->divisor; h++)
+ if (rcu_access_pointer(ht->ht[h]))
+ return false;
+
+ return true;
+}
+
+static bool u32_destroy(struct tcf_proto *tp, bool force)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
+ if (!force) {
+ if (root_ht) {
+ if (root_ht->refcnt > 1)
+ return false;
+ if (root_ht->refcnt == 1) {
+ if (!ht_empty(root_ht))
+ return false;
+ }
+ }
+ }
+
if (root_ht && --root_ht->refcnt == 0)
u32_destroy_hnode(tp, root_ht);
@@ -491,6 +513,7 @@ static void u32_destroy(struct tcf_proto *tp)
}
tp->data = NULL;
+ return true;
}
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index f03c3de16c27..73e2ed576ceb 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -34,7 +34,6 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
{
struct text_match *tm = EM_TEXT_PRIV(m);
int from, to;
- struct ts_state state;
from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data;
from += tm->from_offset;
@@ -42,7 +41,7 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data;
to += tm->to_offset;
- return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX;
+ return skb_find_text(skb, from, to, tm->config) != UINT_MAX;
}
static int em_text_change(struct net *net, void *data, int len,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 243b7d169d61..ad9eed70bc8f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1858,11 +1858,15 @@ reclassify:
}
EXPORT_SYMBOL(tc_classify);
-void tcf_destroy(struct tcf_proto *tp)
+bool tcf_destroy(struct tcf_proto *tp, bool force)
{
- tp->ops->destroy(tp);
- module_put(tp->ops->owner);
- kfree_rcu(tp, rcu);
+ if (tp->ops->destroy(tp, force)) {
+ module_put(tp->ops->owner);
+ kfree_rcu(tp, rcu);
+ return true;
+ }
+
+ return false;
}
void tcf_destroy_chain(struct tcf_proto __rcu **fl)
@@ -1871,7 +1875,7 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl)
while ((tp = rtnl_dereference(*fl)) != NULL) {
RCU_INIT_POINTER(*fl, tp->next);
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
}
}
EXPORT_SYMBOL(tcf_destroy_chain);
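The classifier ->destroy() hook now takes a force flag and reports whether it actually tore the instance down; tcf_destroy() only drops the module reference and frees the tcf_proto when the callback returns true. A minimal sketch of that contract, using a hypothetical "foo" classifier (struct foo_head and its filter list are illustrative only, not part of this patch):

	struct foo_head {
		struct list_head filters;
		struct rcu_head rcu;
	};

	static bool foo_destroy(struct tcf_proto *tp, bool force)
	{
		struct foo_head *head = rtnl_dereference(tp->root);

		/* Non-forced destroy: refuse while filters are still attached,
		 * so the caller keeps this tcf_proto (and its module) alive.
		 */
		if (!force && head && !list_empty(&head->filters))
			return false;

		/* Forced, or nothing left: release the root and let
		 * tcf_destroy() free the tcf_proto itself.
		 */
		RCU_INIT_POINTER(tp->root, NULL);
		if (head)
			kfree_rcu(head, rcu);
		return true;
	}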
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8f34b27d5775..53b7acde9aa3 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1322,8 +1322,7 @@ static __init int sctp_init(void)
int max_share;
int order;
- BUILD_BUG_ON(sizeof(struct sctp_ulpevent) >
- sizeof(((struct sk_buff *) 0)->cb));
+ sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
/* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aafe94bf292e..f1a65398f311 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1586,8 +1586,7 @@ static int sctp_error(struct sock *sk, int flags, int err)
static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
-static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
{
struct net *net = sock_net(sk);
struct sctp_sock *sp;
@@ -2066,9 +2065,8 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
* flags - flags sent or received with the user message, see Section
* 5 for complete description of the flags.
*/
-static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
struct sctp_ulpevent *event = NULL;
struct sctp_sock *sp = sctp_sk(sk);
diff --git a/net/socket.c b/net/socket.c
index bbedbfcb42c2..95d3085cb477 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -610,45 +610,20 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
}
EXPORT_SYMBOL(__sock_tx_timestamp);
-static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size)
+static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg,
+ size_t size)
{
- return sock->ops->sendmsg(iocb, sock, msg, size);
+ return sock->ops->sendmsg(sock, msg, size);
}
-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size)
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
int err = security_socket_sendmsg(sock, msg, size);
- return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
-}
-
-static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
- size_t size, bool nosec)
-{
- struct kiocb iocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
- __sock_sendmsg(&iocb, sock, msg, size);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
-{
- return do_sock_sendmsg(sock, msg, size, false);
+ return err ?: sock_sendmsg_nosec(sock, msg, size);
}
EXPORT_SYMBOL(sock_sendmsg);
-static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
-{
- return do_sock_sendmsg(sock, msg, size, true);
-}
-
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
@@ -731,9 +706,9 @@ EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
- if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+ if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
- sizeof(__u32), &skb->dropcount);
+ sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
}
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
@@ -744,47 +719,21 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
-static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
- return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+ return sock->ops->recvmsg(sock, msg, size, flags);
}
-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
int err = security_socket_recvmsg(sock, msg, size, flags);
- return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
-}
-
-int sock_recvmsg(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
-{
- struct kiocb iocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
+ return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
-static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
-{
- struct kiocb iocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
/**
* kernel_recvmsg - Receive a message from a socket (kernel space)
* @sock: The socket to receive the message from
@@ -861,8 +810,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
return 0;
- res = __sock_recvmsg(iocb, sock, &msg,
- iocb->ki_nbytes, msg.msg_flags);
+ res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags);
*to = msg.msg_iter;
return res;
}
@@ -883,7 +831,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
- res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
+ res = sock_sendmsg(sock, &msg, iocb->ki_nbytes);
*from = msg.msg_iter;
return res;
}
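With the kiocb plumbing gone, synchronous in-kernel callers simply build a msghdr and let sock_sendmsg()/sock_recvmsg() reach the protocol ops directly. A small sketch of the resulting call path, assuming a made-up helper built on the existing kernel_sendmsg():

	static int example_send(struct socket *sock, void *data, size_t len)
	{
		struct kvec vec = { .iov_base = data, .iov_len = len };
		struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

		/* kernel_sendmsg() fills msg->msg_iter and calls sock_sendmsg(),
		 * which now goes straight to sock->ops->sendmsg(): no kiocb,
		 * no wait_on_sync_kiocb().
		 */
		return kernel_sendmsg(sock, &msg, &vec, 1, len);
	}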
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8c1e558db118..aba6aa2656d8 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
+#include <net/ip_fib.h>
#include <net/switchdev.h>
/**
@@ -32,7 +33,7 @@ int netdev_switch_parent_id_get(struct net_device *dev,
return -EOPNOTSUPP;
return ops->ndo_switch_parent_id_get(dev, psid);
}
-EXPORT_SYMBOL(netdev_switch_parent_id_get);
+EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get);
/**
* netdev_switch_port_stp_update - Notify switch device port of STP
@@ -51,7 +52,7 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state)
WARN_ON(!ops->ndo_switch_parent_id_get);
return ops->ndo_switch_port_stp_update(dev, state);
}
-EXPORT_SYMBOL(netdev_switch_port_stp_update);
+EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update);
static DEFINE_MUTEX(netdev_switch_mutex);
static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain);
@@ -73,7 +74,7 @@ int register_netdev_switch_notifier(struct notifier_block *nb)
mutex_unlock(&netdev_switch_mutex);
return err;
}
-EXPORT_SYMBOL(register_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(register_netdev_switch_notifier);
/**
 * unregister_netdev_switch_notifier - Unregister notifier
@@ -91,7 +92,7 @@ int unregister_netdev_switch_notifier(struct notifier_block *nb)
mutex_unlock(&netdev_switch_mutex);
return err;
}
-EXPORT_SYMBOL(unregister_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
/**
 * call_netdev_switch_notifiers - Call notifiers
@@ -114,7 +115,7 @@ int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev,
mutex_unlock(&netdev_switch_mutex);
return err;
}
-EXPORT_SYMBOL(call_netdev_switch_notifiers);
+EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers);
/**
* netdev_switch_port_bridge_setlink - Notify switch device port of bridge
@@ -139,7 +140,7 @@ int netdev_switch_port_bridge_setlink(struct net_device *dev,
return ops->ndo_bridge_setlink(dev, nlh, flags);
}
-EXPORT_SYMBOL(netdev_switch_port_bridge_setlink);
+EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink);
/**
* netdev_switch_port_bridge_dellink - Notify switch device port of bridge
@@ -164,7 +165,7 @@ int netdev_switch_port_bridge_dellink(struct net_device *dev,
return ops->ndo_bridge_dellink(dev, nlh, flags);
}
-EXPORT_SYMBOL(netdev_switch_port_bridge_dellink);
+EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink);
/**
* ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink
@@ -194,7 +195,7 @@ int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev,
return ret;
}
-EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink);
+EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink);
/**
* ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink
@@ -224,4 +225,168 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
return ret;
}
-EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);
+EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink);
+
+static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct net_device *lower_dev;
+ struct net_device *port_dev;
+ struct list_head *iter;
+
+	/* Recursively search down until we find a sw port dev.
+ * (A sw port dev supports ndo_switch_parent_id_get).
+ */
+
+ if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD &&
+ ops->ndo_switch_parent_id_get)
+ return dev;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ port_dev = netdev_switch_get_lowest_dev(lower_dev);
+ if (port_dev)
+ return port_dev;
+ }
+
+ return NULL;
+}
+
+static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
+{
+ struct netdev_phys_item_id psid;
+ struct netdev_phys_item_id prev_psid;
+ struct net_device *dev = NULL;
+ int nhsel;
+
+ /* For this route, all nexthop devs must be on the same switch. */
+
+ for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+ const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+ if (!nh->nh_dev)
+ return NULL;
+
+ dev = netdev_switch_get_lowest_dev(nh->nh_dev);
+ if (!dev)
+ return NULL;
+
+ if (netdev_switch_parent_id_get(dev, &psid))
+ return NULL;
+
+ if (nhsel > 0) {
+ if (prev_psid.id_len != psid.id_len)
+ return NULL;
+ if (memcmp(prev_psid.id, psid.id, psid.id_len))
+ return NULL;
+ }
+
+ prev_psid = psid;
+ }
+
+ return dev;
+}
+
+/**
+ * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch
+ *
+ * @dst: route's IPv4 destination address
+ * @dst_len: destination address length (prefix length)
+ * @fi: route FIB info structure
+ * @tos: route TOS
+ * @type: route type
+ * @tb_id: route table ID
+ *
+ * Add IPv4 route entry to switch device.
+ */
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ struct net_device *dev;
+ const struct net_device_ops *ops;
+ int err = 0;
+
+ /* Don't offload route if using custom ip rules or if
+ * IPv4 FIB offloading has been disabled completely.
+ */
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ if (fi->fib_net->ipv4.fib_has_custom_rules)
+ return 0;
+#endif
+
+ if (fi->fib_net->ipv4.fib_offload_disabled)
+ return 0;
+
+ dev = netdev_switch_get_dev_by_nhs(fi);
+ if (!dev)
+ return 0;
+ ops = dev->netdev_ops;
+
+ if (ops->ndo_switch_fib_ipv4_add) {
+ err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len,
+ fi, tos, type, tb_id);
+ if (!err)
+ fi->fib_flags |= RTNH_F_EXTERNAL;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add);
+
+/**
+ * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
+ *
+ * @dst: route's IPv4 destination address
+ * @dst_len: destination address length (prefix length)
+ * @fi: route FIB info structure
+ * @tos: route TOS
+ * @type: route type
+ * @tb_id: route table ID
+ *
+ * Delete IPv4 route entry from switch device.
+ */
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ struct net_device *dev;
+ const struct net_device_ops *ops;
+ int err = 0;
+
+ if (!(fi->fib_flags & RTNH_F_EXTERNAL))
+ return 0;
+
+ dev = netdev_switch_get_dev_by_nhs(fi);
+ if (!dev)
+ return 0;
+ ops = dev->netdev_ops;
+
+ if (ops->ndo_switch_fib_ipv4_del) {
+ err = ops->ndo_switch_fib_ipv4_del(dev, htonl(dst), dst_len,
+ fi, tos, type, tb_id);
+ if (!err)
+ fi->fib_flags &= ~RTNH_F_EXTERNAL;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del);
+
+/**
+ * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation
+ *
+ * @fi: route FIB info structure
+ */
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+ /* There was a problem installing this route to the offload
+ * device. For now, until we come up with more refined
+	 * policy handling, abruptly end IPv4 fib offloading for the
+	 * entire net by flushing offload device(s) of all
+ * IPv4 routes, and mark IPv4 fib offloading broken from
+ * this point forward.
+ */
+
+ fib_flush_external(fi->fib_net);
+ fi->fib_net->ipv4.fib_offload_disabled = true;
+}
+EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort);
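A rough sketch of how the IPv4 FIB side is expected to use these helpers when installing a route (the actual hook-up lives in the net/ipv4 part of this series; the wrapper below is illustrative only):

	/* Try to offload a new route; if the device rejects it, fall back to
	 * pure software forwarding for the whole namespace.
	 */
	static void example_offload_route(u32 dst, int plen, struct fib_info *fi,
					  u8 tos, u8 type, u32 tb_id)
	{
		int err;

		err = netdev_switch_fib_ipv4_add(dst, plen, fi, tos, type, tb_id);
		if (err)
			/* Flushes offloaded routes and disables further offload. */
			netdev_switch_fib_ipv4_abort(fi);
	}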
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 91c8a8e031db..c25a3a149dc4 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -26,3 +26,11 @@ config TIPC_MEDIA_IB
help
Saying Y here will enable support for running TIPC on
IP-over-InfiniBand devices.
+config TIPC_MEDIA_UDP
+ bool "IP/UDP media type support"
+ depends on TIPC
+ select NET_UDP_TUNNEL
+ help
+ Saying Y here will enable support for running TIPC over IP/UDP
+ bool
+ default y
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 599b1a540d2b..57e460be4692 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -10,5 +10,6 @@ tipc-y += addr.o bcast.o bearer.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
server.o socket.o
+tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 48852c2dcc03..840db89e4283 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -48,6 +48,9 @@ static struct tipc_media * const media_info_array[] = {
#ifdef CONFIG_TIPC_MEDIA_IB
&ib_media_info,
#endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ &udp_media_info,
+#endif
NULL
};
@@ -216,7 +219,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
* tipc_enable_bearer - enable bearer with the given name
*/
static int tipc_enable_bearer(struct net *net, const char *name,
- u32 disc_domain, u32 priority)
+ u32 disc_domain, u32 priority,
+ struct nlattr *attr[])
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b_ptr;
@@ -304,7 +308,7 @@ restart:
strcpy(b_ptr->name, name);
b_ptr->media = m_ptr;
- res = m_ptr->enable_media(net, b_ptr);
+ res = m_ptr->enable_media(net, b_ptr, attr);
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
@@ -372,7 +376,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
kfree_rcu(b_ptr, rcu);
}
-int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b)
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+ struct nlattr *attr[])
{
struct net_device *dev;
char *driver_name = strchr((const char *)b->name, ':') + 1;
@@ -742,7 +747,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- bearer_disable(net, bearer, false);
+ bearer_disable(net, bearer, true);
rtnl_unlock();
return 0;
@@ -791,7 +796,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
}
rtnl_lock();
- err = tipc_enable_bearer(net, bearer, domain, prio);
+ err = tipc_enable_bearer(net, bearer, domain, prio, attrs);
if (err) {
rtnl_unlock();
return err;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6b17795ff8bc..5cad243ee8fc 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -41,7 +41,7 @@
#include <net/genetlink.h>
#define MAX_BEARERS 2
-#define MAX_MEDIA 2
+#define MAX_MEDIA 3
#define MAX_NODES 4096
#define WSIZE 32
@@ -50,14 +50,16 @@
* - the field's actual content and length is defined per media
* - remaining unused bytes in the field are set to zero
*/
-#define TIPC_MEDIA_ADDR_SIZE 32
+#define TIPC_MEDIA_INFO_SIZE 32
#define TIPC_MEDIA_TYPE_OFFSET 3
+#define TIPC_MEDIA_ADDR_OFFSET 4
/*
* Identifiers of supported TIPC media types
*/
#define TIPC_MEDIA_TYPE_ETH 1
#define TIPC_MEDIA_TYPE_IB 2
+#define TIPC_MEDIA_TYPE_UDP 3
/**
* struct tipc_node_map - set of node identifiers
@@ -76,7 +78,7 @@ struct tipc_node_map {
* @broadcast: non-zero if address is a broadcast address
*/
struct tipc_media_addr {
- u8 value[TIPC_MEDIA_ADDR_SIZE];
+ u8 value[TIPC_MEDIA_INFO_SIZE];
u8 media_id;
u8 broadcast;
};
@@ -103,7 +105,8 @@ struct tipc_media {
int (*send_msg)(struct net *net, struct sk_buff *buf,
struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest);
- int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr);
+ int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr,
+ struct nlattr *attr[]);
void (*disable_media)(struct tipc_bearer *b_ptr);
int (*addr2str)(struct tipc_media_addr *addr,
char *strbuf,
@@ -182,6 +185,9 @@ extern struct tipc_media eth_media_info;
#ifdef CONFIG_TIPC_MEDIA_IB
extern struct tipc_media ib_media_info;
#endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+extern struct tipc_media udp_media_info;
+#endif
int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
@@ -196,7 +202,8 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
int tipc_media_set_priority(const char *name, u32 new_value);
int tipc_media_set_window(const char *name, u32 new_value);
void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
-int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b);
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+ struct nlattr *attrs[]);
void tipc_disable_l2_media(struct tipc_bearer *b);
int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
struct tipc_bearer *b, struct tipc_media_addr *dest);
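The renamed constants describe the 32-byte media info field carried in neighbor discovery messages: byte 3 holds the media type id and the media address starts at byte 4. A small illustrative helper (not part of the patch) showing the layout the eth/ib/udp addr2msg callbacks all follow:

	static void example_fill_media_info(char *msg, u8 media_id,
					    const void *addr, size_t addr_len)
	{
		memset(msg, 0, TIPC_MEDIA_INFO_SIZE);		/* 32 bytes total */
		msg[TIPC_MEDIA_TYPE_OFFSET] = media_id;		/* byte 3 */
		memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr, addr_len); /* bytes 4.. */
	}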
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index feef3753615d..5967506833ce 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -86,7 +86,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
msg = buf_msg(buf);
tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
- INT_H_SIZE, dest_domain);
+ MAX_H_SIZE, dest_domain);
msg_set_non_seq(msg, 1);
msg_set_node_sig(msg, tn->random);
msg_set_dest_domain(msg, dest_domain);
@@ -249,7 +249,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
/* Send response, if necessary */
if (respond && (mtyp == DSC_REQ_MSG)) {
- rbuf = tipc_buf_acquire(INT_H_SIZE);
+ rbuf = tipc_buf_acquire(MAX_H_SIZE);
if (rbuf) {
tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer);
tipc_bearer_send(net, bearer->identity, rbuf, &maddr);
@@ -359,8 +359,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
return -ENOMEM;
-
- req->buf = tipc_buf_acquire(INT_H_SIZE);
+ req->buf = tipc_buf_acquire(MAX_H_SIZE);
if (!req->buf) {
kfree(req);
return -ENOMEM;
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 5e1426f1751f..f69a2fde9f4a 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -37,8 +37,6 @@
#include "core.h"
#include "bearer.h"
-#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */
-
/* Convert Ethernet address (media address format) to string */
static int tipc_eth_addr2str(struct tipc_media_addr *addr,
char *strbuf, int bufsz)
@@ -53,9 +51,9 @@ static int tipc_eth_addr2str(struct tipc_media_addr *addr,
/* Convert from media address format to discovery message addr format */
static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr)
{
- memset(msg, 0, TIPC_MEDIA_ADDR_SIZE);
+ memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
- memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN);
+ memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN);
return 0;
}
@@ -79,7 +77,7 @@ static int tipc_eth_msg2addr(struct tipc_bearer *b,
char *msg)
{
/* Skip past preamble: */
- msg += ETH_ADDR_OFFSET;
+ msg += TIPC_MEDIA_ADDR_OFFSET;
return tipc_eth_raw2addr(b, addr, msg);
}
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 8522eef9c136..e8c16718e3fa 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -57,7 +57,7 @@ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
/* Convert from media address format to discovery message addr format */
static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr)
{
- memset(msg, 0, TIPC_MEDIA_ADDR_SIZE);
+ memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
memcpy(msg, addr->value, INFINIBAND_ALEN);
return 0;
}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 9ace47f44a69..fa167846d1ab 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -76,7 +76,7 @@ struct plist;
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-#define TIPC_MEDIA_ADDR_OFFSET 5
+#define TIPC_MEDIA_INFO_OFFSET 5
/**
* TIPC message buffer code
@@ -87,7 +87,7 @@ struct plist;
* Note: Headroom should be a multiple of 4 to ensure the TIPC header fields
* are word aligned for quicker access
*/
-#define BUF_HEADROOM LL_MAX_HEADER
+#define BUF_HEADROOM (LL_MAX_HEADER + 48)
struct tipc_skb_cb {
void *handle;
@@ -688,7 +688,7 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r)
static inline char *msg_media_addr(struct tipc_msg *m)
{
- return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET];
+ return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET];
}
/*
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index fcb07915aaac..506aaa565da7 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -98,7 +98,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb)
continue;
if (!tipc_node_active_links(node))
continue;
- oskb = skb_copy(skb, GFP_ATOMIC);
+ oskb = pskb_copy(skb, GFP_ATOMIC);
if (!oskb)
break;
msg_set_destnode(buf_msg(oskb), dnode);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b4d4467d0bb0..934947f038b6 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -114,6 +114,9 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
+static int __tipc_send_stream(struct socket *sock, struct msghdr *m,
+ size_t dsz);
+static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -892,7 +895,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
/**
* tipc_sendmsg - send message in connectionless manner
- * @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
* @dsz: amount of user data to be sent
@@ -904,9 +906,21 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
+static int tipc_sendmsg(struct socket *sock,
struct msghdr *m, size_t dsz)
{
+ struct sock *sk = sock->sk;
+ int ret;
+
+ lock_sock(sk);
+ ret = __tipc_sendmsg(sock, m, dsz);
+ release_sock(sk);
+
+ return ret;
+}
+
+static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
+{
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -931,22 +945,13 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
- if (iocb)
- lock_sock(sk);
-
if (unlikely(sock->state != SS_READY)) {
- if (sock->state == SS_LISTENING) {
- rc = -EPIPE;
- goto exit;
- }
- if (sock->state != SS_UNCONNECTED) {
- rc = -EISCONN;
- goto exit;
- }
- if (tsk->published) {
- rc = -EOPNOTSUPP;
- goto exit;
- }
+ if (sock->state == SS_LISTENING)
+ return -EPIPE;
+ if (sock->state != SS_UNCONNECTED)
+ return -EISCONN;
+ if (tsk->published)
+ return -EOPNOTSUPP;
if (dest->addrtype == TIPC_ADDR_NAME) {
tsk->conn_type = dest->addr.name.name.type;
tsk->conn_instance = dest->addr.name.name.instance;
@@ -956,8 +961,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
if (dest->addrtype == TIPC_ADDR_MCAST) {
- rc = tipc_sendmcast(sock, seq, m, dsz, timeo);
- goto exit;
+ return tipc_sendmcast(sock, seq, m, dsz, timeo);
} else if (dest->addrtype == TIPC_ADDR_NAME) {
u32 type = dest->addr.name.name.type;
u32 inst = dest->addr.name.name.instance;
@@ -972,10 +976,8 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
dport = tipc_nametbl_translate(net, type, inst, &dnode);
msg_set_destnode(mhdr, dnode);
msg_set_destport(mhdr, dport);
- if (unlikely(!dport && !dnode)) {
- rc = -EHOSTUNREACH;
- goto exit;
- }
+ if (unlikely(!dport && !dnode))
+ return -EHOSTUNREACH;
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
msg_set_type(mhdr, TIPC_DIRECT_MSG);
@@ -990,7 +992,7 @@ new_mtu:
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
if (rc < 0)
- goto exit;
+ return rc;
do {
skb = skb_peek(pktchain);
@@ -1013,9 +1015,6 @@ new_mtu:
if (rc)
__skb_queue_purge(pktchain);
} while (!rc);
-exit:
- if (iocb)
- release_sock(sk);
return rc;
}
@@ -1052,7 +1051,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
/**
* tipc_send_stream - send stream-oriented data
- * @iocb: (unused)
* @sock: socket structure
* @m: data to send
* @dsz: total length of data to be transmitted
@@ -1062,8 +1060,19 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
* Returns the number of bytes sent on success (or partial success),
* or errno if no data sent
*/
-static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t dsz)
+static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
+{
+ struct sock *sk = sock->sk;
+ int ret;
+
+ lock_sock(sk);
+ ret = __tipc_send_stream(sock, m, dsz);
+ release_sock(sk);
+
+ return ret;
+}
+
+static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
@@ -1080,7 +1089,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
/* Handle implied connection establishment */
if (unlikely(dest)) {
- rc = tipc_sendmsg(iocb, sock, m, dsz);
+ rc = __tipc_sendmsg(sock, m, dsz);
if (dsz && (dsz == rc))
tsk->sent_unacked = 1;
return rc;
@@ -1088,15 +1097,11 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
if (dsz > (uint)INT_MAX)
return -EMSGSIZE;
- if (iocb)
- lock_sock(sk);
-
if (unlikely(sock->state != SS_CONNECTED)) {
if (sock->state == SS_DISCONNECTING)
- rc = -EPIPE;
+ return -EPIPE;
else
- rc = -ENOTCONN;
- goto exit;
+ return -ENOTCONN;
}
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
@@ -1108,7 +1113,7 @@ next:
send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
if (unlikely(rc < 0))
- goto exit;
+ return rc;
do {
if (likely(!tsk_conn_cong(tsk))) {
rc = tipc_link_xmit(net, pktchain, dnode, portid);
@@ -1133,15 +1138,12 @@ next:
if (rc)
__skb_queue_purge(pktchain);
} while (!rc);
-exit:
- if (iocb)
- release_sock(sk);
+
return sent ? sent : rc;
}
/**
* tipc_send_packet - send a connection-oriented message
- * @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
* @dsz: length of data to be transmitted
@@ -1150,13 +1152,12 @@ exit:
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t dsz)
+static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
{
if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
- return tipc_send_stream(iocb, sock, m, dsz);
+ return tipc_send_stream(sock, m, dsz);
}
/* tipc_sk_finish_conn - complete the setup of a connection
@@ -1317,12 +1318,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
break;
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- break;
err = -EAGAIN;
if (!timeo)
break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ break;
}
finish_wait(sk_sleep(sk), &wait);
*timeop = timeo;
@@ -1331,7 +1332,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
/**
* tipc_recvmsg - receive packet-oriented message
- * @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
* @flags: receive flags
@@ -1341,8 +1341,8 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
*
* Returns size of returned message data, errno otherwise
*/
-static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
+ int flags)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -1426,7 +1426,6 @@ exit:
/**
* tipc_recv_stream - receive stream-oriented data
- * @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
* @flags: receive flags
@@ -1436,8 +1435,8 @@ exit:
*
* Returns size of returned message data, errno otherwise
*/
-static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
+ size_t buf_len, int flags)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -1947,7 +1946,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
if (!timeout)
m.msg_flags = MSG_DONTWAIT;
- res = tipc_sendmsg(NULL, sock, &m, 0);
+ res = __tipc_sendmsg(sock, &m, 0);
if ((res < 0) && (res != -EWOULDBLOCK))
goto exit;
@@ -2027,12 +2026,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
err = -EINVAL;
if (sock->state != SS_LISTENING)
break;
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- break;
err = -EAGAIN;
if (!timeo)
break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ break;
}
finish_wait(sk_sleep(sk), &wait);
return err;
@@ -2103,7 +2102,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
struct msghdr m = {NULL,};
tsk_advance_rx_queue(sk);
- tipc_send_packet(NULL, new_sock, &m, 0);
+ __tipc_send_stream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
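The TIPC send paths are reorganized into the usual locked/unlocked pairing: the proto_ops entry points take the socket lock and call a double-underscore worker, while internal callers such as connect() and accept(), which already hold the lock, call the worker directly. This replaces the old "iocb == NULL means the lock is already held" convention. A condensed, hypothetical sketch of the pattern:

	static int __example_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
	{
		/* Socket lock is held by the caller; safe to touch socket state. */
		return 0;
	}

	static int example_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
	{
		struct sock *sk = sock->sk;
		int ret;

		lock_sock(sk);
		ret = __example_sendmsg(sock, m, dsz);
		release_sock(sk);
		return ret;
	}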
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 72c339e432aa..1c147c869c2e 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -162,19 +162,6 @@ static void subscr_del(struct tipc_subscription *sub)
atomic_dec(&tn->subscription_count);
}
-/**
- * subscr_terminate - terminate communication with a subscriber
- *
- * Note: Must call it in process context since it might sleep.
- */
-static void subscr_terminate(struct tipc_subscription *sub)
-{
- struct tipc_subscriber *subscriber = sub->subscriber;
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
-
- tipc_conn_terminate(tn->topsrv, subscriber->conid);
-}
-
static void subscr_release(struct tipc_subscriber *subscriber)
{
struct tipc_subscription *sub;
@@ -312,16 +299,14 @@ static void subscr_conn_msg_event(struct net *net, int conid,
{
struct tipc_subscriber *subscriber = usr_data;
struct tipc_subscription *sub = NULL;
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
spin_lock_bh(&subscriber->lock);
- if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber,
- &sub) < 0) {
- spin_unlock_bh(&subscriber->lock);
- subscr_terminate(sub);
- return;
- }
+ subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub);
if (sub)
tipc_nametbl_subscribe(sub);
+ else
+ tipc_conn_terminate(tn->topsrv, subscriber->conid);
spin_unlock_bh(&subscriber->lock);
}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
new file mode 100644
index 000000000000..fc2fb11a354d
--- /dev/null
+++ b/net/tipc/udp_media.c
@@ -0,0 +1,442 @@
+/* net/tipc/udp_media.c: IP bearer support for TIPC
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/udp_tunnel.h>
+#include <linux/tipc_netlink.h>
+#include "core.h"
+#include "bearer.h"
+
+/* IANA assigned UDP port */
+#define UDP_PORT_DEFAULT 6118
+
+static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
+ [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
+ [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
+ .len = sizeof(struct sockaddr_storage)},
+ [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
+ .len = sizeof(struct sockaddr_storage)},
+};
+
+/**
+ * struct udp_media_addr - IP/UDP addressing information
+ *
+ * This is the bearer level originating address used in neighbor discovery
+ * messages, and all fields should be in network byte order
+ */
+struct udp_media_addr {
+ __be16 proto;
+ __be16 udp_port;
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ };
+};
+
+/**
+ * struct udp_bearer - ip/udp bearer data structure
+ * @bearer: associated generic tipc bearer
+ * @ubsock: bearer associated socket
+ * @ifindex: local address scope
+ * @work: used to schedule deferred work on a bearer
+ */
+struct udp_bearer {
+ struct tipc_bearer __rcu *bearer;
+ struct socket *ubsock;
+ u32 ifindex;
+ struct work_struct work;
+};
+
+/* udp_media_addr_set - convert an ip/udp address to a TIPC media address */
+static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
+ struct udp_media_addr *ua)
+{
+ memset(addr, 0, sizeof(struct tipc_media_addr));
+ addr->media_id = TIPC_MEDIA_TYPE_UDP;
+ memcpy(addr->value, ua, sizeof(struct udp_media_addr));
+ if (ntohs(ua->proto) == ETH_P_IP) {
+ if (ipv4_is_multicast(ua->ipv4.s_addr))
+ addr->broadcast = 1;
+ } else if (ntohs(ua->proto) == ETH_P_IPV6) {
+ if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST)
+ addr->broadcast = 1;
+ } else {
+ pr_err("Invalid UDP media address\n");
+ }
+}
+
+/* tipc_udp_addr2str - convert ip/udp address to string */
+static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
+{
+ struct udp_media_addr *ua = (struct udp_media_addr *)&a->value;
+
+ if (ntohs(ua->proto) == ETH_P_IP)
+ snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port));
+ else if (ntohs(ua->proto) == ETH_P_IPV6)
+ snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port));
+ else
+ pr_err("Invalid UDP media address\n");
+ return 0;
+}
+
+/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */
+static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a,
+ char *msg)
+{
+ struct udp_media_addr *ua;
+
+ ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET);
+ if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP)
+ return -EINVAL;
+ tipc_udp_media_addr_set(a, ua);
+ return 0;
+}
+
+/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */
+static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a)
+{
+ memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
+ msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP;
+ memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value,
+ sizeof(struct udp_media_addr));
+ return 0;
+}
+
+/* tipc_udp_send_msg - enqueue a send request */
+static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dest)
+{
+ int ttl, err = 0;
+ struct udp_bearer *ub;
+ struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
+ struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+ struct sk_buff *clone;
+ struct rtable *rt;
+
+ clone = skb_clone(skb, GFP_ATOMIC);
+ skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub) {
+ err = -ENODEV;
+ goto tx_error;
+ }
+ if (dst->proto == htons(ETH_P_IP)) {
+ struct flowi4 fl = {
+ .daddr = dst->ipv4.s_addr,
+ .saddr = src->ipv4.s_addr,
+ .flowi4_mark = clone->mark,
+ .flowi4_proto = IPPROTO_UDP
+ };
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto tx_error;
+ }
+ ttl = ip4_dst_hoplimit(&rt->dst);
+ err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr,
+ dst->ipv4.s_addr, 0, ttl, 0,
+ src->udp_port, dst->udp_port,
+ false, true);
+ if (err < 0) {
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct dst_entry *ndst;
+ struct flowi6 fl6 = {
+ .flowi6_oif = ub->ifindex,
+ .daddr = dst->ipv6,
+ .saddr = src->ipv6,
+ .flowi6_proto = IPPROTO_UDP
+ };
+ err = ip6_dst_lookup(ub->ubsock->sk, &ndst, &fl6);
+ if (err)
+ goto tx_error;
+ ttl = ip6_dst_hoplimit(ndst);
+ err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6,
+ &dst->ipv6, 0, ttl, src->udp_port,
+ dst->udp_port, false);
+#endif
+ }
+ return err;
+
+tx_error:
+ kfree_skb(clone);
+ return err;
+}
+
+/* tipc_udp_recv - read data from bearer socket */
+static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct udp_bearer *ub;
+ struct tipc_bearer *b;
+
+ ub = rcu_dereference_sk_user_data(sk);
+ if (!ub) {
+ pr_err_ratelimited("Failed to get UDP bearer reference");
+ kfree_skb(skb);
+ return 0;
+ }
+
+ skb_pull(skb, sizeof(struct udphdr));
+ rcu_read_lock();
+ b = rcu_dereference_rtnl(ub->bearer);
+
+ if (b) {
+ tipc_rcv(sock_net(sk), skb, b);
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return 0;
+}
+
+static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
+{
+ int err = 0;
+ struct ip_mreqn mreqn;
+ struct sock *sk = ub->ubsock->sk;
+
+ if (ntohs(remote->proto) == ETH_P_IP) {
+ if (!ipv4_is_multicast(remote->ipv4.s_addr))
+ return 0;
+ mreqn.imr_multiaddr = remote->ipv4;
+ mreqn.imr_ifindex = ub->ifindex;
+ err = __ip_mc_join_group(sk, &mreqn);
+ } else {
+ if (!ipv6_addr_is_multicast(&remote->ipv6))
+ return 0;
+ err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6);
+ }
+ return err;
+}
+
+/**
+ * parse_options - build local/remote addresses from configuration
+ * @attrs: netlink config data
+ * @ub: UDP bearer instance
+ * @local: local bearer IP address/port
+ * @remote: peer or multicast IP/port
+ */
+static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
+ struct udp_media_addr *local,
+ struct udp_media_addr *remote)
+{
+ struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+ struct sockaddr_storage *sa_local, *sa_remote;
+
+ if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
+ goto err;
+ if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
+ attrs[TIPC_NLA_BEARER_UDP_OPTS],
+ tipc_nl_udp_policy))
+ goto err;
+ if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
+ sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]);
+ sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]);
+ } else {
+err:
+ pr_err("Invalid UDP bearer configuration");
+ return -EINVAL;
+ }
+ if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) {
+ struct sockaddr_in *ip4;
+
+ ip4 = (struct sockaddr_in *)sa_local;
+ local->proto = htons(ETH_P_IP);
+ local->udp_port = ip4->sin_port;
+ local->ipv4.s_addr = ip4->sin_addr.s_addr;
+
+ ip4 = (struct sockaddr_in *)sa_remote;
+ remote->proto = htons(ETH_P_IP);
+ remote->udp_port = ip4->sin_port;
+ remote->ipv4.s_addr = ip4->sin_addr.s_addr;
+ return 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) {
+ struct sockaddr_in6 *ip6;
+
+ ip6 = (struct sockaddr_in6 *)sa_local;
+ local->proto = htons(ETH_P_IPV6);
+ local->udp_port = ip6->sin6_port;
+ local->ipv6 = ip6->sin6_addr;
+ ub->ifindex = ip6->sin6_scope_id;
+
+ ip6 = (struct sockaddr_in6 *)sa_remote;
+ remote->proto = htons(ETH_P_IPV6);
+ remote->udp_port = ip6->sin6_port;
+ remote->ipv6 = ip6->sin6_addr;
+ return 0;
+#endif
+ }
+ return -EADDRNOTAVAIL;
+}
+
+/**
+ * tipc_udp_enable - callback to create a new udp bearer instance
+ * @net: network namespace
+ * @b: pointer to generic tipc_bearer
+ * @attrs: netlink bearer configuration
+ *
+ * validate the bearer parameters and initialize the udp bearer
+ * rtnl_lock should be held
+ */
+static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
+ struct nlattr *attrs[])
+{
+ int err = -EINVAL;
+ struct udp_bearer *ub;
+ struct udp_media_addr *remote;
+ struct udp_media_addr local = {0};
+ struct udp_port_cfg udp_conf = {0};
+ struct udp_tunnel_sock_cfg tuncfg = {NULL};
+
+ ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
+ if (!ub)
+ return -ENOMEM;
+
+ remote = (struct udp_media_addr *)&b->bcast_addr.value;
+ memset(remote, 0, sizeof(struct udp_media_addr));
+ err = parse_options(attrs, ub, &local, remote);
+ if (err)
+ goto err;
+
+ b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
+ b->bcast_addr.broadcast = 1;
+ rcu_assign_pointer(b->media_ptr, ub);
+ rcu_assign_pointer(ub->bearer, b);
+ tipc_udp_media_addr_set(&b->addr, &local);
+ if (local.proto == htons(ETH_P_IP)) {
+ struct net_device *dev;
+
+ dev = __ip_dev_find(net, local.ipv4.s_addr, false);
+ if (!dev) {
+ err = -ENODEV;
+ goto err;
+ }
+ udp_conf.family = AF_INET;
+ udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+ udp_conf.use_udp_checksums = false;
+ ub->ifindex = dev->ifindex;
+ b->mtu = dev->mtu - sizeof(struct iphdr)
+ - sizeof(struct udphdr);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (local.proto == htons(ETH_P_IPV6)) {
+ udp_conf.family = AF_INET6;
+ udp_conf.use_udp6_tx_checksums = true;
+ udp_conf.use_udp6_rx_checksums = true;
+ udp_conf.local_ip6 = in6addr_any;
+ b->mtu = 1280;
+#endif
+ } else {
+ err = -EAFNOSUPPORT;
+ goto err;
+ }
+ udp_conf.local_udp_port = local.udp_port;
+ err = udp_sock_create(net, &udp_conf, &ub->ubsock);
+ if (err)
+ goto err;
+ tuncfg.sk_user_data = ub;
+ tuncfg.encap_type = 1;
+ tuncfg.encap_rcv = tipc_udp_recv;
+ tuncfg.encap_destroy = NULL;
+ setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
+
+ if (enable_mcast(ub, remote))
+ goto err;
+ return 0;
+err:
+ kfree(ub);
+ return err;
+}
+
+/* cleanup_bearer - break the socket/bearer association */
+static void cleanup_bearer(struct work_struct *work)
+{
+ struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+
+ if (ub->ubsock)
+ udp_tunnel_sock_release(ub->ubsock);
+ synchronize_net();
+ kfree(ub);
+}
+
+/* tipc_udp_disable - detach bearer from socket */
+static void tipc_udp_disable(struct tipc_bearer *b)
+{
+ struct udp_bearer *ub;
+
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub) {
+ pr_err("UDP bearer instance not found\n");
+ return;
+ }
+ if (ub->ubsock)
+ sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
+ RCU_INIT_POINTER(b->media_ptr, NULL);
+ RCU_INIT_POINTER(ub->bearer, NULL);
+
+	/* sock_release needs to be done outside of rtnl lock */
+ INIT_WORK(&ub->work, cleanup_bearer);
+ schedule_work(&ub->work);
+}
+
+struct tipc_media udp_media_info = {
+ .send_msg = tipc_udp_send_msg,
+ .enable_media = tipc_udp_enable,
+ .disable_media = tipc_udp_disable,
+ .addr2str = tipc_udp_addr2str,
+ .addr2msg = tipc_udp_addr2msg,
+ .msg2addr = tipc_udp_msg2addr,
+ .priority = TIPC_DEF_LINK_PRI,
+ .tolerance = TIPC_DEF_LINK_TOL,
+ .window = TIPC_DEF_LINK_WIN,
+ .type_id = TIPC_MEDIA_TYPE_UDP,
+ .hwaddr_len = 0,
+ .name = "udp"
+};
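For orientation, an illustrative userspace-side sketch of the sockaddr blobs that parse_options() expects in TIPC_NLA_UDP_LOCAL/TIPC_NLA_UDP_REMOTE (IPv4 case; the addresses below are examples only, and nesting the attributes under TIPC_NLA_BEARER_UDP_OPTS and sending them over generic netlink is left to the management tool):

	#include <netinet/in.h>
	#include <arpa/inet.h>
	#include <string.h>

	/* Fill the two sockaddr_storage payloads: a local unicast address/port
	 * and a remote unicast or multicast address/port, both AF_INET here.
	 */
	static void example_udp_bearer_addrs(struct sockaddr_storage *local,
					     struct sockaddr_storage *remote)
	{
		struct sockaddr_in *in;

		memset(local, 0, sizeof(*local));
		in = (struct sockaddr_in *)local;
		in->sin_family = AF_INET;
		in->sin_port = htons(6118);			/* UDP_PORT_DEFAULT */
		inet_pton(AF_INET, "10.0.0.1", &in->sin_addr);	/* example local IP */

		memset(remote, 0, sizeof(*remote));
		in = (struct sockaddr_in *)remote;
		in->sin_family = AF_INET;
		in->sin_port = htons(6118);
		inet_pton(AF_INET, "228.0.0.1", &in->sin_addr);	/* example mcast group */
	}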
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 526b6edab018..433f287ee548 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -516,20 +516,15 @@ static unsigned int unix_dgram_poll(struct file *, struct socket *,
poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
-static int unix_stream_sendmsg(struct kiocb *, struct socket *,
- struct msghdr *, size_t);
-static int unix_stream_recvmsg(struct kiocb *, struct socket *,
- struct msghdr *, size_t, int);
-static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
- struct msghdr *, size_t);
-static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
- struct msghdr *, size_t, int);
+static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
+static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
+static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
-static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
- struct msghdr *, size_t);
-static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
- struct msghdr *, size_t, int);
+static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
+static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
+ int);
static int unix_set_peek_off(struct sock *sk, int val)
{
@@ -1442,8 +1437,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
* Send AF_UNIX data.
*/
-static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
@@ -1622,8 +1617,8 @@ out:
*/
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
-static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
struct sock *other = NULL;
@@ -1725,8 +1720,8 @@ out_err:
return sent ? : err;
}
-static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
int err;
struct sock *sk = sock->sk;
@@ -1741,19 +1736,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (msg->msg_namelen)
msg->msg_namelen = 0;
- return unix_dgram_sendmsg(kiocb, sock, msg, len);
+ return unix_dgram_sendmsg(sock, msg, len);
}
-static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size,
- int flags)
+static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
if (sk->sk_state != TCP_ESTABLISHED)
return -ENOTCONN;
- return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
+ return unix_dgram_recvmsg(sock, msg, size, flags);
}
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
@@ -1766,9 +1760,8 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
}
}
-static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size,
- int flags)
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct scm_cookie scm;
struct sock *sk = sock->sk;
@@ -1900,9 +1893,8 @@ static unsigned int unix_skb_len(const struct sk_buff *skb)
return skb->len - UNIXCB(skb).consumed;
}
-static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size,
- int flags)
+static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
{
struct scm_cookie scm;
struct sock *sk = sock->sk;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1d0e39c9a3e2..2ec86e652a19 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -949,8 +949,8 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
return mask;
}
-static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
int err;
struct sock *sk;
@@ -1062,11 +1062,10 @@ out:
return err;
}
-static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
{
- return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len,
- flags);
+ return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags);
}
static const struct proto_ops vsock_dgram_ops = {
@@ -1505,8 +1504,8 @@ static int vsock_stream_getsockopt(struct socket *sock,
return 0;
}
-static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk;
struct vsock_sock *vsk;
@@ -1644,9 +1643,8 @@ out:
static int
-vsock_stream_recvmsg(struct kiocb *kiocb,
- struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
struct sock *sk;
struct vsock_sock *vsk;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7f3255084a6c..c294da095461 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1730,8 +1730,7 @@ static int vmci_transport_dgram_enqueue(
return err - sizeof(*dg);
}
-static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
- struct vsock_sock *vsk,
+static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
struct msghdr *msg, size_t len,
int flags)
{
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index e24fc585c883..6309b9c0bcd5 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -533,7 +533,7 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev,
else if (wdev->wext.ibss.bssid)
memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN);
else
- memset(ap_addr->sa_data, 0, ETH_ALEN);
+ eth_zero_addr(ap_addr->sa_data);
wdev_unlock(wdev);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index be2501538011..864b782c0202 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5683,8 +5683,8 @@ static int nl80211_parse_random_mac(struct nlattr **attrs,
int i;
if (!attrs[NL80211_ATTR_MAC] && !attrs[NL80211_ATTR_MAC_MASK]) {
- memset(mac_addr, 0, ETH_ALEN);
- memset(mac_addr_mask, 0, ETH_ALEN);
+ eth_zero_addr(mac_addr);
+ eth_zero_addr(mac_addr_mask);
mac_addr[0] = 0x2;
mac_addr_mask[0] = 0x3;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index b17b3692f8c2..a00ee8897dc6 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -7,6 +7,7 @@
#include <linux/tracepoint.h>
#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
#include <net/cfg80211.h>
#include "core.h"
@@ -15,7 +16,7 @@
if (given_mac) \
memcpy(__entry->entry_mac, given_mac, ETH_ALEN); \
else \
- memset(__entry->entry_mac, 0, ETH_ALEN); \
+ eth_zero_addr(__entry->entry_mac); \
} while (0)
#define MAC_PR_FMT "%pM"
#define MAC_PR_ARG(entry_mac) (__entry->entry_mac)
@@ -1077,7 +1078,7 @@ TRACE_EVENT(rdev_auth,
if (req->bss)
MAC_ASSIGN(bssid, req->bss->bssid);
else
- memset(__entry->bssid, 0, ETH_ALEN);
+ eth_zero_addr(__entry->bssid);
__entry->auth_type = req->auth_type;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", auth type: %d, bssid: " MAC_PR_FMT,
@@ -1103,7 +1104,7 @@ TRACE_EVENT(rdev_assoc,
if (req->bss)
MAC_ASSIGN(bssid, req->bss->bssid);
else
- memset(__entry->bssid, 0, ETH_ALEN);
+ eth_zero_addr(__entry->bssid);
MAC_ASSIGN(prev_bssid, req->prev_bssid);
__entry->use_mfp = req->use_mfp;
__entry->flags = req->flags;
@@ -1153,7 +1154,7 @@ TRACE_EVENT(rdev_disassoc,
if (req->bss)
MAC_ASSIGN(bssid, req->bss->bssid);
else
- memset(__entry->bssid, 0, ETH_ALEN);
+ eth_zero_addr(__entry->bssid);
__entry->reason_code = req->reason_code;
__entry->local_state_change = req->local_state_change;
),
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 368611c05739..a4e8af3321d2 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -322,7 +322,7 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev,
if (wdev->current_bss)
memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN);
else
- memset(ap_addr->sa_data, 0, ETH_ALEN);
+ eth_zero_addr(ap_addr->sa_data);
wdev_unlock(wdev);
return 0;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d9149b68b9bc..c3ab230e4493 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1077,8 +1077,7 @@ out_clear_request:
goto out;
}
-static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct x25_sock *x25 = x25_sk(sk);
@@ -1252,8 +1251,7 @@ out_kfree_skb:
}
-static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size,
+static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;