diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 09:00:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 09:00:47 -0700 |
commit | 6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch) | |
tree | 74d1ec65087df1da1021b43ac51acc1ee8601809 /net | |
parent | bb0fd7ab0986105765d11baa82e619c618a235aa (diff) | |
parent | 9f9151412dd7aae0e3f51a89ae4a1f8755fdb4d0 (diff) | |
download | linux-6c373ca89399c5a3f7ef210ad8f63dc3437da345.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Add BQL support to via-rhine, from Tino Reichardt.
2) Integrate SWITCHDEV layer support into the DSA layer, so DSA drivers
can support hw switch offloading. From Floria Fainelli.
3) Allow 'ip address' commands to initiate multicast group join/leave,
from Madhu Challa.
4) Many ipv4 FIB lookup optimizations from Alexander Duyck.
5) Support EBPF in cls_bpf classifier and act_bpf action, from Daniel
Borkmann.
6) Remove the ugly compat support in ARP for ugly layers like ax25,
rose, etc. And use this to clean up the neigh layer, then use it to
implement MPLS support. All from Eric Biederman.
7) Support L3 forwarding offloading in switches, from Scott Feldman.
8) Collapse the LOCAL and MAIN ipv4 FIB tables when possible, to speed
up route lookups even further. From Alexander Duyck.
9) Many improvements and bug fixes to the rhashtable implementation,
from Herbert Xu and Thomas Graf. In particular, in the case where
an rhashtable user bulk adds a large number of items into an empty
table, we expand the table much more sanely.
10) Don't make the tcp_metrics hash table per-namespace, from Eric
Biederman.
11) Extend EBPF to access SKB fields, from Alexei Starovoitov.
12) Split out new connection request sockets so that they can be
established in the main hash table. Much less false sharing since
hash lookups go direct to the request sockets instead of having to
go first to the listener then to the request socks hashed
underneath. From Eric Dumazet.
13) Add async I/O support for crytpo AF_ALG sockets, from Tadeusz Struk.
14) Support stable privacy address generation for RFC7217 in IPV6. From
Hannes Frederic Sowa.
15) Hash network namespace into IP frag IDs, also from Hannes Frederic
Sowa.
16) Convert PTP get/set methods to use 64-bit time, from Richard
Cochran.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1816 commits)
fm10k: Bump driver version to 0.15.2
fm10k: corrected VF multicast update
fm10k: mbx_update_max_size does not drop all oversized messages
fm10k: reset head instead of calling update_max_size
fm10k: renamed mbx_tx_dropped to mbx_tx_oversized
fm10k: update xcast mode before synchronizing multicast addresses
fm10k: start service timer on probe
fm10k: fix function header comment
fm10k: comment next_vf_mbx flow
fm10k: don't handle mailbox events in iov_event path and always process mailbox
fm10k: use separate workqueue for fm10k driver
fm10k: Set PF queues to unlimited bandwidth during virtualization
fm10k: expose tx_timeout_count as an ethtool stat
fm10k: only increment tx_timeout_count in Tx hang path
fm10k: remove extraneous "Reset interface" message
fm10k: separate PF only stats so that VF does not display them
fm10k: use hw->mac.max_queues for stats
fm10k: only show actual queues, not the maximum in hardware
fm10k: allow creation of VLAN on default vid
fm10k: fix unused warnings
...
Diffstat (limited to 'net')
468 files changed, 19500 insertions, 11521 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index e4a02ef55102..7fa0f382e7d1 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -1,6 +1,61 @@ -config 6LOWPAN +menuconfig 6LOWPAN tristate "6LoWPAN Support" depends on IPV6 ---help--- This enables IPv6 over Low power Wireless Personal Area Network - "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. + +menuconfig 6LOWPAN_NHC + tristate "Next Header Compression Support" + depends on 6LOWPAN + default y + ---help--- + Support for next header compression. + +if 6LOWPAN_NHC + +config 6LOWPAN_NHC_DEST + tristate "Destination Options Header Support" + default y + ---help--- + 6LoWPAN IPv6 Destination Options Header compression according to + RFC6282. + +config 6LOWPAN_NHC_FRAGMENT + tristate "Fragment Header Support" + default y + ---help--- + 6LoWPAN IPv6 Fragment Header compression according to RFC6282. + +config 6LOWPAN_NHC_HOP + tristate "Hop-by-Hop Options Header Support" + default y + ---help--- + 6LoWPAN IPv6 Hop-by-Hop Options Header compression according to + RFC6282. + +config 6LOWPAN_NHC_IPV6 + tristate "IPv6 Header Support" + default y + ---help--- + 6LoWPAN IPv6 Header compression according to RFC6282. + +config 6LOWPAN_NHC_MOBILITY + tristate "Mobility Header Support" + default y + ---help--- + 6LoWPAN IPv6 Mobility Header compression according to RFC6282. + +config 6LOWPAN_NHC_ROUTING + tristate "Routing Header Support" + default y + ---help--- + 6LoWPAN IPv6 Routing Header compression according to RFC6282. + +config 6LOWPAN_NHC_UDP + tristate "UDP Header Support" + default y + ---help--- + 6LoWPAN IPv6 UDP Header compression according to RFC6282. + +endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 415886bb456a..eb8baa72adc8 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -1,3 +1,12 @@ -obj-$(CONFIG_6LOWPAN) := 6lowpan.o +obj-$(CONFIG_6LOWPAN) += 6lowpan.o -6lowpan-y := iphc.o +6lowpan-y := iphc.o nhc.o + +#rfc6282 nhcs +obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o +obj-$(CONFIG_6LOWPAN_NHC_FRAGMENT) += nhc_fragment.o +obj-$(CONFIG_6LOWPAN_NHC_HOP) += nhc_hop.o +obj-$(CONFIG_6LOWPAN_NHC_IPV6) += nhc_ipv6.o +obj-$(CONFIG_6LOWPAN_NHC_MOBILITY) += nhc_mobility.o +obj-$(CONFIG_6LOWPAN_NHC_ROUTING) += nhc_routing.o +obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 32ffec6ef164..94a375c04f21 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -54,6 +54,8 @@ #include <net/ipv6.h> #include <net/af_ieee802154.h> +#include "nhc.h" + /* Uncompress address function for source and * destination address(non-multicast). * @@ -224,77 +226,6 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, return 0; } -static int uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) -{ - bool fail; - u8 tmp = 0, val = 0; - - fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp)); - - if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { - pr_debug("UDP header uncompression\n"); - switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { - case LOWPAN_NHC_UDP_CS_P_00: - fail |= lowpan_fetch_skb(skb, &uh->source, - sizeof(uh->source)); - fail |= lowpan_fetch_skb(skb, &uh->dest, - sizeof(uh->dest)); - break; - case LOWPAN_NHC_UDP_CS_P_01: - fail |= lowpan_fetch_skb(skb, &uh->source, - sizeof(uh->source)); - fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); - uh->dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); - break; - case LOWPAN_NHC_UDP_CS_P_10: - fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); - uh->source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); - fail |= lowpan_fetch_skb(skb, &uh->dest, - sizeof(uh->dest)); - break; - case LOWPAN_NHC_UDP_CS_P_11: - fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); - uh->source = htons(LOWPAN_NHC_UDP_4BIT_PORT + - (val >> 4)); - uh->dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + - (val & 0x0f)); - break; - default: - pr_debug("ERROR: unknown UDP format\n"); - goto err; - } - - pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", - ntohs(uh->source), ntohs(uh->dest)); - - /* checksum */ - if (tmp & LOWPAN_NHC_UDP_CS_C) { - pr_debug_ratelimited("checksum elided currently not supported\n"); - goto err; - } else { - fail |= lowpan_fetch_skb(skb, &uh->check, - sizeof(uh->check)); - } - - /* UDP length needs to be infered from the lower layers - * here, we obtain the hint from the remaining size of the - * frame - */ - uh->len = htons(skb->len + sizeof(struct udphdr)); - pr_debug("uncompressed UDP length: src = %d", ntohs(uh->len)); - } else { - pr_debug("ERROR: unsupported NH format\n"); - goto err; - } - - if (fail) - goto err; - - return 0; -err: - return -EINVAL; -} - /* TTL uncompression values */ static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; @@ -425,29 +356,11 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, return -EINVAL; } - /* UDP data uncompression */ + /* Next header data uncompression */ if (iphc0 & LOWPAN_IPHC_NH_C) { - struct udphdr uh; - const int needed = sizeof(struct udphdr) + sizeof(hdr); - - if (uncompress_udp_header(skb, &uh)) - return -EINVAL; - - /* replace the compressed UDP head by the uncompressed UDP - * header - */ - err = skb_cow(skb, needed); - if (unlikely(err)) + err = lowpan_nhc_do_uncompression(skb, dev, &hdr); + if (err < 0) return err; - - skb_push(skb, sizeof(struct udphdr)); - skb_reset_transport_header(skb); - skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); - - raw_dump_table(__func__, "raw UDP header dump", - (u8 *)&uh, sizeof(uh)); - - hdr.nexthdr = UIP_PROTO_UDP; } else { err = skb_cow(skb, sizeof(hdr)); if (unlikely(err)) @@ -500,71 +413,6 @@ static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift, return rol8(val, shift); } -static void compress_udp_header(u8 **hc_ptr, struct sk_buff *skb) -{ - struct udphdr *uh; - u8 tmp; - - /* In the case of RAW sockets the transport header is not set by - * the ip6 stack so we must set it ourselves - */ - if (skb->transport_header == skb->network_header) - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - - uh = udp_hdr(skb); - - if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT) && - ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT)) { - pr_debug("UDP header: both ports compression to 4 bits\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_11; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source and destination port */ - tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT + - ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4); - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of dest\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_01; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source port */ - lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); - /* destination port */ - tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of source\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_10; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source port */ - tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* destination port */ - lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); - } else { - pr_debug("UDP header: can't compress\n"); - /* compression value */ - tmp = LOWPAN_NHC_UDP_CS_P_00; - lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); - /* source port */ - lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); - /* destination port */ - lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); - } - - /* checksum is always inline */ - lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check)); - - /* skip the UDP header */ - skb_pull(skb, sizeof(struct udphdr)); -} - int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) @@ -572,7 +420,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, u8 tmp, iphc0, iphc1, *hc_ptr; struct ipv6hdr *hdr; u8 head[100] = {}; - int addr_type; + int ret, addr_type; if (type != ETH_P_IPV6) return -EINVAL; @@ -649,13 +497,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, /* NOTE: payload length is always compressed */ - /* Next Header is compress if UDP */ - if (hdr->nexthdr == UIP_PROTO_UDP) - iphc0 |= LOWPAN_IPHC_NH_C; - - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) - lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr, - sizeof(hdr->nexthdr)); + /* Check if we provide the nhc format for nexthdr and compression + * functionality. If not nexthdr is handled inline and not compressed. + */ + ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr, &iphc0); + if (ret < 0) + return ret; /* Hop limit * if 1: compress, encoding is 01 @@ -741,9 +588,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, } } - /* UDP header compression */ - if (hdr->nexthdr == UIP_PROTO_UDP) - compress_udp_header(&hc_ptr, skb); + /* next header compression */ + if (iphc0 & LOWPAN_IPHC_NH_C) { + ret = lowpan_nhc_do_compression(skb, hdr, &hc_ptr); + if (ret < 0) + return ret; + } head[0] = iphc0; head[1] = iphc1; @@ -761,4 +611,18 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(lowpan_header_compress); +static int __init lowpan_module_init(void) +{ + request_module_nowait("nhc_dest"); + request_module_nowait("nhc_fragment"); + request_module_nowait("nhc_hop"); + request_module_nowait("nhc_ipv6"); + request_module_nowait("nhc_mobility"); + request_module_nowait("nhc_routing"); + request_module_nowait("nhc_udp"); + + return 0; +} +module_init(lowpan_module_init); + MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c new file mode 100644 index 000000000000..fd20fc51a7c4 --- /dev/null +++ b/net/6lowpan/nhc.c @@ -0,0 +1,241 @@ +/* + * 6LoWPAN next header compression + * + * + * Authors: + * Alexander Aring <aar@pengutronix.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/netdevice.h> + +#include <net/ipv6.h> + +#include "nhc.h" + +static struct rb_root rb_root = RB_ROOT; +static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX]; +static DEFINE_SPINLOCK(lowpan_nhc_lock); + +static int lowpan_nhc_insert(struct lowpan_nhc *nhc) +{ + struct rb_node **new = &rb_root.rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct lowpan_nhc *this = container_of(*new, struct lowpan_nhc, + node); + int result, len_dif, len; + + len_dif = nhc->idlen - this->idlen; + + if (nhc->idlen < this->idlen) + len = nhc->idlen; + else + len = this->idlen; + + result = memcmp(nhc->id, this->id, len); + if (!result) + result = len_dif; + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&nhc->node, parent, new); + rb_insert_color(&nhc->node, &rb_root); + + return 0; +} + +static void lowpan_nhc_remove(struct lowpan_nhc *nhc) +{ + rb_erase(&nhc->node, &rb_root); +} + +static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb) +{ + struct rb_node *node = rb_root.rb_node; + const u8 *nhcid_skb_ptr = skb->data; + + while (node) { + struct lowpan_nhc *nhc = container_of(node, struct lowpan_nhc, + node); + u8 nhcid_skb_ptr_masked[LOWPAN_NHC_MAX_ID_LEN]; + int result, i; + + if (nhcid_skb_ptr + nhc->idlen > skb->data + skb->len) + return NULL; + + /* copy and mask afterwards the nhid value from skb */ + memcpy(nhcid_skb_ptr_masked, nhcid_skb_ptr, nhc->idlen); + for (i = 0; i < nhc->idlen; i++) + nhcid_skb_ptr_masked[i] &= nhc->idmask[i]; + + result = memcmp(nhcid_skb_ptr_masked, nhc->id, nhc->idlen); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return nhc; + } + + return NULL; +} + +int lowpan_nhc_check_compression(struct sk_buff *skb, + const struct ipv6hdr *hdr, u8 **hc_ptr, + u8 *iphc0) +{ + struct lowpan_nhc *nhc; + + spin_lock_bh(&lowpan_nhc_lock); + + nhc = lowpan_nexthdr_nhcs[hdr->nexthdr]; + if (nhc && nhc->compress) + *iphc0 |= LOWPAN_IPHC_NH_C; + else + lowpan_push_hc_data(hc_ptr, &hdr->nexthdr, + sizeof(hdr->nexthdr)); + + spin_unlock_bh(&lowpan_nhc_lock); + + return 0; +} + +int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, + u8 **hc_ptr) +{ + int ret; + struct lowpan_nhc *nhc; + + spin_lock_bh(&lowpan_nhc_lock); + + nhc = lowpan_nexthdr_nhcs[hdr->nexthdr]; + /* check if the nhc module was removed in unlocked part. + * TODO: this is a workaround we should prevent unloading + * of nhc modules while unlocked part, this will always drop + * the lowpan packet but it's very unlikely. + * + * Solution isn't easy because we need to decide at + * lowpan_nhc_check_compression if we do a compression or not. + * Because the inline data which is added to skb, we can't move this + * handling. + */ + if (unlikely(!nhc || !nhc->compress)) { + ret = -EINVAL; + goto out; + } + + /* In the case of RAW sockets the transport header is not set by + * the ip6 stack so we must set it ourselves + */ + if (skb->transport_header == skb->network_header) + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + ret = nhc->compress(skb, hc_ptr); + if (ret < 0) + goto out; + + /* skip the transport header */ + skb_pull(skb, nhc->nexthdrlen); + +out: + spin_unlock_bh(&lowpan_nhc_lock); + + return ret; +} + +int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, + struct ipv6hdr *hdr) +{ + struct lowpan_nhc *nhc; + int ret; + + spin_lock_bh(&lowpan_nhc_lock); + + nhc = lowpan_nhc_by_nhcid(skb); + if (nhc) { + if (nhc->uncompress) { + ret = nhc->uncompress(skb, sizeof(struct ipv6hdr) + + nhc->nexthdrlen); + if (ret < 0) { + spin_unlock_bh(&lowpan_nhc_lock); + return ret; + } + } else { + spin_unlock_bh(&lowpan_nhc_lock); + netdev_warn(dev, "received nhc id for %s which is not implemented.\n", + nhc->name); + return -ENOTSUPP; + } + } else { + spin_unlock_bh(&lowpan_nhc_lock); + netdev_warn(dev, "received unknown nhc id which was not found.\n"); + return -ENOENT; + } + + hdr->nexthdr = nhc->nexthdr; + skb_reset_transport_header(skb); + raw_dump_table(__func__, "raw transport header dump", + skb_transport_header(skb), nhc->nexthdrlen); + + spin_unlock_bh(&lowpan_nhc_lock); + + return 0; +} + +int lowpan_nhc_add(struct lowpan_nhc *nhc) +{ + int ret; + + if (!nhc->idlen || !nhc->idsetup) + return -EINVAL; + + WARN_ONCE(nhc->idlen > LOWPAN_NHC_MAX_ID_LEN, + "LOWPAN_NHC_MAX_ID_LEN should be updated to %zd.\n", + nhc->idlen); + + nhc->idsetup(nhc); + + spin_lock_bh(&lowpan_nhc_lock); + + if (lowpan_nexthdr_nhcs[nhc->nexthdr]) { + ret = -EEXIST; + goto out; + } + + ret = lowpan_nhc_insert(nhc); + if (ret < 0) + goto out; + + lowpan_nexthdr_nhcs[nhc->nexthdr] = nhc; +out: + spin_unlock_bh(&lowpan_nhc_lock); + return ret; +} +EXPORT_SYMBOL(lowpan_nhc_add); + +void lowpan_nhc_del(struct lowpan_nhc *nhc) +{ + spin_lock_bh(&lowpan_nhc_lock); + + lowpan_nhc_remove(nhc); + lowpan_nexthdr_nhcs[nhc->nexthdr] = NULL; + + spin_unlock_bh(&lowpan_nhc_lock); + + synchronize_net(); +} +EXPORT_SYMBOL(lowpan_nhc_del); diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h new file mode 100644 index 000000000000..ed44938eb5de --- /dev/null +++ b/net/6lowpan/nhc.h @@ -0,0 +1,146 @@ +#ifndef __6LOWPAN_NHC_H +#define __6LOWPAN_NHC_H + +#include <linux/skbuff.h> +#include <linux/rbtree.h> +#include <linux/module.h> + +#include <net/6lowpan.h> +#include <net/ipv6.h> + +#define LOWPAN_NHC_MAX_ID_LEN 1 + +/** + * LOWPAN_NHC - helper macro to generate nh id fields and lowpan_nhc struct + * + * @__nhc: variable name of the lowpan_nhc struct. + * @_name: const char * of common header compression name. + * @_nexthdr: ipv6 nexthdr field for the header compression. + * @_nexthdrlen: ipv6 nexthdr len for the reserved space. + * @_idsetup: callback to setup id and mask values. + * @_idlen: len for the next header id and mask, should be always the same. + * @_uncompress: callback for uncompression call. + * @_compress: callback for compression call. + */ +#define LOWPAN_NHC(__nhc, _name, _nexthdr, \ + _hdrlen, _idsetup, _idlen, \ + _uncompress, _compress) \ +static u8 __nhc##_val[_idlen]; \ +static u8 __nhc##_mask[_idlen]; \ +static struct lowpan_nhc __nhc = { \ + .name = _name, \ + .nexthdr = _nexthdr, \ + .nexthdrlen = _hdrlen, \ + .id = __nhc##_val, \ + .idmask = __nhc##_mask, \ + .idlen = _idlen, \ + .idsetup = _idsetup, \ + .uncompress = _uncompress, \ + .compress = _compress, \ +} + +#define module_lowpan_nhc(__nhc) \ +static int __init __nhc##_init(void) \ +{ \ + return lowpan_nhc_add(&(__nhc)); \ +} \ +module_init(__nhc##_init); \ +static void __exit __nhc##_exit(void) \ +{ \ + lowpan_nhc_del(&(__nhc)); \ +} \ +module_exit(__nhc##_exit); + +/** + * struct lowpan_nhc - hold 6lowpan next hdr compression ifnformation + * + * @node: holder for the rbtree. + * @name: name of the specific next header compression + * @nexthdr: next header value of the protocol which should be compressed. + * @nexthdrlen: ipv6 nexthdr len for the reserved space. + * @id: array for nhc id. Note this need to be in network byteorder. + * @mask: array for nhc id mask. Note this need to be in network byteorder. + * @len: the length of the next header id and mask. + * @setup: callback to setup fill the next header id value and mask. + * @compress: callback to do the header compression. + * @uncompress: callback to do the header uncompression. + */ +struct lowpan_nhc { + struct rb_node node; + const char *name; + const u8 nexthdr; + const size_t nexthdrlen; + u8 *id; + u8 *idmask; + const size_t idlen; + + void (*idsetup)(struct lowpan_nhc *nhc); + int (*uncompress)(struct sk_buff *skb, size_t needed); + int (*compress)(struct sk_buff *skb, u8 **hc_ptr); +}; + +/** + * lowpan_nhc_by_nexthdr - return the 6lowpan nhc by ipv6 nexthdr. + * + * @nexthdr: ipv6 nexthdr value. + */ +struct lowpan_nhc *lowpan_nhc_by_nexthdr(u8 nexthdr); + +/** + * lowpan_nhc_check_compression - checks if we support compression format. If + * we support the nhc by nexthdr field, the 6LoWPAN iphc NHC bit will be + * set. If we don't support nexthdr will be added as inline data to the + * 6LoWPAN header. + * + * @skb: skb of 6LoWPAN header to read nhc and replace header. + * @hdr: ipv6hdr to check the nexthdr value + * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of + * replaced header. + * @iphc0: iphc0 pointer to set the 6LoWPAN NHC bit + */ +int lowpan_nhc_check_compression(struct sk_buff *skb, + const struct ipv6hdr *hdr, u8 **hc_ptr, + u8 *iphc0); + +/** + * lowpan_nhc_do_compression - calling compress callback for nhc + * + * @skb: skb of 6LoWPAN header to read nhc and replace header. + * @hdr: ipv6hdr to set the nexthdr value + * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of + * replaced header. + */ +int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, + u8 **hc_ptr); + +/** + * lowpan_nhc_do_uncompression - calling uncompress callback for nhc + * + * @nhc: 6LoWPAN nhc context, get by lowpan_nhc_by_ functions. + * @skb: skb of 6LoWPAN header, skb->data should be pointed to nhc id value. + * @dev: netdevice for print logging information. + * @hdr: ipv6hdr for setting nexthdr value. + */ +int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, + struct ipv6hdr *hdr); + +/** + * lowpan_nhc_add - register a next header compression to framework + * + * @nhc: nhc which should be add. + */ +int lowpan_nhc_add(struct lowpan_nhc *nhc); + +/** + * lowpan_nhc_del - delete a next header compression from framework + * + * @nhc: nhc which should be delete. + */ +void lowpan_nhc_del(struct lowpan_nhc *nhc); + +/** + * lowpan_nhc_init - adding all default nhcs + */ +void lowpan_nhc_init(void); + +#endif /* __6LOWPAN_NHC_H */ diff --git a/net/6lowpan/nhc_dest.c b/net/6lowpan/nhc_dest.c new file mode 100644 index 000000000000..0b292c9646eb --- /dev/null +++ b/net/6lowpan/nhc_dest.c @@ -0,0 +1,28 @@ +/* + * 6LoWPAN IPv6 Destination Options Header compression according to + * RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_DEST_IDLEN 1 +#define LOWPAN_NHC_DEST_ID_0 0xe6 +#define LOWPAN_NHC_DEST_MASK_0 0xfe + +static void dest_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_DEST_ID_0; + nhc->idmask[0] = LOWPAN_NHC_DEST_MASK_0; +} + +LOWPAN_NHC(nhc_dest, "RFC6282 Destination Options", NEXTHDR_DEST, 0, + dest_nhid_setup, LOWPAN_NHC_DEST_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_dest); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Destination Options compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_fragment.c b/net/6lowpan/nhc_fragment.c new file mode 100644 index 000000000000..473dbc58ef84 --- /dev/null +++ b/net/6lowpan/nhc_fragment.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Fragment Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_FRAGMENT_IDLEN 1 +#define LOWPAN_NHC_FRAGMENT_ID_0 0xe4 +#define LOWPAN_NHC_FRAGMENT_MASK_0 0xfe + +static void fragment_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_FRAGMENT_ID_0; + nhc->idmask[0] = LOWPAN_NHC_FRAGMENT_MASK_0; +} + +LOWPAN_NHC(nhc_fragment, "RFC6282 Fragment", NEXTHDR_FRAGMENT, 0, + fragment_nhid_setup, LOWPAN_NHC_FRAGMENT_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_fragment); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Fragment compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_hop.c b/net/6lowpan/nhc_hop.c new file mode 100644 index 000000000000..1eb66be16f19 --- /dev/null +++ b/net/6lowpan/nhc_hop.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Hop-by-Hop Options Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_HOP_IDLEN 1 +#define LOWPAN_NHC_HOP_ID_0 0xe0 +#define LOWPAN_NHC_HOP_MASK_0 0xfe + +static void hop_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_HOP_ID_0; + nhc->idmask[0] = LOWPAN_NHC_HOP_MASK_0; +} + +LOWPAN_NHC(nhc_hop, "RFC6282 Hop-by-Hop Options", NEXTHDR_HOP, 0, + hop_nhid_setup, LOWPAN_NHC_HOP_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_hop); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Hop-by-Hop Options compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_ipv6.c b/net/6lowpan/nhc_ipv6.c new file mode 100644 index 000000000000..2313d1600af3 --- /dev/null +++ b/net/6lowpan/nhc_ipv6.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_IPV6_IDLEN 1 +#define LOWPAN_NHC_IPV6_ID_0 0xee +#define LOWPAN_NHC_IPV6_MASK_0 0xfe + +static void ipv6_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_IPV6_ID_0; + nhc->idmask[0] = LOWPAN_NHC_IPV6_MASK_0; +} + +LOWPAN_NHC(nhc_ipv6, "RFC6282 IPv6", NEXTHDR_IPV6, 0, ipv6_nhid_setup, + LOWPAN_NHC_IPV6_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_ipv6); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 IPv6 compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_mobility.c b/net/6lowpan/nhc_mobility.c new file mode 100644 index 000000000000..60d3f3886c98 --- /dev/null +++ b/net/6lowpan/nhc_mobility.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Mobility Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_MOBILITY_IDLEN 1 +#define LOWPAN_NHC_MOBILITY_ID_0 0xe8 +#define LOWPAN_NHC_MOBILITY_MASK_0 0xfe + +static void mobility_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_MOBILITY_ID_0; + nhc->idmask[0] = LOWPAN_NHC_MOBILITY_MASK_0; +} + +LOWPAN_NHC(nhc_mobility, "RFC6282 Mobility", NEXTHDR_MOBILITY, 0, + mobility_nhid_setup, LOWPAN_NHC_MOBILITY_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_mobility); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Mobility compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_routing.c b/net/6lowpan/nhc_routing.c new file mode 100644 index 000000000000..c393280f11c4 --- /dev/null +++ b/net/6lowpan/nhc_routing.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN IPv6 Routing Header compression according to RFC6282 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_ROUTING_IDLEN 1 +#define LOWPAN_NHC_ROUTING_ID_0 0xe2 +#define LOWPAN_NHC_ROUTING_MASK_0 0xfe + +static void routing_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_ROUTING_ID_0; + nhc->idmask[0] = LOWPAN_NHC_ROUTING_MASK_0; +} + +LOWPAN_NHC(nhc_routing, "RFC6282 Routing", NEXTHDR_ROUTING, 0, + routing_nhid_setup, LOWPAN_NHC_ROUTING_IDLEN, NULL, NULL); + +module_lowpan_nhc(nhc_routing); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Routing compression"); +MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c new file mode 100644 index 000000000000..c6bcaeb428ae --- /dev/null +++ b/net/6lowpan/nhc_udp.c @@ -0,0 +1,157 @@ +/* + * 6LoWPAN IPv6 UDP compression according to RFC6282 + * + * + * Authors: + * Alexander Aring <aar@pengutronix.de> + * + * Orignal written by: + * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + * Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_NHC_UDP_IDLEN 1 + +static int udp_uncompress(struct sk_buff *skb, size_t needed) +{ + u8 tmp = 0, val = 0; + struct udphdr uh; + bool fail; + int err; + + fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp)); + + pr_debug("UDP header uncompression\n"); + switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { + case LOWPAN_NHC_UDP_CS_P_00: + fail |= lowpan_fetch_skb(skb, &uh.source, sizeof(uh.source)); + fail |= lowpan_fetch_skb(skb, &uh.dest, sizeof(uh.dest)); + break; + case LOWPAN_NHC_UDP_CS_P_01: + fail |= lowpan_fetch_skb(skb, &uh.source, sizeof(uh.source)); + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); + uh.dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + break; + case LOWPAN_NHC_UDP_CS_P_10: + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); + uh.source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + fail |= lowpan_fetch_skb(skb, &uh.dest, sizeof(uh.dest)); + break; + case LOWPAN_NHC_UDP_CS_P_11: + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); + uh.source = htons(LOWPAN_NHC_UDP_4BIT_PORT + (val >> 4)); + uh.dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + (val & 0x0f)); + break; + default: + BUG(); + } + + pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", + ntohs(uh.source), ntohs(uh.dest)); + + /* checksum */ + if (tmp & LOWPAN_NHC_UDP_CS_C) { + pr_debug_ratelimited("checksum elided currently not supported\n"); + fail = true; + } else { + fail |= lowpan_fetch_skb(skb, &uh.check, sizeof(uh.check)); + } + + if (fail) + return -EINVAL; + + /* UDP length needs to be infered from the lower layers + * here, we obtain the hint from the remaining size of the + * frame + */ + uh.len = htons(skb->len + sizeof(struct udphdr)); + pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len)); + + /* replace the compressed UDP head by the uncompressed UDP + * header + */ + err = skb_cow(skb, needed); + if (unlikely(err)) + return err; + + skb_push(skb, sizeof(struct udphdr)); + skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + + return 0; +} + +static int udp_compress(struct sk_buff *skb, u8 **hc_ptr) +{ + const struct udphdr *uh = udp_hdr(skb); + u8 tmp; + + if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT) && + ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT)) { + pr_debug("UDP header: both ports compression to 4 bits\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_11; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source and destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT + + ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of dest\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_01; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of source\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_10; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source port */ + tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* destination port */ + lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); + } else { + pr_debug("UDP header: can't compress\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_00; + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); + } + + /* checksum is always inline */ + lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check)); + + return 0; +} + +static void udp_nhid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_NHC_UDP_ID; + nhc->idmask[0] = LOWPAN_NHC_UDP_MASK; +} + +LOWPAN_NHC(nhc_udp, "RFC6282 UDP", NEXTHDR_UDP, sizeof(struct udphdr), + udp_nhid_setup, LOWPAN_NHC_UDP_IDLEN, udp_uncompress, udp_compress); + +module_lowpan_nhc(nhc_udp); +MODULE_DESCRIPTION("6LoWPAN next header RFC6282 UDP compression"); +MODULE_LICENSE("GPL"); diff --git a/net/802/fc.c b/net/802/fc.c index 7c174b6750cd..7b9219022418 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -75,29 +75,8 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, return -hdr_len; } -/* - * A neighbour discovery of some species (eg arp) has completed. We - * can now send the packet. - */ - -static int fc_rebuild_header(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - struct fch_hdr *fch=(struct fch_hdr *)skb->data; - struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); - if(fcllc->ethertype != htons(ETH_P_IP)) { - printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); - return 0; - } - return arp_find(fch->daddr, skb); -#else - return 0; -#endif -} - static const struct header_ops fc_header_ops = { .create = fc_header, - .rebuild = fc_rebuild_header, }; static void fc_setup(struct net_device *dev) diff --git a/net/802/fddi.c b/net/802/fddi.c index 59e7346f1193..7d3a0af954e8 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -87,31 +87,6 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev, return -hl; } - -/* - * Rebuild the FDDI MAC header. This is called after an ARP - * (or in future other address resolution) has completed on - * this sk_buff. We now let ARP fill in the other fields. - */ - -static int fddi_rebuild_header(struct sk_buff *skb) -{ - struct fddihdr *fddi = (struct fddihdr *)skb->data; - -#ifdef CONFIG_INET - if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP)) - /* Try to get ARP to resolve the header and fill destination address */ - return arp_find(fddi->daddr, skb); - else -#endif - { - printk("%s: Don't know how to resolve type %04X addresses.\n", - skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); - return 0; - } -} - - /* * Determine the packet's protocol ID and fill in skb fields. * This routine is called before an incoming packet is passed @@ -177,7 +152,6 @@ EXPORT_SYMBOL(fddi_change_mtu); static const struct header_ops fddi_header_ops = { .create = fddi_header, - .rebuild = fddi_rebuild_header, }; diff --git a/net/802/hippi.c b/net/802/hippi.c index 2e03f8259dd5..ade1a52cdcff 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -91,33 +91,6 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, /* - * Rebuild the HIPPI MAC header. This is called after an ARP has - * completed on this sk_buff. We now let ARP fill in the other fields. - */ - -static int hippi_rebuild_header(struct sk_buff *skb) -{ - struct hippi_hdr *hip = (struct hippi_hdr *)skb->data; - - /* - * Only IP is currently supported - */ - - if(hip->snap.ethertype != htons(ETH_P_IP)) - { - printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype)); - return 0; - } - - /* - * We don't support dynamic ARP on HIPPI, but we use the ARP - * static ARP tables to hold the I-FIELDs. - */ - return arp_find(hip->le.daddr, skb); -} - - -/* * Determine the packet's protocol ID. */ @@ -186,7 +159,6 @@ EXPORT_SYMBOL(hippi_neigh_setup_dev); static const struct header_ops hippi_header_ops = { .create = hippi_header, - .rebuild = hippi_rebuild_header, }; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 64c6bed4a3d3..98a30a5b8664 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -413,7 +413,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan_transfer_features(dev, vlandev); break; - case NETDEV_DOWN: + case NETDEV_DOWN: { + struct net_device *tmp; + LIST_HEAD(close_list); + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) vlan_vid_del(dev, htons(ETH_P_8021Q), 0); @@ -425,11 +428,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) - dev_change_flags(vlandev, flgs & ~IFF_UP); + list_add(&vlandev->close_list, &close_list); + } + + dev_close_many(&close_list, false); + + list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) { netif_stacked_transfer_operstate(dev, vlandev); + list_del_init(&vlandev->close_list); } + list_del(&close_list); break; - + } case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ vlan_group_for_each_dev(grp, i, vlandev) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 118956448cf6..01d7ba840df8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -37,39 +37,6 @@ #include <linux/netpoll.h> /* - * Rebuild the Ethernet MAC header. This is called after an ARP - * (or in future other address resolution) has completed on this - * sk_buff. We now let ARP fill in the other fields. - * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. - * - * TODO: This needs a checkup, I'm ignorant here. --BLG - */ -static int vlan_dev_rebuild_header(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); - - switch (veth->h_vlan_encapsulated_proto) { -#ifdef CONFIG_INET - case htons(ETH_P_IP): - - /* TODO: Confirm this will work with VLAN headers... */ - return arp_find(veth->h_dest, skb); -#endif - default: - pr_debug("%s: unable to resolve type %X addresses\n", - dev->name, ntohs(veth->h_vlan_encapsulated_proto)); - - ether_addr_copy(veth->h_source, dev->dev_addr); - break; - } - - return 0; -} - -/* * Create the VLAN header for an arbitrary protocol layer * * saddr=NULL means use device source address @@ -534,7 +501,6 @@ static int vlan_dev_get_lock_subclass(struct net_device *dev) static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, - .rebuild = vlan_dev_rebuild_header, .parse = eth_header_parse, }; @@ -554,7 +520,6 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev static const struct header_ops vlan_passthru_header_ops = { .create = vlan_passthru_hard_header, - .rebuild = dev_rebuild_header, .parse = eth_header_parse, }; @@ -573,7 +538,6 @@ static int vlan_dev_init(struct net_device *dev) /* IFF_BROADCAST|IFF_MULTICAST; ??? */ dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_MASTER | IFF_SLAVE); - dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); @@ -589,6 +553,7 @@ static int vlan_dev_init(struct net_device *dev) if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); + dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE; /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; @@ -767,6 +732,13 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) } #endif /* CONFIG_NET_POLL_CONTROLLER */ +static int vlan_dev_get_iflink(const struct net_device *dev) +{ + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; + + return real_dev->ifindex; +} + static const struct ethtool_ops vlan_ethtool_ops = { .get_settings = vlan_ethtool_get_settings, .get_drvinfo = vlan_ethtool_get_drvinfo, @@ -803,6 +775,7 @@ static const struct net_device_ops vlan_netdev_ops = { #endif .ndo_fix_features = vlan_dev_fix_features, .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, + .ndo_get_iflink = vlan_dev_get_iflink, }; static void vlan_dev_free(struct net_device *dev) @@ -827,5 +800,5 @@ void vlan_setup(struct net_device *dev) dev->destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; - memset(dev->broadcast, 0, ETH_ALEN); + eth_zero_addr(dev->broadcast); } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 80d08f6664cb..3e3d82d8ff70 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -940,7 +940,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, + err = __sock_create(current->nsproxy->net_ns, PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", @@ -988,7 +988,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, + err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", diff --git a/net/Makefile b/net/Makefile index 38704bdf941a..3995613e5510 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,7 +69,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ -obj-$(CONFIG_NET_MPLS_GSO) += mpls/ +obj-$(CONFIG_MPLS) += mpls/ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index d1c55d8dd0a2..8ad3ec2610b6 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -141,7 +141,7 @@ static void __aarp_send_query(struct aarp_entry *a) eah->pa_src_net = sat->s_net; eah->pa_src_node = sat->s_node; - memset(eah->hw_dst, '\0', ETH_ALEN); + eth_zero_addr(eah->hw_dst); eah->pa_dst_zero = 0; eah->pa_dst_net = a->target_addr.s_net; @@ -189,7 +189,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us, eah->pa_src_node = us->s_node; if (!sha) - memset(eah->hw_dst, '\0', ETH_ALEN); + eth_zero_addr(eah->hw_dst); else ether_addr_copy(eah->hw_dst, sha); @@ -239,7 +239,7 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us) eah->pa_src_net = us->s_net; eah->pa_src_node = us->s_node; - memset(eah->hw_dst, '\0', ETH_ALEN); + eth_zero_addr(eah->hw_dst); eah->pa_dst_zero = 0; eah->pa_dst_net = us->s_net; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0d0766ea5ab1..3b7ad43c7dad 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1559,8 +1559,7 @@ freeit: return 0; } -static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t len) +static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); @@ -1728,8 +1727,8 @@ out: return err ? : len; } -static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ddpehdr *ddp; diff --git a/net/atm/common.c b/net/atm/common.c index b84057e41bd6..ed0466637e13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -523,8 +523,8 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) return 0; } -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct atm_vcc *vcc; @@ -569,8 +569,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, return copied; } -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t size) +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) { struct sock *sk = sock->sk; DEFINE_WAIT(wait); diff --git a/net/atm/common.h b/net/atm/common.h index cc3c2dae4d79..4d6f5b2068ac 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -13,10 +13,9 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags); -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t total_len); +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/net/atm/lec.c b/net/atm/lec.c index 4b98f897044a..cd3b37989057 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -2001,7 +2001,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, if (entry == NULL) goto out; memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); + eth_zero_addr(entry->mac_addr); entry->recv_vcc = vcc; entry->old_recv_push = old_push; entry->status = ESI_UNKNOWN; @@ -2086,7 +2086,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->vcc = vcc; entry->old_push = old_push; memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); + eth_zero_addr(entry->mac_addr); entry->status = ESI_UNKNOWN; hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); entry->timer.expires = jiffies + priv->vcc_timeout_period; diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 523bce72f698..4fd6af47383a 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -19,36 +19,15 @@ #include "resources.h" #include "signaling.h" -#undef WAIT_FOR_DEMON /* #define this if system calls on SVC sockets - should block until the demon runs. - Danger: may cause nasty hangs if the demon - crashes. */ - struct atm_vcc *sigd = NULL; -#ifdef WAIT_FOR_DEMON -static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep); -#endif static void sigd_put_skb(struct sk_buff *skb) { -#ifdef WAIT_FOR_DEMON - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&sigd_sleep, &wait); - while (!sigd) { - set_current_state(TASK_UNINTERRUPTIBLE); - pr_debug("atmsvc: waiting for signaling daemon...\n"); - schedule(); - } - current->state = TASK_RUNNING; - remove_wait_queue(&sigd_sleep, &wait); -#else if (!sigd) { pr_debug("atmsvc: no signaling daemon\n"); kfree_skb(skb); return; } -#endif atm_force_charge(sigd, skb->truesize); skb_queue_tail(&sk_atm(sigd)->sk_receive_queue, skb); sk_atm(sigd)->sk_data_ready(sk_atm(sigd)); @@ -261,8 +240,5 @@ int sigd_attach(struct atm_vcc *vcc) vcc_insert_socket(sk_atm(vcc)); set_bit(ATM_VF_META, &vcc->flags); set_bit(ATM_VF_READY, &vcc->flags); -#ifdef WAIT_FOR_DEMON - wake_up(&sigd_sleep); -#endif return 0; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ca049a7c9287..330c1f4a5a0b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1432,8 +1432,7 @@ out: return err; } -static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); struct sock *sk = sock->sk; @@ -1599,8 +1598,8 @@ out: return err; } -static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 67de6b33f2c3..7c646bb2c6f7 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -46,9 +46,9 @@ #ifdef CONFIG_INET -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) +static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) { unsigned char *buff; @@ -100,7 +100,7 @@ int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, return -AX25_HEADER_LEN; /* Unfinished header */ } -int ax25_rebuild_header(struct sk_buff *skb) +netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) { struct sk_buff *ourskb; unsigned char *bp = skb->data; @@ -115,9 +115,6 @@ int ax25_rebuild_header(struct sk_buff *skb) dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); - if (arp_find(bp + 1, skb)) - return 1; - route = ax25_get_route(dst, NULL); if (route) { digipeat = route->digipeat; @@ -129,6 +126,7 @@ int ax25_rebuild_header(struct sk_buff *skb) dev = skb->dev; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) { + kfree_skb(skb); goto put; } @@ -212,31 +210,29 @@ put: if (route) ax25_put_route(route); - return 1; + return NETDEV_TX_OK; } #else /* INET */ -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) +static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) { return -AX25_HEADER_LEN; } -int ax25_rebuild_header(struct sk_buff *skb) +netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) { - return 1; + kfree_skb(skb); + return NETDEV_TX_OK; } - #endif const struct header_ops ax25_header_ops = { .create = ax25_hard_header, - .rebuild = ax25_rebuild_header, }; -EXPORT_SYMBOL(ax25_hard_header); -EXPORT_SYMBOL(ax25_rebuild_header); EXPORT_SYMBOL(ax25_header_ops); +EXPORT_SYMBOL(ax25_ip_xmit); diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 27649e85f3f6..090828cf1fa7 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -592,15 +592,16 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", - (curr_gw == gw_node ? "=>" : " "), - gw_node->orig_node->orig, - router_ifinfo->bat_iv.tq_avg, router->addr, - router->if_incoming->net_dev->name, - gw_node->bandwidth_down / 10, - gw_node->bandwidth_down % 10, - gw_node->bandwidth_up / 10, - gw_node->bandwidth_up % 10); + seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_iv.tq_avg, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; if (curr_gw) batadv_gw_node_free_ref(curr_gw); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index fbda6b54baff..baf1f9843f2c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -83,11 +83,12 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return true; /* no more parents..stop recursion */ - if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex) + if (dev_get_iflink(net_dev) == 0 || + dev_get_iflink(net_dev) == net_dev->ifindex) return false; /* recurse over the parent device */ - parent_dev = __dev_get_by_index(&init_net, net_dev->iflink); + parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 7de74635a110..b8c794b87523 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -91,4 +91,12 @@ config BT_SELFTEST_SMP Run test cases for SMP cryptographic functionality, including both legacy SMP as well as the Secure Connections features. +config BT_DEBUGFS + bool "Export Bluetooth internals in debugfs" + depends on BT && DEBUG_FS + default y + help + Provide extensive information about internal Bluetooth states + in debugfs. + source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 8e96e3072266..9a8ea232d28f 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,8 +13,9 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o + a2mp.o amp.o ecc.o hci_request.o mgmt_util.o +bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index cedfbda15dad..5a04eb1a7e57 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -19,9 +19,11 @@ #include "a2mp.h" #include "amp.h" +#define A2MP_FEAT_EXT 0x8000 + /* Global AMP Manager list */ -LIST_HEAD(amp_mgr_list); -DEFINE_MUTEX(amp_mgr_list_lock); +static LIST_HEAD(amp_mgr_list); +static DEFINE_MUTEX(amp_mgr_list_lock); /* A2MP build & send command helper functions */ static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data) @@ -43,7 +45,7 @@ static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data) return cmd; } -void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) +static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) { struct l2cap_chan *chan = mgr->a2mp_chan; struct a2mp_cmd *cmd; @@ -67,7 +69,7 @@ void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) kfree(cmd); } -u8 __next_ident(struct amp_mgr *mgr) +static u8 __next_ident(struct amp_mgr *mgr) { if (++mgr->ident == 0) mgr->ident = 1; @@ -75,6 +77,23 @@ u8 __next_ident(struct amp_mgr *mgr) return mgr->ident; } +static struct amp_mgr *amp_mgr_lookup_by_state(u8 state) +{ + struct amp_mgr *mgr; + + mutex_lock(&_mgr_list_lock); + list_for_each_entry(mgr, &_mgr_list, list) { + if (test_and_clear_bit(state, &mgr->state)) { + amp_mgr_get(mgr); + mutex_unlock(&_mgr_list_lock); + return mgr; + } + } + mutex_unlock(&_mgr_list_lock); + + return NULL; +} + /* hci_dev_list shall be locked */ static void __a2mp_add_cl(struct amp_mgr *mgr, struct a2mp_cl *cl) { @@ -860,23 +879,6 @@ struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, return mgr->a2mp_chan; } -struct amp_mgr *amp_mgr_lookup_by_state(u8 state) -{ - struct amp_mgr *mgr; - - mutex_lock(&_mgr_list_lock); - list_for_each_entry(mgr, &_mgr_list, list) { - if (test_and_clear_bit(state, &mgr->state)) { - amp_mgr_get(mgr); - mutex_unlock(&_mgr_list_lock); - return mgr; - } - } - mutex_unlock(&_mgr_list_lock); - - return NULL; -} - void a2mp_send_getinfo_rsp(struct hci_dev *hdev) { struct amp_mgr *mgr; diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h index 487b54c1308f..296f665adb09 100644 --- a/net/bluetooth/a2mp.h +++ b/net/bluetooth/a2mp.h @@ -17,8 +17,6 @@ #include <net/bluetooth/l2cap.h> -#define A2MP_FEAT_EXT 0x8000 - enum amp_mgr_state { READ_LOC_AMP_INFO, READ_LOC_AMP_ASSOC, @@ -131,16 +129,10 @@ struct a2mp_physlink_rsp { #define A2MP_STATUS_PHYS_LINK_EXISTS 0x05 #define A2MP_STATUS_SECURITY_VIOLATION 0x06 -extern struct list_head amp_mgr_list; -extern struct mutex amp_mgr_list_lock; - struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); -u8 __next_ident(struct amp_mgr *mgr); struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, struct sk_buff *skb); -struct amp_mgr *amp_mgr_lookup_by_state(u8 state); -void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data); void a2mp_discover_amp(struct l2cap_chan *chan); void a2mp_send_getinfo_rsp(struct hci_dev *hdev); void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ce22e0cfa923..70f9d945faf7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -210,8 +210,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) } EXPORT_SYMBOL(bt_accept_dequeue); -int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -283,8 +283,8 @@ static long bt_sock_data_wait(struct sock *sk, long timeo) return timeo; } -int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int err = 0; @@ -711,10 +711,9 @@ EXPORT_SYMBOL_GPL(bt_debugfs); static int __init bt_init(void) { - struct sk_buff *skb; int err; - BUILD_BUG_ON(sizeof(struct bt_skb_cb) > sizeof(skb->cb)); + sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); BT_INFO("Core ver %s", VERSION); @@ -750,6 +749,13 @@ static int __init bt_init(void) goto sock_err; } + err = mgmt_init(); + if (err < 0) { + sco_exit(); + l2cap_exit(); + goto sock_err; + } + return 0; sock_err: @@ -764,6 +770,8 @@ error: static void __exit bt_exit(void) { + mgmt_exit(); + sco_exit(); l2cap_exit(); diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 5a5b16f365e9..40854c99bc1e 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -111,6 +111,10 @@ struct bnep_ext_hdr { #define BNEPCONNDEL _IOW('B', 201, int) #define BNEPGETCONNLIST _IOR('B', 210, int) #define BNEPGETCONNINFO _IOR('B', 211, int) +#define BNEPGETSUPPFEAT _IOR('B', 212, int) + +#define BNEP_SETUP_RESPONSE 0 +#define BNEP_SETUP_RSP_SENT 10 struct bnep_connadd_req { int sock; /* Connected socket */ diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 05f57e491ccb..1641367e54ca 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -231,7 +231,14 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len) break; case BNEP_SETUP_CONN_REQ: - err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, BNEP_CONN_NOT_ALLOWED); + /* Successful response should be sent only once */ + if (test_bit(BNEP_SETUP_RESPONSE, &s->flags) && + !test_and_set_bit(BNEP_SETUP_RSP_SENT, &s->flags)) + err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, + BNEP_SUCCESS); + else + err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, + BNEP_CONN_NOT_ALLOWED); break; default: { @@ -239,7 +246,7 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len) pkt[0] = BNEP_CONTROL; pkt[1] = BNEP_CMD_NOT_UNDERSTOOD; pkt[2] = cmd; - bnep_send(s, pkt, sizeof(pkt)); + err = bnep_send(s, pkt, sizeof(pkt)); } break; } @@ -292,29 +299,55 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) { struct net_device *dev = s->dev; struct sk_buff *nskb; - u8 type; + u8 type, ctrl_type; dev->stats.rx_bytes += skb->len; type = *(u8 *) skb->data; skb_pull(skb, 1); + ctrl_type = *(u8 *)skb->data; if ((type & BNEP_TYPE_MASK) >= sizeof(__bnep_rx_hlen)) goto badframe; if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) { - bnep_rx_control(s, skb->data, skb->len); - kfree_skb(skb); - return 0; - } + if (bnep_rx_control(s, skb->data, skb->len) < 0) { + dev->stats.tx_errors++; + kfree_skb(skb); + return 0; + } - skb_reset_mac_header(skb); + if (!(type & BNEP_EXT_HEADER)) { + kfree_skb(skb); + return 0; + } - /* Verify and pull out header */ - if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK])) - goto badframe; + /* Verify and pull ctrl message since it's already processed */ + switch (ctrl_type) { + case BNEP_SETUP_CONN_REQ: + /* Pull: ctrl type (1 b), len (1 b), data (len bytes) */ + if (!skb_pull(skb, 2 + *(u8 *)(skb->data + 1) * 2)) + goto badframe; + break; + case BNEP_FILTER_MULTI_ADDR_SET: + case BNEP_FILTER_NET_TYPE_SET: + /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */ + if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2)) + goto badframe; + break; + default: + kfree_skb(skb); + return 0; + } + } else { + skb_reset_mac_header(skb); - s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); + /* Verify and pull out header */ + if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK])) + goto badframe; + + s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); + } if (type & BNEP_EXT_HEADER) { if (bnep_rx_extension(s, skb) < 0) @@ -525,6 +558,7 @@ static struct device_type bnep_type = { int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) { + u32 valid_flags = BIT(BNEP_SETUP_RESPONSE); struct net_device *dev; struct bnep_session *s, *ss; u8 dst[ETH_ALEN], src[ETH_ALEN]; @@ -535,6 +569,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) if (!l2cap_is_socket(sock)) return -EBADFD; + if (req->flags & ~valid_flags) + return -EINVAL; + baswap((void *) dst, &l2cap_pi(sock->sk)->chan->dst); baswap((void *) src, &l2cap_pi(sock->sk)->chan->src); @@ -566,6 +603,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) s->sock = sock; s->role = req->role; s->state = BT_CONNECTED; + s->flags = req->flags; s->msg.msg_flags = MSG_NOSIGNAL; @@ -611,11 +649,15 @@ failed: int bnep_del_connection(struct bnep_conndel_req *req) { + u32 valid_flags = 0; struct bnep_session *s; int err = 0; BT_DBG(""); + if (req->flags & ~valid_flags) + return -EINVAL; + down_read(&bnep_session_sem); s = __bnep_get_session(req->dst); @@ -631,10 +673,12 @@ int bnep_del_connection(struct bnep_conndel_req *req) static void __bnep_copy_ci(struct bnep_conninfo *ci, struct bnep_session *s) { + u32 valid_flags = BIT(BNEP_SETUP_RESPONSE); + memset(ci, 0, sizeof(*ci)); memcpy(ci->dst, s->eh.h_source, ETH_ALEN); strcpy(ci->device, s->dev->name); - ci->flags = s->flags; + ci->flags = s->flags & valid_flags; ci->state = s->state; ci->role = s->role; } diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 4b488ec26105..6ceb5d36a32b 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -218,7 +218,7 @@ static const struct net_device_ops bnep_netdev_ops = { void bnep_net_setup(struct net_device *dev) { - memset(dev->broadcast, 0xff, ETH_ALEN); + eth_broadcast_addr(dev->broadcast); dev->addr_len = ETH_ALEN; ether_setup(dev); diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5f051290daba..bde2bdd9e929 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -57,6 +57,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long struct bnep_conninfo ci; struct socket *nsock; void __user *argp = (void __user *)arg; + __u32 supp_feat = BIT(BNEP_SETUP_RESPONSE); int err; BT_DBG("cmd %x arg %lx", cmd, arg); @@ -120,6 +121,12 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return err; + case BNEPGETSUPPFEAT: + if (copy_to_user(argp, &supp_feat, sizeof(supp_feat))) + return -EFAULT; + + return 0; + default: return -EINVAL; } diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 75bd2c42e3e7..b0c6c6af76ef 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -333,7 +333,7 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb) return; } - if (session->flags & (1 << CMTP_LOOPBACK)) { + if (session->flags & BIT(CMTP_LOOPBACK)) { kfree_skb(skb); return; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 278a194e6af4..298ed37010e6 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -75,10 +75,11 @@ static void __cmtp_unlink_session(struct cmtp_session *session) static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci) { + u32 valid_flags = BIT(CMTP_LOOPBACK); memset(ci, 0, sizeof(*ci)); bacpy(&ci->bdaddr, &session->bdaddr); - ci->flags = session->flags; + ci->flags = session->flags & valid_flags; ci->state = session->state; ci->num = session->num; @@ -313,7 +314,7 @@ static int cmtp_session(void *arg) down_write(&cmtp_session_sem); - if (!(session->flags & (1 << CMTP_LOOPBACK))) + if (!(session->flags & BIT(CMTP_LOOPBACK))) cmtp_detach_device(session); fput(session->sock->file); @@ -329,6 +330,7 @@ static int cmtp_session(void *arg) int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) { + u32 valid_flags = BIT(CMTP_LOOPBACK); struct cmtp_session *session, *s; int i, err; @@ -337,6 +339,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) if (!l2cap_is_socket(sock)) return -EBADFD; + if (req->flags & ~valid_flags) + return -EINVAL; + session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL); if (!session) return -ENOMEM; @@ -385,7 +390,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) goto unlink; } - if (!(session->flags & (1 << CMTP_LOOPBACK))) { + if (!(session->flags & BIT(CMTP_LOOPBACK))) { err = cmtp_attach_device(session); if (err < 0) { atomic_inc(&session->terminate); @@ -409,11 +414,15 @@ failed: int cmtp_del_connection(struct cmtp_conndel_req *req) { + u32 valid_flags = 0; struct cmtp_session *session; int err = 0; BT_DBG(""); + if (req->flags & ~valid_flags) + return -EINVAL; + down_read(&cmtp_session_sem); session = __cmtp_get_session(&req->bdaddr); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c9b8fa544785..ee5e59839b02 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -309,7 +309,7 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status) else hci_add_sco(sco, conn->handle); } else { - hci_proto_connect_cfm(sco, status); + hci_connect_cfm(sco, status); hci_conn_del(sco); } } @@ -571,7 +571,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || - test_bit(HCI_USER_CHANNEL, &d->dev_flags) || + hci_dev_test_flag(d, HCI_USER_CHANNEL) || d->dev_type != HCI_BREDR) continue; @@ -618,7 +618,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_del(conn); @@ -700,7 +700,7 @@ static void hci_req_directed_advertising(struct hci_request *req, * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); /* Set require_privacy to false so that the remote device has a * chance of identifying us. @@ -733,6 +733,14 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, struct hci_request req; int err; + /* Let's make sure that le is enabled.*/ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + if (lmp_le_capable(hdev)) + return ERR_PTR(-ECONNREFUSED); + + return ERR_PTR(-EOPNOTSUPP); + } + /* Some devices send ATT messages as soon as the physical link is * established. To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing @@ -791,7 +799,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * anyway have to disable it in order to start directed * advertising. */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { u8 enable = 0x00; hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); @@ -802,7 +810,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, /* If we're active scanning most controllers are unable * to initiate advertising. Simply reject the attempt. */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE) { skb_queue_purge(&req.cmd_q); hci_conn_del(conn); @@ -832,9 +840,9 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * handler for scan disabling knows to set the correct discovery * state. */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { hci_req_add_le_scan_disable(&req); - set_bit(HCI_LE_SCAN_INTERRUPTED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); } hci_req_add_le_create_conn(&req, conn); @@ -856,8 +864,12 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, { struct hci_conn *acl; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + if (lmp_bredr_capable(hdev)) + return ERR_PTR(-ECONNREFUSED); + return ERR_PTR(-EOPNOTSUPP); + } acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { @@ -930,7 +942,7 @@ int hci_conn_check_link_mode(struct hci_conn *conn) * Connections is used and the link is encrypted with AES-CCM * using a P-256 authenticated combination key. */ - if (test_bit(HCI_SC_ONLY, &conn->hdev->flags)) { + if (hci_dev_test_flag(conn->hdev, HCI_SC_ONLY)) { if (!hci_conn_sc_enabled(conn) || !test_bit(HCI_CONN_AES_CCM, &conn->flags) || conn->key_type != HCI_LK_AUTH_COMBINATION_P256) @@ -1139,7 +1151,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev) list_for_each_entry_safe(c, n, &h->list, list) { c->state = BT_CLOSED; - hci_proto_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM); + hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM); hci_conn_del(c); } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3322d3f4c85a..476709bd068a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -51,7 +51,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); -DEFINE_RWLOCK(hci_cb_list_lock); +DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -80,7 +80,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_DUT_MODE, &hdev->dbg_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -106,7 +106,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_DUT_MODE, &hdev->dbg_flags)) + if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) return -EALREADY; hci_req_lock(hdev); @@ -127,7 +127,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (err < 0) return err; - change_bit(HCI_DUT_MODE, &hdev->dbg_flags); + hci_dev_change_flag(hdev, HCI_DUT_MODE); return count; } @@ -141,13 +141,16 @@ static const struct file_operations dut_mode_fops = { /* ---- HCI requests ---- */ -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, + struct sk_buff *skb) { BT_DBG("%s result 0x%2.2x", hdev->name, result); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; + if (skb) + hdev->req_skb = skb_get(skb); wake_up_interruptible(&hdev->req_wait_q); } } @@ -163,66 +166,12 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) } } -static struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, - u8 event) -{ - struct hci_ev_cmd_complete *ev; - struct hci_event_hdr *hdr; - struct sk_buff *skb; - - hci_dev_lock(hdev); - - skb = hdev->recv_evt; - hdev->recv_evt = NULL; - - hci_dev_unlock(hdev); - - if (!skb) - return ERR_PTR(-ENODATA); - - if (skb->len < sizeof(*hdr)) { - BT_ERR("Too short HCI event"); - goto failed; - } - - hdr = (void *) skb->data; - skb_pull(skb, HCI_EVENT_HDR_SIZE); - - if (event) { - if (hdr->evt != event) - goto failed; - return skb; - } - - if (hdr->evt != HCI_EV_CMD_COMPLETE) { - BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); - goto failed; - } - - if (skb->len < sizeof(*ev)) { - BT_ERR("Too short cmd_complete event"); - goto failed; - } - - ev = (void *) skb->data; - skb_pull(skb, sizeof(*ev)); - - if (opcode == __le16_to_cpu(ev->opcode)) - return skb; - - BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode, - __le16_to_cpu(ev->opcode)); - -failed: - kfree_skb(skb); - return ERR_PTR(-ENODATA); -} - struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout) { DECLARE_WAITQUEUE(wait, current); struct hci_request req; + struct sk_buff *skb; int err = 0; BT_DBG("%s", hdev->name); @@ -236,7 +185,7 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); - err = hci_req_run(&req, hci_req_sync_complete); + err = hci_req_run_skb(&req, hci_req_sync_complete); if (err < 0) { remove_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_RUNNING); @@ -265,13 +214,20 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, } hdev->req_status = hdev->req_result = 0; + skb = hdev->req_skb; + hdev->req_skb = NULL; BT_DBG("%s end: err %d", hdev->name, err); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return ERR_PTR(err); + } + + if (!skb) + return ERR_PTR(-ENODATA); - return hci_get_cmd_complete(hdev, opcode, event); + return skb; } EXPORT_SYMBOL(__hci_cmd_sync_ev); @@ -303,7 +259,7 @@ static int __hci_req_sync(struct hci_dev *hdev, add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); - err = hci_req_run(&req, hci_req_sync_complete); + err = hci_req_run_skb(&req, hci_req_sync_complete); if (err < 0) { hdev->req_status = 0; @@ -390,7 +346,7 @@ static void bredr_init(struct hci_request *req) hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); } -static void amp_init(struct hci_request *req) +static void amp_init1(struct hci_request *req) { req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; @@ -400,9 +356,6 @@ static void amp_init(struct hci_request *req) /* Read Local Supported Commands */ hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); - /* Read Local Supported Features */ - hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); - /* Read Local AMP Info */ hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); @@ -416,6 +369,16 @@ static void amp_init(struct hci_request *req) hci_req_add(req, HCI_OP_READ_LOCATION_DATA, 0, NULL); } +static void amp_init2(struct hci_request *req) +{ + /* Read Local Supported Features. Not all AMP controllers + * support this so it's placed conditionally in the second + * stage init. + */ + if (req->hdev->commands[14] & 0x20) + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); +} + static void hci_init1_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -432,7 +395,7 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt) break; case HCI_AMP: - amp_init(req); + amp_init1(req); break; default: @@ -494,7 +457,7 @@ static void le_setup(struct hci_request *req) /* LE-only controllers have LE implicitly enabled */ if (!lmp_bredr_capable(hdev)) - set_bit(HCI_LE_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_ENABLED); } static void hci_setup_event_mask(struct hci_request *req) @@ -578,10 +541,13 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + if (hdev->dev_type == HCI_AMP) + return amp_init2(req); + if (lmp_bredr_capable(hdev)) bredr_setup(req); else - clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); if (lmp_le_capable(hdev)) le_setup(req); @@ -607,7 +573,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) */ hdev->max_page = 0x01; - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { u8 mode = 0x01; hci_req_add(req, HCI_OP_WRITE_SSP_MODE, @@ -646,7 +612,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) sizeof(cp), &cp); } - if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) { u8 enable = 1; hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), &enable); @@ -683,7 +649,7 @@ static void hci_set_le_support(struct hci_request *req) memset(&cp, 0, sizeof(cp)); - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { cp.le = 0x01; cp.simul = 0x00; } @@ -871,7 +837,7 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); /* Enable Secure Connections if supported and configured */ - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && bredr_sc_enabled(hdev)) { u8 support = 0x01; @@ -891,22 +857,22 @@ static int __hci_init(struct hci_dev *hdev) /* The Device Under Test (DUT) mode is special and available for * all controller types. So just create it early on. */ - if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SETUP)) { debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, &dut_mode_fops); } + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + /* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode * BR/EDR/LE type controllers. AMP controllers only need the - * first stage init. + * first two stages of init. */ if (hdev->dev_type != HCI_BREDR) return 0; - err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); - if (err < 0) - return err; - err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); if (err < 0) return err; @@ -927,8 +893,8 @@ static int __hci_init(struct hci_dev *hdev) * So only when in setup phase or config phase, create the debugfs * entries and register the SMP channels. */ - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) return 0; hci_debugfs_create_common(hdev); @@ -1290,12 +1256,12 @@ int hci_inquiry(void __user *arg) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } @@ -1305,7 +1271,7 @@ int hci_inquiry(void __user *arg) goto done; } - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; } @@ -1377,17 +1343,17 @@ static int hci_dev_do_open(struct hci_dev *hdev) hci_req_lock(hdev); - if (test_bit(HCI_UNREGISTER, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { ret = -ENODEV; goto done; } - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) { /* Check for rfkill but allow the HCI setup stage to * proceed (which in itself doesn't cause any RF activity). */ - if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_RFKILLED)) { ret = -ERFKILL; goto done; } @@ -1404,7 +1370,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) * This check is only valid for BR/EDR controllers * since AMP controllers do not have an address. */ - if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hdev->dev_type == HCI_BREDR && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY)) { @@ -1426,7 +1392,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); - if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SETUP)) { if (hdev->setup) ret = hdev->setup(hdev); @@ -1438,7 +1404,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) */ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks)) - set_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* For an unconfigured controller it is required to * read at least the version information provided by @@ -1448,11 +1414,11 @@ static int hci_dev_do_open(struct hci_dev *hdev) * also the original Bluetooth public device address * will be read using the Read BD Address command. */ - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) ret = __hci_unconf_init(hdev); } - if (test_bit(HCI_CONFIG, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_CONFIG)) { /* If public address change is configured, ensure that * the address gets programmed. If the driver does not * support changing the public address, fail the power @@ -1466,8 +1432,8 @@ static int hci_dev_do_open(struct hci_dev *hdev) } if (!ret) { - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) ret = __hci_init(hdev); } @@ -1475,13 +1441,13 @@ static int hci_dev_do_open(struct hci_dev *hdev) if (!ret) { hci_dev_hold(hdev); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags) && - !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG) && + !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hdev->dev_type == HCI_BREDR) { hci_dev_lock(hdev); mgmt_powered(hdev, 1); @@ -1533,8 +1499,8 @@ int hci_dev_open(__u16 dev) * HCI_USER_CHANNEL will be set first before attempting to * open the device. */ - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EOPNOTSUPP; goto done; } @@ -1544,7 +1510,7 @@ int hci_dev_open(__u16 dev) * particularly important if the setup procedure has not yet * completed. */ - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) cancel_delayed_work(&hdev->power_off); /* After this call it is guaranteed that the setup procedure @@ -1559,9 +1525,9 @@ int hci_dev_open(__u16 dev) * is in use this bit will be cleared again and userspace has * to explicitly enable it. */ - if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && - !test_bit(HCI_MGMT, &hdev->dev_flags)) - set_bit(HCI_BONDABLE, &hdev->dev_flags); + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + !hci_dev_test_flag(hdev, HCI_MGMT)) + hci_dev_set_flag(hdev, HCI_BONDABLE); err = hci_dev_do_open(hdev); @@ -1591,6 +1557,12 @@ static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + /* Execute vendor specific shutdown routine */ + if (hdev->shutdown) + hdev->shutdown(hdev); + } + cancel_delayed_work(&hdev->power_off); hci_req_cancel(hdev, ENODEV); @@ -1609,17 +1581,17 @@ static int hci_dev_do_close(struct hci_dev *hdev) if (hdev->discov_timeout > 0) { cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = 0; - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); } - if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) cancel_delayed_work(&hdev->service_cache); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); /* Avoid potential lockdep warnings from the *_flush() calls by @@ -1631,7 +1603,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (!hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { if (hdev->dev_type == HCI_BREDR) mgmt_powered(hdev, 0); } @@ -1651,8 +1623,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); - if (!test_bit(HCI_AUTO_OFF, &hdev->dev_flags) && - !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && + !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); @@ -1674,16 +1646,13 @@ static int hci_dev_do_close(struct hci_dev *hdev) hdev->sent_cmd = NULL; } - kfree_skb(hdev->recv_evt); - hdev->recv_evt = NULL; - /* After this point our queues are empty * and no tasks are scheduled. */ hdev->close(hdev); /* Clear flags */ hdev->flags &= BIT(HCI_RAW); - hdev->dev_flags &= ~HCI_PERSISTENT_MASK; + hci_dev_clear_volatile_flags(hdev); /* Controller radio is available but is currently powered down */ hdev->amp_status = AMP_STATUS_POWERED_DOWN; @@ -1707,12 +1676,12 @@ int hci_dev_close(__u16 dev) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) cancel_delayed_work(&hdev->power_off); err = hci_dev_do_close(hdev); @@ -1770,12 +1739,12 @@ int hci_dev_reset(__u16 dev) goto done; } - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } @@ -1796,12 +1765,12 @@ int hci_dev_reset_stat(__u16 dev) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { ret = -EOPNOTSUPP; goto done; } @@ -1820,29 +1789,29 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) BT_DBG("%s scan 0x%02x", hdev->name, scan); if ((scan & SCAN_PAGE)) - conn_changed = !test_and_set_bit(HCI_CONNECTABLE, - &hdev->dev_flags); + conn_changed = !hci_dev_test_and_set_flag(hdev, + HCI_CONNECTABLE); else - conn_changed = test_and_clear_bit(HCI_CONNECTABLE, - &hdev->dev_flags); + conn_changed = hci_dev_test_and_clear_flag(hdev, + HCI_CONNECTABLE); if ((scan & SCAN_INQUIRY)) { - discov_changed = !test_and_set_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + discov_changed = !hci_dev_test_and_set_flag(hdev, + HCI_DISCOVERABLE); } else { - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); - discov_changed = test_and_clear_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + discov_changed = hci_dev_test_and_clear_flag(hdev, + HCI_DISCOVERABLE); } - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; if (conn_changed || discov_changed) { /* In case this was disabled through mgmt */ - set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) mgmt_update_adv_data(hdev); mgmt_new_settings(hdev); @@ -1862,12 +1831,12 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!hdev) return -ENODEV; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } @@ -1877,7 +1846,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) goto done; } - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; } @@ -1981,7 +1950,7 @@ int hci_get_dev_list(void __user *arg) * is running, but in that case still indicate that the * device is actually down. */ - if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) flags &= ~BIT(HCI_UP); (dr + n)->dev_id = hdev->id; @@ -2019,7 +1988,7 @@ int hci_get_dev_info(void __user *arg) * is running, but in that case still indicate that the * device is actually down. */ - if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) flags = hdev->flags & ~BIT(HCI_UP); else flags = hdev->flags; @@ -2062,16 +2031,16 @@ static int hci_rfkill_set_block(void *data, bool blocked) BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; if (blocked) { - set_bit(HCI_RFKILLED, &hdev->dev_flags); - if (!test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) + hci_dev_set_flag(hdev, HCI_RFKILLED); + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) hci_dev_do_close(hdev); } else { - clear_bit(HCI_RFKILLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_RFKILLED); } return 0; @@ -2100,23 +2069,23 @@ static void hci_power_on(struct work_struct *work) * ignored and they need to be checked now. If they are still * valid, it is important to turn the device back off. */ - if (test_bit(HCI_RFKILLED, &hdev->dev_flags) || - test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_RFKILLED) || + hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || (hdev->dev_type == HCI_BREDR && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { - clear_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_do_close(hdev); - } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + } else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); } - if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) { + if (hci_dev_test_and_clear_flag(hdev, HCI_SETUP)) { /* For unconfigured devices, set the HCI_RAW flag * so that userspace can easily identify them. */ - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) set_bit(HCI_RAW, &hdev->flags); /* For fully configured devices, this will send @@ -2127,11 +2096,11 @@ static void hci_power_on(struct work_struct *work) * and no event will be send. */ mgmt_index_added(hdev); - } else if (test_and_clear_bit(HCI_CONFIG, &hdev->dev_flags)) { + } else if (hci_dev_test_and_clear_flag(hdev, HCI_CONFIG)) { /* When the controller is now configured, then it * is important to clear the HCI_RAW flag. */ - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) clear_bit(HCI_RAW, &hdev->flags); /* Powering on the controller with HCI_CONFIG set only @@ -2500,6 +2469,42 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) } } +bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) +{ + struct smp_ltk *k; + struct smp_irk *irk; + u8 addr_type; + + if (type == BDADDR_BREDR) { + if (hci_find_link_key(hdev, bdaddr)) + return true; + return false; + } + + /* Convert to HCI addr type which struct smp_ltk uses */ + if (type == BDADDR_LE_PUBLIC) + addr_type = ADDR_LE_DEV_PUBLIC; + else + addr_type = ADDR_LE_DEV_RANDOM; + + irk = hci_get_irk(hdev, bdaddr, addr_type); + if (irk) { + bdaddr = &irk->bdaddr; + addr_type = irk->addr_type; + } + + rcu_read_lock(); + list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + if (k->bdaddr_type == addr_type && !bacmp(bdaddr, &k->bdaddr)) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + /* HCI command timer function */ static void hci_cmd_timeout(struct work_struct *work) { @@ -2822,7 +2827,6 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, { /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_request req; struct hci_cp_inquiry cp; int err; @@ -2841,21 +2845,37 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, break; case DISCOV_TYPE_INTERLEAVED: - hci_req_init(&req, hdev); + hci_dev_lock(hdev); - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; - hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + /* If we were running LE only scan, change discovery + * state. If we were running both LE and BR/EDR inquiry + * simultaneously, and BR/EDR inquiry is already + * finished, stop discovery, otherwise BR/EDR inquiry + * will stop discovery when finished. + */ + if (!test_bit(HCI_INQUIRY, &hdev->flags)) + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } else { + struct hci_request req; - hci_dev_lock(hdev); + hci_inquiry_cache_flush(hdev); - hci_inquiry_cache_flush(hdev); + hci_req_init(&req, hdev); - err = hci_req_run(&req, inquiry_complete); - if (err) { - BT_ERR("Inquiry request failed: err %d", err); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } } hci_dev_unlock(hdev); @@ -2934,7 +2954,7 @@ static void le_scan_restart_work(struct work_struct *work) BT_DBG("%s", hdev->name); /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; hci_req_init(&req, hdev); @@ -2967,9 +2987,9 @@ static void le_scan_restart_work(struct work_struct *work) void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; @@ -3059,6 +3079,7 @@ struct hci_dev *hci_alloc_dev(void) hci_init_sysfs(hdev); discovery_init(hdev); + adv_info_init(hdev); return hdev; } @@ -3137,16 +3158,16 @@ int hci_register_dev(struct hci_dev *hdev) } if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) - set_bit(HCI_RFKILLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RFKILLED); - set_bit(HCI_SETUP, &hdev->dev_flags); - set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SETUP); + hci_dev_set_flag(hdev, HCI_AUTO_OFF); if (hdev->dev_type == HCI_BREDR) { /* Assume BR/EDR support until proven otherwise (such as * through reading supported features during init. */ - set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); } write_lock(&hci_dev_list_lock); @@ -3157,7 +3178,7 @@ int hci_register_dev(struct hci_dev *hdev) * and should not be included in normal operation. */ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - set_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_UNCONFIGURED); hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -3179,11 +3200,11 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int i, id; + int id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - set_bit(HCI_UNREGISTER, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_UNREGISTER); id = hdev->id; @@ -3193,14 +3214,11 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); - for (i = 0; i < NUM_REASSEMBLY; i++) - kfree_skb(hdev->reassembly[i]); - cancel_work_sync(&hdev->power_on); if (!test_bit(HCI_INIT, &hdev->flags) && - !test_bit(HCI_SETUP, &hdev->dev_flags) && - !test_bit(HCI_CONFIG, &hdev->dev_flags)) { + !hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) { hci_dev_lock(hdev); mgmt_index_removed(hdev); hci_dev_unlock(hdev); @@ -3299,158 +3317,15 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL(hci_recv_frame); -static int hci_reassembly(struct hci_dev *hdev, int type, void *data, - int count, __u8 index) -{ - int len = 0; - int hlen = 0; - int remain = count; - struct sk_buff *skb; - struct bt_skb_cb *scb; - - if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) || - index >= NUM_REASSEMBLY) - return -EILSEQ; - - skb = hdev->reassembly[index]; - - if (!skb) { - switch (type) { - case HCI_ACLDATA_PKT: - len = HCI_MAX_FRAME_SIZE; - hlen = HCI_ACL_HDR_SIZE; - break; - case HCI_EVENT_PKT: - len = HCI_MAX_EVENT_SIZE; - hlen = HCI_EVENT_HDR_SIZE; - break; - case HCI_SCODATA_PKT: - len = HCI_MAX_SCO_SIZE; - hlen = HCI_SCO_HDR_SIZE; - break; - } - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - scb = (void *) skb->cb; - scb->expect = hlen; - scb->pkt_type = type; - - hdev->reassembly[index] = skb; - } - - while (count) { - scb = (void *) skb->cb; - len = min_t(uint, scb->expect, count); - - memcpy(skb_put(skb, len), data, len); - - count -= len; - data += len; - scb->expect -= len; - remain = count; - - switch (type) { - case HCI_EVENT_PKT: - if (skb->len == HCI_EVENT_HDR_SIZE) { - struct hci_event_hdr *h = hci_event_hdr(skb); - scb->expect = h->plen; - - if (skb_tailroom(skb) < scb->expect) { - kfree_skb(skb); - hdev->reassembly[index] = NULL; - return -ENOMEM; - } - } - break; - - case HCI_ACLDATA_PKT: - if (skb->len == HCI_ACL_HDR_SIZE) { - struct hci_acl_hdr *h = hci_acl_hdr(skb); - scb->expect = __le16_to_cpu(h->dlen); - - if (skb_tailroom(skb) < scb->expect) { - kfree_skb(skb); - hdev->reassembly[index] = NULL; - return -ENOMEM; - } - } - break; - - case HCI_SCODATA_PKT: - if (skb->len == HCI_SCO_HDR_SIZE) { - struct hci_sco_hdr *h = hci_sco_hdr(skb); - scb->expect = h->dlen; - - if (skb_tailroom(skb) < scb->expect) { - kfree_skb(skb); - hdev->reassembly[index] = NULL; - return -ENOMEM; - } - } - break; - } - - if (scb->expect == 0) { - /* Complete frame */ - - bt_cb(skb)->pkt_type = type; - hci_recv_frame(hdev, skb); - - hdev->reassembly[index] = NULL; - return remain; - } - } - - return remain; -} - -#define STREAM_REASSEMBLY 0 - -int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count) -{ - int type; - int rem = 0; - - while (count) { - struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY]; - - if (!skb) { - struct { char type; } *pkt; - - /* Start of the frame */ - pkt = data; - type = pkt->type; - - data++; - count--; - } else - type = bt_cb(skb)->pkt_type; - - rem = hci_reassembly(hdev, type, data, count, - STREAM_REASSEMBLY); - if (rem < 0) - return rem; - - data += (count - rem); - count = rem; - } - - return rem; -} -EXPORT_SYMBOL(hci_recv_stream_fragment); - /* ---- Interface to upper protocols ---- */ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - write_lock(&hci_cb_list_lock); - list_add(&cb->list, &hci_cb_list); - write_unlock(&hci_cb_list_lock); + mutex_lock(&hci_cb_list_lock); + list_add_tail(&cb->list, &hci_cb_list); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -3460,9 +3335,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - write_lock(&hci_cb_list_lock); + mutex_lock(&hci_cb_list_lock); list_del(&cb->list); - write_unlock(&hci_cb_list_lock); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -3495,11 +3370,6 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) } } -bool hci_req_pending(struct hci_dev *hdev) -{ - return (hdev->req_status == HCI_REQ_PEND); -} - /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -3874,7 +3744,7 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) static void __check_timeout(struct hci_dev *hdev, unsigned int cnt) { - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { /* ACL tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!cnt && time_after(jiffies, hdev->acl_last_tx + @@ -4057,7 +3927,7 @@ static void hci_sched_le(struct hci_dev *hdev) if (!hci_conn_num(hdev, LE_LINK)) return; - if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { /* LE tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!hdev->le_cnt && hdev->le_pkts && @@ -4105,7 +3975,7 @@ static void hci_tx_work(struct work_struct *work) BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt, hdev->le_cnt); - if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { /* Schedule queues and send stuff to HCI driver */ hci_sched_acl(hdev); hci_sched_sco(hdev); @@ -4220,9 +4090,10 @@ static void hci_resend_last(struct hci_dev *hdev) queue_work(hdev->workqueue, &hdev->cmd_work); } -void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, + hci_req_complete_t *req_complete, + hci_req_complete_skb_t *req_complete_skb) { - hci_req_complete_t req_complete = NULL; struct sk_buff *skb; unsigned long flags; @@ -4254,18 +4125,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). */ - if (hdev->sent_cmd) { - req_complete = bt_cb(hdev->sent_cmd)->req.complete; - - if (req_complete) { - /* We must set the complete callback to NULL to - * avoid calling the callback more than once if - * this function gets called again. - */ - bt_cb(hdev->sent_cmd)->req.complete = NULL; + if (bt_cb(hdev->sent_cmd)->req.complete) { + *req_complete = bt_cb(hdev->sent_cmd)->req.complete; + return; + } - goto call_complete; - } + if (bt_cb(hdev->sent_cmd)->req.complete_skb) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->req.complete_skb; + return; } /* Remove all pending commands belonging to this request */ @@ -4276,14 +4143,11 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) break; } - req_complete = bt_cb(skb)->req.complete; + *req_complete = bt_cb(skb)->req.complete; + *req_complete_skb = bt_cb(skb)->req.complete_skb; kfree_skb(skb); } spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - -call_complete: - if (req_complete) - req_complete(hdev, status, status ? opcode : HCI_OP_NOP); } static void hci_rx_work(struct work_struct *work) @@ -4302,7 +4166,7 @@ static void hci_rx_work(struct work_struct *work) hci_send_to_sock(hdev, skb); } - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { kfree_skb(skb); continue; } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 65261e5d4b84..7db4220941cc 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -28,6 +28,54 @@ #include "hci_debugfs.h" +#define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \ +static ssize_t __name ## _read(struct file *file, \ + char __user *user_buf, \ + size_t count, loff_t *ppos) \ +{ \ + struct hci_dev *hdev = file->private_data; \ + char buf[3]; \ + \ + buf[0] = test_bit(__quirk, &hdev->quirks) ? 'Y' : 'N'; \ + buf[1] = '\n'; \ + buf[2] = '\0'; \ + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); \ +} \ + \ +static ssize_t __name ## _write(struct file *file, \ + const char __user *user_buf, \ + size_t count, loff_t *ppos) \ +{ \ + struct hci_dev *hdev = file->private_data; \ + char buf[32]; \ + size_t buf_size = min(count, (sizeof(buf) - 1)); \ + bool enable; \ + \ + if (test_bit(HCI_UP, &hdev->flags)) \ + return -EBUSY; \ + \ + if (copy_from_user(buf, user_buf, buf_size)) \ + return -EFAULT; \ + \ + buf[buf_size] = '\0'; \ + if (strtobool(buf, &enable)) \ + return -EINVAL; \ + \ + if (enable == test_bit(__quirk, &hdev->quirks)) \ + return -EALREADY; \ + \ + change_bit(__quirk, &hdev->quirks); \ + \ + return count; \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .open = simple_open, \ + .read = __name ## _read, \ + .write = __name ## _write, \ + .llseek = default_llseek, \ +} \ + static int features_show(struct seq_file *f, void *ptr) { struct hci_dev *hdev = f->private; @@ -66,6 +114,30 @@ static const struct file_operations features_fops = { .release = single_release, }; +static int device_id_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%4.4x:%4.4x:%4.4x:%4.4x\n", hdev->devid_source, + hdev->devid_vendor, hdev->devid_product, hdev->devid_version); + hci_dev_unlock(hdev); + + return 0; +} + +static int device_id_open(struct inode *inode, struct file *file) +{ + return single_open(file, device_id_show, inode->i_private); +} + +static const struct file_operations device_id_fops = { + .open = device_id_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int device_list_show(struct seq_file *f, void *ptr) { struct hci_dev *hdev = f->private; @@ -166,7 +238,7 @@ static int remote_oob_show(struct seq_file *f, void *ptr) seq_printf(f, "%pMR (type %u) %u %*phN %*phN %*phN %*phN\n", &data->bdaddr, data->bdaddr_type, data->present, 16, data->hash192, 16, data->rand192, - 16, data->hash256, 19, data->rand256); + 16, data->hash256, 16, data->rand256); } hci_dev_unlock(hdev); @@ -247,7 +319,7 @@ static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -265,7 +337,7 @@ static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_SC_ONLY) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -287,6 +359,8 @@ void hci_debugfs_create_common(struct hci_dev *hdev) debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); debugfs_create_u8("hardware_error", 0444, hdev->debugfs, &hdev->hw_error_code); + debugfs_create_file("device_id", 0444, hdev->debugfs, hdev, + &device_id_fops); debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, &device_list_fops); @@ -679,7 +753,7 @@ static ssize_t force_static_address_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -704,10 +778,10 @@ static ssize_t force_static_address_write(struct file *file, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) + if (enable == hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR)) return -EALREADY; - change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); + hci_dev_change_flag(hdev, HCI_FORCE_STATIC_ADDR); return count; } @@ -997,6 +1071,11 @@ static int adv_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, adv_max_interval_set, "%llu\n"); +DEFINE_QUIRK_ATTRIBUTE(quirk_strict_duplicate_filter, + HCI_QUIRK_STRICT_DUPLICATE_FILTER); +DEFINE_QUIRK_ATTRIBUTE(quirk_simultaneous_discovery, + HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + void hci_debugfs_create_le(struct hci_dev *hdev) { debugfs_create_file("identity", 0400, hdev->debugfs, hdev, @@ -1041,6 +1120,13 @@ void hci_debugfs_create_le(struct hci_dev *hdev) &adv_max_interval_fops); debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, &hdev->discov_interleaved_timeout); + + debugfs_create_file("quirk_strict_duplicate_filter", 0644, + hdev->debugfs, hdev, + &quirk_strict_duplicate_filter_fops); + debugfs_create_file("quirk_simultaneous_discovery", 0644, + hdev->debugfs, hdev, + &quirk_simultaneous_discovery_fops); } void hci_debugfs_create_conn(struct hci_conn *conn) diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h index fb68efe083c5..4444dc8cedc2 100644 --- a/net/bluetooth/hci_debugfs.h +++ b/net/bluetooth/hci_debugfs.h @@ -20,7 +20,29 @@ SOFTWARE IS DISCLAIMED. */ +#if IS_ENABLED(CONFIG_BT_DEBUGFS) + void hci_debugfs_create_common(struct hci_dev *hdev); void hci_debugfs_create_bredr(struct hci_dev *hdev); void hci_debugfs_create_le(struct hci_dev *hdev); void hci_debugfs_create_conn(struct hci_conn *conn); + +#else + +static inline void hci_debugfs_create_common(struct hci_dev *hdev) +{ +} + +static inline void hci_debugfs_create_bredr(struct hci_dev *hdev) +{ +} + +static inline void hci_debugfs_create_le(struct hci_dev *hdev) +{ +} + +static inline void hci_debugfs_create_conn(struct hci_conn *conn) +{ +} + +#endif diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a3fb094822b6..7b61be73650f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -70,7 +70,7 @@ static void hci_cc_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - set_bit(HCI_PERIODIC_INQ, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_PERIODIC_INQ); } static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) @@ -82,7 +82,7 @@ static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - clear_bit(HCI_PERIODIC_INQ, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); hci_conn_check_pending(hdev); } @@ -198,7 +198,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) return; /* Reset all non-persistent flags */ - hdev->dev_flags &= ~HCI_PERSISTENT_MASK; + hci_dev_clear_volatile_flags(hdev); hci_discovery_set_state(hdev, DISCOVERY_STOPPED); @@ -265,7 +265,7 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_set_local_name_complete(hdev, sent, status); else if (!status) memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); @@ -282,8 +282,8 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH); } @@ -309,7 +309,7 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_AUTH, &hdev->flags); } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_auth_enable_complete(hdev, status); hci_dev_unlock(hdev); @@ -404,7 +404,7 @@ static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) if (status == 0) memcpy(hdev->dev_class, sent, 3); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_set_class_of_dev_complete(hdev, sent, status); hci_dev_unlock(hdev); @@ -497,13 +497,13 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SSP; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_ssp_enable_complete(hdev, sent->mode, status); else if (!status) { if (sent->mode) - set_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SSP_ENABLED); else - clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); } hci_dev_unlock(hdev); @@ -529,11 +529,11 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SC; } - if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) { + if (!hci_dev_test_flag(hdev, HCI_MGMT) && !status) { if (sent->support) - set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ENABLED); else - clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SC_ENABLED); } hci_dev_unlock(hdev); @@ -548,8 +548,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) { hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); hdev->lmp_ver = rp->lmp_ver; @@ -568,8 +568,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } @@ -691,7 +691,7 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_INIT, &hdev->flags)) bacpy(&hdev->bdaddr, &rp->bdaddr); - if (test_bit(HCI_SETUP, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SETUP)) bacpy(&hdev->setup_addr, &rp->bdaddr); } @@ -900,7 +900,7 @@ static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_pin_code_reply_complete(hdev, &rp->bdaddr, rp->status); if (rp->status) @@ -926,7 +926,7 @@ static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_pin_code_neg_reply_complete(hdev, &rp->bdaddr, rp->status); @@ -985,7 +985,7 @@ static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_confirm_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1001,7 +1001,7 @@ static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_confirm_neg_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1016,7 +1016,7 @@ static void hci_cc_user_passkey_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1032,7 +1032,7 @@ static void hci_cc_user_passkey_neg_reply(struct hci_dev *hdev, hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_neg_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0, rp->status); @@ -1045,11 +1045,6 @@ static void hci_cc_read_local_oob_data(struct hci_dev *hdev, struct hci_rp_read_local_oob_data *rp = (void *) skb->data; BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - - hci_dev_lock(hdev); - mgmt_read_local_oob_data_complete(hdev, rp->hash, rp->rand, NULL, NULL, - rp->status); - hci_dev_unlock(hdev); } static void hci_cc_read_local_oob_ext_data(struct hci_dev *hdev, @@ -1058,15 +1053,8 @@ static void hci_cc_read_local_oob_ext_data(struct hci_dev *hdev, struct hci_rp_read_local_oob_ext_data *rp = (void *) skb->data; BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - - hci_dev_lock(hdev); - mgmt_read_local_oob_data_complete(hdev, rp->hash192, rp->rand192, - rp->hash256, rp->rand256, - rp->status); - hci_dev_unlock(hdev); } - static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -1109,7 +1097,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) if (*sent) { struct hci_conn *conn; - set_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_ADV); conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); if (conn) @@ -1117,7 +1105,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) &conn->le_conn_timeout, conn->conn_timeout); } else { - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); } hci_dev_unlock(hdev); @@ -1192,7 +1180,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCAN_ENABLE: - set_bit(HCI_LE_SCAN, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_SCAN); if (hdev->le_scan_type == LE_SCAN_ACTIVE) clear_pending_adv_report(hdev); break; @@ -1217,7 +1205,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, */ cancel_delayed_work(&hdev->le_scan_disable); - clear_bit(HCI_LE_SCAN, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_SCAN); /* The HCI_LE_SCAN_INTERRUPTED flag indicates that we * interrupted scanning due to a connect request. Mark @@ -1226,10 +1214,9 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, * been disabled because of active scanning, so * re-enable it again if necessary. */ - if (test_and_clear_bit(HCI_LE_SCAN_INTERRUPTED, - &hdev->dev_flags)) + if (hci_dev_test_and_clear_flag(hdev, HCI_LE_SCAN_INTERRUPTED)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - else if (!test_bit(HCI_LE_ADV, &hdev->dev_flags) && + else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && hdev->discovery.state == DISCOVERY_FINDING) mgmt_reenable_advertising(hdev); @@ -1388,11 +1375,11 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (sent->le) { hdev->features[1][0] |= LMP_HOST_LE; - set_bit(HCI_LE_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LE_ENABLED); } else { hdev->features[1][0] &= ~LMP_HOST_LE; - clear_bit(HCI_LE_ENABLED, &hdev->dev_flags); - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ENABLED); + hci_dev_clear_flag(hdev, HCI_ADVERTISING); } if (sent->simul) @@ -1537,7 +1524,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (conn && conn->state == BT_CONNECT) { if (status != 0x0c || conn->attempt > 2) { conn->state = BT_CLOSED; - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_del(conn); } else conn->state = BT_CONNECT2; @@ -1581,7 +1568,7 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) if (sco) { sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); + hci_connect_cfm(sco, status); hci_conn_del(sco); } } @@ -1608,7 +1595,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1635,7 +1622,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1769,7 +1756,7 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) hci_check_pending_name(hdev, conn, &cp->bdaddr, NULL, 0); if (!conn) @@ -1811,7 +1798,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1838,7 +1825,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { if (conn->state == BT_CONFIG) { - hci_proto_connect_cfm(conn, status); + hci_connect_cfm(conn, status); hci_conn_drop(conn); } } @@ -1873,7 +1860,7 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) if (sco) { sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); + hci_connect_cfm(sco, status); hci_conn_del(sco); } } @@ -2049,6 +2036,33 @@ unlock: hci_dev_unlock(hdev); } +static void hci_cs_le_read_remote_features(struct hci_dev *hdev, u8 status) +{ + struct hci_cp_le_read_remote_features *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_READ_REMOTE_FEATURES); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_connect_cfm(conn, status); + hci_conn_drop(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) { struct hci_cp_le_start_enc *cp; @@ -2118,7 +2132,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; hci_dev_lock(hdev); @@ -2127,7 +2141,16 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) goto unlock; if (list_empty(&discov->resolve)) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + /* When BR/EDR inquiry is active and no LE scanning is in + * progress, then change discovery state to indicate completion. + * + * When running LE scanning and BR/EDR inquiry simultaneously + * and the LE scan already finished, then change the discovery + * state to indicate completion. + */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || + !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); goto unlock; } @@ -2136,7 +2159,16 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) e->name_state = NAME_PENDING; hci_discovery_set_state(hdev, DISCOVERY_RESOLVING); } else { - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + /* When BR/EDR inquiry is active and no LE scanning is in + * progress, then change discovery state to indicate completion. + * + * When running LE scanning and BR/EDR inquiry simultaneously + * and the LE scan already finished, then change the discovery + * state to indicate completion. + */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || + !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } unlock: @@ -2154,7 +2186,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!num_rsp) return; - if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) return; hci_dev_lock(hdev); @@ -2255,10 +2287,10 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_sco_setup(conn, ev->status); if (ev->status) { - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_del(conn); } else if (ev->link_type != ACL_LINK) - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); unlock: hci_dev_unlock(hdev); @@ -2304,8 +2336,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) * connection. These features are only touched through mgmt so * only do the checks if HCI_MGMT is set. */ - if (test_bit(HCI_MGMT, &hdev->dev_flags) && - !test_bit(HCI_CONNECTABLE, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_MGMT) && + !hci_dev_test_flag(hdev, HCI_CONNECTABLE) && !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr, BDADDR_BREDR)) { hci_reject_conn(hdev, &ev->bdaddr); @@ -2366,7 +2398,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) &cp); } else { conn->state = BT_CONNECT2; - hci_proto_connect_cfm(conn, 0); + hci_connect_cfm(conn, 0); } } @@ -2444,7 +2476,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) type = conn->type; - hci_proto_disconn_cfm(conn, ev->reason); + hci_disconn_cfm(conn, ev->reason); hci_conn_del(conn); /* Re-enable advertising if necessary, since it might @@ -2501,7 +2533,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) &cp); } else { conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } } else { @@ -2542,7 +2574,7 @@ static void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto check_auth; if (ev->status == 0) @@ -2608,7 +2640,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) * whenever the encryption procedure fails. */ if (ev->status && conn->type == LE_LINK) - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); @@ -2626,15 +2658,15 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) * connections that are not encrypted with AES-CCM * using a P-256 authenticated combination key. */ - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && (!test_bit(HCI_CONN_AES_CCM, &conn->flags) || conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) { - hci_proto_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE); + hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_drop(conn); goto unlock; } - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } else hci_encrypt_cfm(conn, ev->status, ev->encrypt); @@ -2707,7 +2739,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } @@ -2715,17 +2747,19 @@ unlock: hci_dev_unlock(hdev); } -static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, + u16 *opcode, u8 *status, + hci_req_complete_t *req_complete, + hci_req_complete_skb_t *req_complete_skb) { struct hci_ev_cmd_complete *ev = (void *) skb->data; - u8 status = skb->data[sizeof(*ev)]; - __u16 opcode; - skb_pull(skb, sizeof(*ev)); + *opcode = __le16_to_cpu(ev->opcode); + *status = skb->data[sizeof(*ev)]; - opcode = __le16_to_cpu(ev->opcode); + skb_pull(skb, sizeof(*ev)); - switch (opcode) { + switch (*opcode) { case HCI_OP_INQUIRY_CANCEL: hci_cc_inquiry_cancel(hdev, skb); break; @@ -3003,32 +3037,36 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) break; default: - BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); + BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode); break; } - if (opcode != HCI_OP_NOP) + if (*opcode != HCI_OP_NOP) cancel_delayed_work(&hdev->cmd_timer); - hci_req_cmd_complete(hdev, opcode, status); - - if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) atomic_set(&hdev->cmd_cnt, 1); - if (!skb_queue_empty(&hdev->cmd_q)) - queue_work(hdev->workqueue, &hdev->cmd_work); - } + + hci_req_cmd_complete(hdev, *opcode, *status, req_complete, + req_complete_skb); + + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) + queue_work(hdev->workqueue, &hdev->cmd_work); } -static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, + u16 *opcode, u8 *status, + hci_req_complete_t *req_complete, + hci_req_complete_skb_t *req_complete_skb) { struct hci_ev_cmd_status *ev = (void *) skb->data; - __u16 opcode; skb_pull(skb, sizeof(*ev)); - opcode = __le16_to_cpu(ev->opcode); + *opcode = __le16_to_cpu(ev->opcode); + *status = ev->status; - switch (opcode) { + switch (*opcode) { case HCI_OP_INQUIRY: hci_cs_inquiry(hdev, ev->status); break; @@ -3093,27 +3131,38 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_le_create_conn(hdev, ev->status); break; + case HCI_OP_LE_READ_REMOTE_FEATURES: + hci_cs_le_read_remote_features(hdev, ev->status); + break; + case HCI_OP_LE_START_ENC: hci_cs_le_start_enc(hdev, ev->status); break; default: - BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); + BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode); break; } - if (opcode != HCI_OP_NOP) + if (*opcode != HCI_OP_NOP) cancel_delayed_work(&hdev->cmd_timer); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) + atomic_set(&hdev->cmd_cnt, 1); + + /* Indicate request completion if the command failed. Also, if + * we're not waiting for a special event and we get a success + * command status we should try to flag the request as completed + * (since for this kind of commands there will not be a command + * complete event). + */ if (ev->status || (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) - hci_req_cmd_complete(hdev, opcode, ev->status); + hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, + req_complete_skb); - if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { - atomic_set(&hdev->cmd_cnt, 1); - if (!skb_queue_empty(&hdev->cmd_q)) - queue_work(hdev->workqueue, &hdev->cmd_work); - } + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) + queue_work(hdev->workqueue, &hdev->cmd_work); } static void hci_hardware_error_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3331,11 +3380,11 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_drop(conn); } - if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BONDABLE) && !test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags)) { hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(ev->bdaddr), &ev->bdaddr); - } else if (test_bit(HCI_MGMT, &hdev->dev_flags)) { + } else if (hci_dev_test_flag(hdev, HCI_MGMT)) { u8 secure; if (conn->pending_sec_level == BT_SECURITY_HIGH) @@ -3391,7 +3440,7 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s", hdev->name); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; hci_dev_lock(hdev); @@ -3465,7 +3514,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) set_bit(HCI_CONN_NEW_LINK_KEY, &conn->flags); conn_set_key(conn, ev->key_type, conn->pin_length); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; key = hci_add_link_key(hdev, conn, &ev->bdaddr, ev->link_key, @@ -3487,7 +3536,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) * store_hint being 0). */ if (key->type == HCI_LK_DEBUG_COMBINATION && - !test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) { + !hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS)) { list_del_rcu(&key->list); kfree_rcu(key, rcu); goto unlock; @@ -3570,7 +3619,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, if (!num_rsp) return; - if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) return; hci_dev_lock(hdev); @@ -3679,7 +3728,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } @@ -3738,7 +3787,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, break; } - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); if (ev->status) hci_conn_del(conn); @@ -3776,7 +3825,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, if (!num_rsp) return; - if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) return; hci_dev_lock(hdev); @@ -3794,7 +3843,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, data.rssi = info->rssi; data.ssp_mode = 0x01; - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) name_known = eir_has_data_type(info->data, sizeof(info->data), EIR_NAME_COMPLETE); @@ -3849,7 +3898,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (!ev->status) conn->state = BT_CONNECTED; - hci_proto_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, ev->status); hci_conn_drop(conn); } else { hci_auth_cfm(conn, ev->status); @@ -3890,41 +3939,37 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) if (!data) return 0x00; - if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) { - if (bredr_sc_enabled(hdev)) { - /* When Secure Connections is enabled, then just - * return the present value stored with the OOB - * data. The stored value contains the right present - * information. However it can only be trusted when - * not in Secure Connection Only mode. - */ - if (!test_bit(HCI_SC_ONLY, &hdev->dev_flags)) - return data->present; - - /* When Secure Connections Only mode is enabled, then - * the P-256 values are required. If they are not - * available, then do not declare that OOB data is - * present. - */ - if (!memcmp(data->rand256, ZERO_KEY, 16) || - !memcmp(data->hash256, ZERO_KEY, 16)) - return 0x00; - - return 0x02; - } + if (bredr_sc_enabled(hdev)) { + /* When Secure Connections is enabled, then just + * return the present value stored with the OOB + * data. The stored value contains the right present + * information. However it can only be trusted when + * not in Secure Connection Only mode. + */ + if (!hci_dev_test_flag(hdev, HCI_SC_ONLY)) + return data->present; - /* When Secure Connections is not enabled or actually - * not supported by the hardware, then check that if - * P-192 data values are present. + /* When Secure Connections Only mode is enabled, then + * the P-256 values are required. If they are not + * available, then do not declare that OOB data is + * present. */ - if (!memcmp(data->rand192, ZERO_KEY, 16) || - !memcmp(data->hash192, ZERO_KEY, 16)) + if (!memcmp(data->rand256, ZERO_KEY, 16) || + !memcmp(data->hash256, ZERO_KEY, 16)) return 0x00; - return 0x01; + return 0x02; } - return 0x00; + /* When Secure Connections is not enabled or actually + * not supported by the hardware, then check that if + * P-192 data values are present. + */ + if (!memcmp(data->rand192, ZERO_KEY, 16) || + !memcmp(data->hash192, ZERO_KEY, 16)) + return 0x00; + + return 0x01; } static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3942,13 +3987,13 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_hold(conn); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; /* Allow pairing if we're pairable, the initiators of the * pairing or if the remote is not requesting bonding. */ - if (test_bit(HCI_BONDABLE, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_BONDABLE) || test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags) || (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) { struct hci_cp_io_capability_reply cp; @@ -3974,7 +4019,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) /* If we're not bondable, force one of the non-bondable * authentication requirement values. */ - if (!test_bit(HCI_BONDABLE, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BONDABLE)) conn->auth_type &= HCI_AT_NO_BONDING_MITM; cp.authentication = conn->auth_type; @@ -4011,8 +4056,6 @@ static void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->remote_cap = ev->capability; conn->remote_auth = ev->authentication; - if (ev->oob_data) - set_bit(HCI_CONN_REMOTE_OOB, &conn->flags); unlock: hci_dev_unlock(hdev); @@ -4029,7 +4072,7 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, hci_dev_lock(hdev); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); @@ -4100,7 +4143,7 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev, BT_DBG("%s", hdev->name); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0); } @@ -4119,7 +4162,7 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, conn->passkey_notify = __le32_to_cpu(ev->passkey); conn->passkey_entered = 0; - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); @@ -4157,7 +4200,7 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); @@ -4226,7 +4269,7 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, hci_dev_lock(hdev); - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) goto unlock; data = hci_find_remote_oob_data(hdev, &ev->bdaddr, BDADDR_BREDR); @@ -4243,7 +4286,7 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, struct hci_cp_remote_oob_ext_data_reply cp; bacpy(&cp.bdaddr, &ev->bdaddr); - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { memset(cp.hash192, 0, sizeof(cp.hash192)); memset(cp.rand192, 0, sizeof(cp.rand192)); } else { @@ -4409,7 +4452,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) /* All controllers implicitly stop advertising in the event of a * connection, so ensure that the state bit is cleared. */ - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); if (!conn) { @@ -4432,7 +4475,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->out) { conn->resp_addr_type = ev->bdaddr_type; bacpy(&conn->resp_addr, &ev->bdaddr); - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { conn->init_addr_type = ADDR_LE_DEV_RANDOM; bacpy(&conn->init_addr, &hdev->rpa); } else { @@ -4503,7 +4546,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->sec_level = BT_SECURITY_LOW; conn->handle = __le16_to_cpu(ev->handle); - conn->state = BT_CONNECTED; + conn->state = BT_CONFIG; conn->le_conn_interval = le16_to_cpu(ev->interval); conn->le_conn_latency = le16_to_cpu(ev->latency); @@ -4512,7 +4555,33 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); - hci_proto_connect_cfm(conn, ev->status); + if (!ev->status) { + /* The remote features procedure is defined for master + * role only. So only in case of an initiated connection + * request the remote features. + * + * If the local controller supports slave-initiated features + * exchange, then requesting the remote features in slave + * role is possible. Otherwise just transition into the + * connected state without requesting the remote features. + */ + if (conn->out || + (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) { + struct hci_cp_le_read_remote_features cp; + + cp.handle = __cpu_to_le16(conn->handle); + + hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES, + sizeof(cp), &cp); + + hci_conn_hold(conn); + } else { + conn->state = BT_CONNECTED; + hci_connect_cfm(conn, ev->status); + } + } else { + hci_connect_cfm(conn, ev->status); + } params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst, conn->dst_type); @@ -4658,7 +4727,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, /* If the controller is not using resolvable random * addresses, then this report can be ignored. */ - if (!test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) return; /* If the local IRK of the controller does not match @@ -4814,6 +4883,48 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } +static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_le_remote_feat_complete *ev = (void *)skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + if (!ev->status) + memcpy(conn->features[0], ev->features, 8); + + if (conn->state == BT_CONFIG) { + __u8 status; + + /* If the local controller supports slave-initiated + * features exchange, but the remote controller does + * not, then it is possible that the error code 0x1a + * for unsupported remote feature gets returned. + * + * In this specific case, allow the connection to + * transition into connected state and mark it as + * successful. + */ + if ((hdev->le_features[0] & HCI_LE_SLAVE_FEATURES) && + !conn->out && ev->status == 0x1a) + status = 0x00; + else + status = ev->status; + + conn->state = BT_CONNECTED; + hci_connect_cfm(conn, status); + hci_conn_drop(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_le_ltk_req *ev = (void *) skb->data; @@ -4987,6 +5098,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_le_adv_report_evt(hdev, skb); break; + case HCI_EV_LE_REMOTE_FEAT_COMPLETE: + hci_le_remote_feat_complete_evt(hdev, skb); + break; + case HCI_EV_LE_LTK_REQ: hci_le_ltk_request_evt(hdev, skb); break; @@ -5020,32 +5135,79 @@ static void hci_chan_selected_evt(struct hci_dev *hdev, struct sk_buff *skb) amp_read_loc_assoc_final_data(hdev, hcon); } -void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) +static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, + u8 event, struct sk_buff *skb) { - struct hci_event_hdr *hdr = (void *) skb->data; - __u8 event = hdr->evt; + struct hci_ev_cmd_complete *ev; + struct hci_event_hdr *hdr; - hci_dev_lock(hdev); + if (!skb) + return false; - /* Received events are (currently) only needed when a request is - * ongoing so avoid unnecessary memory allocation. - */ - if (hci_req_pending(hdev)) { - kfree_skb(hdev->recv_evt); - hdev->recv_evt = skb_clone(skb, GFP_KERNEL); + if (skb->len < sizeof(*hdr)) { + BT_ERR("Too short HCI event"); + return false; } - hci_dev_unlock(hdev); - + hdr = (void *) skb->data; skb_pull(skb, HCI_EVENT_HDR_SIZE); + if (event) { + if (hdr->evt != event) + return false; + return true; + } + + if (hdr->evt != HCI_EV_CMD_COMPLETE) { + BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); + return false; + } + + if (skb->len < sizeof(*ev)) { + BT_ERR("Too short cmd_complete event"); + return false; + } + + ev = (void *) skb->data; + skb_pull(skb, sizeof(*ev)); + + if (opcode != __le16_to_cpu(ev->opcode)) { + BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode, + __le16_to_cpu(ev->opcode)); + return false; + } + + return true; +} + +void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_event_hdr *hdr = (void *) skb->data; + hci_req_complete_t req_complete = NULL; + hci_req_complete_skb_t req_complete_skb = NULL; + struct sk_buff *orig_skb = NULL; + u8 status = 0, event = hdr->evt, req_evt = 0; + u16 opcode = HCI_OP_NOP; + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(cmd_hdr->opcode); - - hci_req_cmd_complete(hdev, opcode, 0); + opcode = __le16_to_cpu(cmd_hdr->opcode); + hci_req_cmd_complete(hdev, opcode, status, &req_complete, + &req_complete_skb); + req_evt = event; } + /* If it looks like we might end up having to call + * req_complete_skb, store a pristine copy of the skb since the + * various handlers may modify the original one through + * skb_pull() calls, etc. + */ + if (req_complete_skb || event == HCI_EV_CMD_STATUS || + event == HCI_EV_CMD_COMPLETE) + orig_skb = skb_clone(skb, GFP_KERNEL); + + skb_pull(skb, HCI_EVENT_HDR_SIZE); + switch (event) { case HCI_EV_INQUIRY_COMPLETE: hci_inquiry_complete_evt(hdev, skb); @@ -5088,11 +5250,13 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) break; case HCI_EV_CMD_COMPLETE: - hci_cmd_complete_evt(hdev, skb); + hci_cmd_complete_evt(hdev, skb, &opcode, &status, + &req_complete, &req_complete_skb); break; case HCI_EV_CMD_STATUS: - hci_cmd_status_evt(hdev, skb); + hci_cmd_status_evt(hdev, skb, &opcode, &status, &req_complete, + &req_complete_skb); break; case HCI_EV_HARDWARE_ERROR: @@ -5224,6 +5388,17 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) break; } + if (req_complete) { + req_complete(hdev, status, opcode); + } else if (req_complete_skb) { + if (!hci_get_cmd_complete(hdev, opcode, req_evt, orig_skb)) { + kfree_skb(orig_skb); + orig_skb = NULL; + } + req_complete_skb(hdev, status, opcode, orig_skb); + } + + kfree_skb(orig_skb); kfree_skb(skb); hdev->stat.evt_rx++; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b59f92c6df0c..d6025d6e6d59 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -34,7 +34,8 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev) req->err = 0; } -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +static int req_run(struct hci_request *req, hci_req_complete_t complete, + hci_req_complete_skb_t complete_skb) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -56,6 +57,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) skb = skb_peek_tail(&req->cmd_q); bt_cb(skb)->req.complete = complete; + bt_cb(skb)->req.complete_skb = complete_skb; spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); @@ -66,6 +68,16 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) return 0; } +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + return req_run(req, complete, NULL); +} + +int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) +{ + return req_run(req, NULL, complete); +} + struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param) { @@ -270,7 +282,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req) * and 0x01 (whitelist enabled) use the new filter policies * 0x02 (no whitelist) and 0x03 (whitelist enabled). */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_PRIVACY) && (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; @@ -304,10 +316,10 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) * In this kind of scenario skip the update and let the random * address be updated at the next cycle. */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_LE_ADV) || hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { BT_DBG("Deferring random address update"); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return; } @@ -324,12 +336,12 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * current RPA has expired or there is something else than * the current RPA in use, then generate a new one. */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { int to; *own_addr_type = ADDR_LE_DEV_RANDOM; - if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && + if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && !bacmp(&hdev->random_addr, &hdev->rpa)) return 0; @@ -383,9 +395,9 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * and a static address has been configured, then use that * address instead of the public BR/EDR address. */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { *own_addr_type = ADDR_LE_DEV_RANDOM; if (bacmp(&hdev->static_addr, &hdev->random_addr)) @@ -425,7 +437,7 @@ void __hci_update_page_scan(struct hci_request *req) struct hci_dev *hdev = req->hdev; u8 scan; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return; if (!hdev_is_powered(hdev)) @@ -434,7 +446,7 @@ void __hci_update_page_scan(struct hci_request *req) if (mgmt_powering_down(hdev)) return; - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || disconnected_whitelist_entries(hdev)) scan = SCAN_PAGE; else @@ -443,7 +455,7 @@ void __hci_update_page_scan(struct hci_request *req) if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) return; - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) scan |= SCAN_INQUIRY; hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); @@ -471,14 +483,14 @@ void __hci_update_background_scan(struct hci_request *req) if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || - test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG) || + hci_dev_test_flag(hdev, HCI_AUTO_OFF) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) return; /* No point in doing scanning if LE support hasn't been enabled */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; /* If discovery is active don't interfere with it */ @@ -502,7 +514,7 @@ void __hci_update_background_scan(struct hci_request *req) */ /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; hci_req_add_le_scan_disable(req); @@ -524,7 +536,7 @@ void __hci_update_background_scan(struct hci_request *req) /* If controller is currently scanning, we stop it to ensure we * don't miss any advertising (due to duplicates filter). */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) hci_req_add_le_scan_disable(req); hci_req_add_le_passive_scan(req); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index adf074d33544..bf6df92f42db 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -32,11 +32,14 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, const void *param); void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, const void *param, u8 event); -void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, + hci_req_complete_t *req_complete, + hci_req_complete_skb_t *req_complete_skb); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1d65c5be7c82..56f9edbf3d05 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -30,6 +30,12 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/hci_mon.h> +#include <net/bluetooth/mgmt.h> + +#include "mgmt_util.h" + +static LIST_HEAD(mgmt_chan_list); +static DEFINE_MUTEX(mgmt_chan_list_lock); static atomic_t monitor_promisc = ATOMIC_INIT(0); @@ -44,11 +50,32 @@ struct hci_pinfo { struct hci_filter filter; __u32 cmsg_mask; unsigned short channel; + unsigned long flags; }; -static inline int hci_test_bit(int nr, void *addr) +void hci_sock_set_flag(struct sock *sk, int nr) +{ + set_bit(nr, &hci_pi(sk)->flags); +} + +void hci_sock_clear_flag(struct sock *sk, int nr) { - return *((__u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); + clear_bit(nr, &hci_pi(sk)->flags); +} + +int hci_sock_test_flag(struct sock *sk, int nr) +{ + return test_bit(nr, &hci_pi(sk)->flags); +} + +unsigned short hci_sock_get_channel(struct sock *sk) +{ + return hci_pi(sk)->channel; +} + +static inline int hci_test_bit(int nr, const void *addr) +{ + return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); } /* Security filter */ @@ -183,54 +210,31 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } -/* Send frame to control socket */ -void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) +/* Send frame to sockets with specific channel */ +void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, + int flag, struct sock *skip_sk) { struct sock *sk; - BT_DBG("len %d", skb->len); + BT_DBG("channel %u len %d", channel, skb->len); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; - /* Skip the original socket */ - if (sk == skip_sk) - continue; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + /* Ignore socket without the flag set */ + if (!hci_sock_test_flag(sk, flag)) continue; - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) + /* Skip the original socket */ + if (sk == skip_sk) continue; - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - - read_unlock(&hci_sk_list.lock); -} - -static void queue_monitor_skb(struct sk_buff *skb) -{ - struct sock *sk; - - BT_DBG("len %d", skb->len); - - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - if (sk->sk_state != BT_BOUND) continue; - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) + if (hci_pi(sk)->channel != channel) continue; nskb = skb_clone(skb, GFP_ATOMIC); @@ -290,7 +294,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); - queue_monitor_skb(skb_copy); + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy, + HCI_SOCK_TRUSTED, NULL); kfree_skb(skb_copy); } @@ -397,7 +402,8 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) skb = create_monitor_event(hdev, event); if (skb) { - queue_monitor_skb(skb); + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } @@ -428,6 +434,56 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) } } +static struct hci_mgmt_chan *__hci_mgmt_chan_find(unsigned short channel) +{ + struct hci_mgmt_chan *c; + + list_for_each_entry(c, &mgmt_chan_list, list) { + if (c->channel == channel) + return c; + } + + return NULL; +} + +static struct hci_mgmt_chan *hci_mgmt_chan_find(unsigned short channel) +{ + struct hci_mgmt_chan *c; + + mutex_lock(&mgmt_chan_list_lock); + c = __hci_mgmt_chan_find(channel); + mutex_unlock(&mgmt_chan_list_lock); + + return c; +} + +int hci_mgmt_chan_register(struct hci_mgmt_chan *c) +{ + if (c->channel < HCI_CHANNEL_CONTROL) + return -EINVAL; + + mutex_lock(&mgmt_chan_list_lock); + if (__hci_mgmt_chan_find(c->channel)) { + mutex_unlock(&mgmt_chan_list_lock); + return -EALREADY; + } + + list_add_tail(&c->list, &mgmt_chan_list); + + mutex_unlock(&mgmt_chan_list_lock); + + return 0; +} +EXPORT_SYMBOL(hci_mgmt_chan_register); + +void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c) +{ + mutex_lock(&mgmt_chan_list_lock); + list_del(&c->list); + mutex_unlock(&mgmt_chan_list_lock); +} +EXPORT_SYMBOL(hci_mgmt_chan_unregister); + static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -448,7 +504,7 @@ static int hci_sock_release(struct socket *sock) if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { mgmt_index_added(hdev); - clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); hci_dev_close(hdev->id); } @@ -508,10 +564,10 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (!hdev) return -EBADFD; - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; if (hdev->dev_type != HCI_BREDR) @@ -687,14 +743,14 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags)) { + hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG)) { err = -EBUSY; hci_dev_put(hdev); goto done; } - if (test_and_set_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (hci_dev_test_and_set_flag(hdev, HCI_USER_CHANNEL)) { err = -EUSERS; hci_dev_put(hdev); goto done; @@ -704,7 +760,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = hci_dev_open(hdev->id); if (err) { - clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); hci_dev_put(hdev); goto done; @@ -715,38 +771,62 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->hdev = hdev; break; - case HCI_CHANNEL_CONTROL: + case HCI_CHANNEL_MONITOR: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } - if (!capable(CAP_NET_ADMIN)) { + if (!capable(CAP_NET_RAW)) { err = -EPERM; goto done; } + /* The monitor interface is restricted to CAP_NET_RAW + * capabilities and with that implicitly trusted. + */ + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + send_monitor_replay(sk); + + atomic_inc(&monitor_promisc); break; - case HCI_CHANNEL_MONITOR: - if (haddr.hci_dev != HCI_DEV_NONE) { + default: + if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; goto done; } - if (!capable(CAP_NET_RAW)) { - err = -EPERM; + if (haddr.hci_dev != HCI_DEV_NONE) { + err = -EINVAL; goto done; } - send_monitor_replay(sk); - - atomic_inc(&monitor_promisc); + /* Users with CAP_NET_ADMIN capabilities are allowed + * access to all management commands and events. For + * untrusted users the interface is restricted and + * also only untrusted events are sent. + */ + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* At the moment the index and unconfigured index events + * are enabled unconditionally. Setting them on each + * socket when binding keeps this functionality. They + * however might be cleared later and then sending of these + * events will be disabled, but that is then intentional. + * + * This also enables generic events that are safe to be + * received by untrusted users. Example for such events + * are changes to settings, class of device, name etc. + */ + if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); + } break; - - default: - err = -EINVAL; - goto done; } @@ -826,8 +906,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, } } -static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -860,10 +940,13 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, hci_sock_cmsg(sk, msg, skb); break; case HCI_CHANNEL_USER: - case HCI_CHANNEL_CONTROL: case HCI_CHANNEL_MONITOR: sock_recv_timestamp(msg, sk, skb); break; + default: + if (hci_mgmt_chan_find(hci_pi(sk)->channel)) + sock_recv_timestamp(msg, sk, skb); + break; } skb_free_datagram(sk, skb); @@ -871,10 +954,122 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err ? : copied; } -static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, + struct msghdr *msg, size_t msglen) +{ + void *buf; + u8 *cp; + struct mgmt_hdr *hdr; + u16 opcode, index, len; + struct hci_dev *hdev = NULL; + const struct hci_mgmt_handler *handler; + bool var_len, no_hdev; + int err; + + BT_DBG("got %zu bytes", msglen); + + if (msglen < sizeof(*hdr)) + return -EINVAL; + + buf = kmalloc(msglen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (memcpy_from_msg(buf, msg, msglen)) { + err = -EFAULT; + goto done; + } + + hdr = buf; + opcode = __le16_to_cpu(hdr->opcode); + index = __le16_to_cpu(hdr->index); + len = __le16_to_cpu(hdr->len); + + if (len != msglen - sizeof(*hdr)) { + err = -EINVAL; + goto done; + } + + if (opcode >= chan->handler_count || + chan->handlers[opcode].func == NULL) { + BT_DBG("Unknown op %u", opcode); + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_UNKNOWN_COMMAND); + goto done; + } + + handler = &chan->handlers[opcode]; + + if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) && + !(handler->flags & HCI_MGMT_UNTRUSTED)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_PERMISSION_DENIED); + goto done; + } + + if (index != MGMT_INDEX_NONE) { + hdev = hci_dev_get(index); + if (!hdev) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + + if (hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG) || + hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !(handler->flags & HCI_MGMT_UNCONFIGURED)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + } + + no_hdev = (handler->flags & HCI_MGMT_NO_HDEV); + if (no_hdev != !hdev) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + + var_len = (handler->flags & HCI_MGMT_VAR_LEN); + if ((var_len && len < handler->data_len) || + (!var_len && len != handler->data_len)) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_PARAMS); + goto done; + } + + if (hdev && chan->hdev_init) + chan->hdev_init(sk, hdev); + + cp = buf + sizeof(*hdr); + + err = handler->func(sk, hdev, cp, len); + if (err < 0) + goto done; + + err = msglen; + +done: + if (hdev) + hci_dev_put(hdev); + + kfree(buf); + return err; +} + +static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; + struct hci_mgmt_chan *chan; struct hci_dev *hdev; struct sk_buff *skb; int err; @@ -896,14 +1091,18 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: break; - case HCI_CHANNEL_CONTROL: - err = mgmt_control(sk, msg, len); - goto done; case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto done; default: - err = -EINVAL; + mutex_lock(&mgmt_chan_list_lock); + chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); + if (chan) + err = hci_mgmt_cmd(chan, sk, msg, len); + else + err = -EINVAL; + + mutex_unlock(&mgmt_chan_list_lock); goto done; } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 07348e142f16..a05b9dbf14c9 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -70,10 +70,11 @@ static void hidp_session_terminate(struct hidp_session *s); static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) { + u32 valid_flags = 0; memset(ci, 0, sizeof(*ci)); bacpy(&ci->bdaddr, &session->bdaddr); - ci->flags = session->flags; + ci->flags = session->flags & valid_flags; ci->state = BT_CONNECTED; if (session->input) { @@ -907,7 +908,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, kref_init(&session->ref); atomic_set(&session->state, HIDP_SESSION_IDLING); init_waitqueue_head(&session->state_queue); - session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); + session->flags = req->flags & BIT(HIDP_BLUETOOTH_VENDOR_ID); /* connection management */ bacpy(&session->bdaddr, bdaddr); @@ -1312,6 +1313,7 @@ int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) { + u32 valid_flags = 0; struct hidp_session *session; struct l2cap_conn *conn; struct l2cap_chan *chan; @@ -1321,6 +1323,9 @@ int hidp_connection_add(struct hidp_connadd_req *req, if (ret) return ret; + if (req->flags & ~valid_flags) + return -EINVAL; + chan = l2cap_pi(ctrl_sock->sk)->chan; conn = NULL; l2cap_chan_lock(chan); @@ -1351,13 +1356,17 @@ out_conn: int hidp_connection_del(struct hidp_conndel_req *req) { + u32 valid_flags = BIT(HIDP_VIRTUAL_CABLE_UNPLUG); struct hidp_session *session; + if (req->flags & ~valid_flags) + return -EINVAL; + session = hidp_session_find(&req->bdaddr); if (!session) return -ENOENT; - if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG)) + if (req->flags & BIT(HIDP_VIRTUAL_CABLE_UNPLUG)) hidp_send_ctrl_message(session, HIDP_TRANS_HID_CONTROL | HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6ba33f9631e8..dad419782a12 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -292,7 +292,7 @@ static struct sk_buff *l2cap_ertm_seq_in_queue(struct sk_buff_head *head, struct sk_buff *skb; skb_queue_walk(head, skb) { - if (bt_cb(skb)->control.txseq == seq) + if (bt_cb(skb)->l2cap.txseq == seq) return skb; } @@ -954,11 +954,11 @@ static inline void __unpack_control(struct l2cap_chan *chan, { if (test_bit(FLAG_EXT_CTRL, &chan->flags)) { __unpack_extended_control(get_unaligned_le32(skb->data), - &bt_cb(skb)->control); + &bt_cb(skb)->l2cap); skb_pull(skb, L2CAP_EXT_CTRL_SIZE); } else { __unpack_enhanced_control(get_unaligned_le16(skb->data), - &bt_cb(skb)->control); + &bt_cb(skb)->l2cap); skb_pull(skb, L2CAP_ENH_CTRL_SIZE); } } @@ -1200,8 +1200,8 @@ static void l2cap_move_setup(struct l2cap_chan *chan) chan->retry_count = 0; skb_queue_walk(&chan->tx_q, skb) { - if (bt_cb(skb)->control.retries) - bt_cb(skb)->control.retries = 1; + if (bt_cb(skb)->l2cap.retries) + bt_cb(skb)->l2cap.retries = 1; else break; } @@ -1244,6 +1244,13 @@ static void l2cap_move_done(struct l2cap_chan *chan) static void l2cap_chan_ready(struct l2cap_chan *chan) { + /* The channel may have already been flagged as connected in + * case of receiving data before the L2CAP info req/rsp + * procedure is complete. + */ + if (chan->state == BT_CONNECTED) + return; + /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); @@ -1839,8 +1846,8 @@ static void l2cap_streaming_send(struct l2cap_chan *chan, skb = skb_dequeue(&chan->tx_q); - bt_cb(skb)->control.retries = 1; - control = &bt_cb(skb)->control; + bt_cb(skb)->l2cap.retries = 1; + control = &bt_cb(skb)->l2cap; control->reqseq = 0; control->txseq = chan->next_tx_seq; @@ -1884,8 +1891,8 @@ static int l2cap_ertm_send(struct l2cap_chan *chan) skb = chan->tx_send_head; - bt_cb(skb)->control.retries = 1; - control = &bt_cb(skb)->control; + bt_cb(skb)->l2cap.retries = 1; + control = &bt_cb(skb)->l2cap; if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) control->final = 1; @@ -1956,11 +1963,11 @@ static void l2cap_ertm_resend(struct l2cap_chan *chan) continue; } - bt_cb(skb)->control.retries++; - control = bt_cb(skb)->control; + bt_cb(skb)->l2cap.retries++; + control = bt_cb(skb)->l2cap; if (chan->max_tx != 0 && - bt_cb(skb)->control.retries > chan->max_tx) { + bt_cb(skb)->l2cap.retries > chan->max_tx) { BT_DBG("Retry limit exceeded (%d)", chan->max_tx); l2cap_send_disconn_req(chan, ECONNRESET); l2cap_seq_list_clear(&chan->retrans_list); @@ -2038,7 +2045,7 @@ static void l2cap_retransmit_all(struct l2cap_chan *chan, if (chan->unacked_frames) { skb_queue_walk(&chan->tx_q, skb) { - if (bt_cb(skb)->control.txseq == control->reqseq || + if (bt_cb(skb)->l2cap.txseq == control->reqseq || skb == chan->tx_send_head) break; } @@ -2048,7 +2055,7 @@ static void l2cap_retransmit_all(struct l2cap_chan *chan, break; l2cap_seq_list_append(&chan->retrans_list, - bt_cb(skb)->control.txseq); + bt_cb(skb)->l2cap.txseq); } l2cap_ertm_resend(chan); @@ -2260,8 +2267,8 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, return ERR_PTR(err); } - bt_cb(skb)->control.fcs = chan->fcs; - bt_cb(skb)->control.retries = 0; + bt_cb(skb)->l2cap.fcs = chan->fcs; + bt_cb(skb)->l2cap.retries = 0; return skb; } @@ -2314,7 +2321,7 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, return PTR_ERR(skb); } - bt_cb(skb)->control.sar = sar; + bt_cb(skb)->l2cap.sar = sar; __skb_queue_tail(seg_queue, skb); len -= pdu_len; @@ -2849,7 +2856,7 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) continue; /* Don't send frame to the channel it came from */ - if (bt_cb(skb)->chan == chan) + if (bt_cb(skb)->l2cap.chan == chan) continue; nskb = skb_clone(skb, GFP_KERNEL); @@ -3893,7 +3900,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn, return -EPROTO; hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_MGMT) && !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags)) mgmt_device_connected(hdev, hcon, 0, NULL, 0); hci_dev_unlock(hdev); @@ -5911,7 +5918,7 @@ static int l2cap_rx_queued_iframes(struct l2cap_chan *chan) skb_unlink(skb, &chan->srej_q); chan->buffer_seq = __next_seq(chan, chan->buffer_seq); - err = l2cap_reassemble_sdu(chan, skb, &bt_cb(skb)->control); + err = l2cap_reassemble_sdu(chan, skb, &bt_cb(skb)->l2cap); if (err) break; } @@ -5945,7 +5952,7 @@ static void l2cap_handle_srej(struct l2cap_chan *chan, return; } - if (chan->max_tx != 0 && bt_cb(skb)->control.retries >= chan->max_tx) { + if (chan->max_tx != 0 && bt_cb(skb)->l2cap.retries >= chan->max_tx) { BT_DBG("Retry limit exceeded (%d)", chan->max_tx); l2cap_send_disconn_req(chan, ECONNRESET); return; @@ -5998,7 +6005,7 @@ static void l2cap_handle_rej(struct l2cap_chan *chan, skb = l2cap_ertm_seq_in_queue(&chan->tx_q, control->reqseq); if (chan->max_tx && skb && - bt_cb(skb)->control.retries >= chan->max_tx) { + bt_cb(skb)->l2cap.retries >= chan->max_tx) { BT_DBG("Retry limit exceeded (%d)", chan->max_tx); l2cap_send_disconn_req(chan, ECONNRESET); return; @@ -6558,7 +6565,7 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) { - struct l2cap_ctrl *control = &bt_cb(skb)->control; + struct l2cap_ctrl *control = &bt_cb(skb)->l2cap; u16 len; u8 event; @@ -6785,6 +6792,13 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, BT_DBG("chan %p, len %d", chan, skb->len); + /* If we receive data on a fixed channel before the info req/rsp + * procdure is done simply assume that the channel is supported + * and mark it as ready. + */ + if (chan->chan_type == L2CAP_CHAN_FIXED) + l2cap_chan_ready(chan); + if (chan->state != BT_CONNECTED) goto drop; @@ -6850,8 +6864,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, goto drop; /* Store remote BD_ADDR and PSM for msg_name */ - bacpy(&bt_cb(skb)->bdaddr, &hcon->dst); - bt_cb(skb)->psm = psm; + bacpy(&bt_cb(skb)->l2cap.bdaddr, &hcon->dst); + bt_cb(skb)->l2cap.psm = psm; if (!chan->ops->recv(chan, skb)) { l2cap_chan_put(chan); @@ -6973,12 +6987,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS; if (hcon->type == ACL_LINK && - test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags)) + hci_dev_test_flag(hcon->hdev, HCI_HS_ENABLED)) conn->local_fixed_chan |= L2CAP_FC_A2MP; - if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) && + if (hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED) && (bredr_sc_enabled(hcon->hdev) || - test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags))) + hci_dev_test_flag(hcon->hdev, HCI_FORCE_BREDR_SMP))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); @@ -7098,7 +7112,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, else dst_type = ADDR_LE_DEV_RANDOM; - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) role = HCI_ROLE_SLAVE; else role = HCI_ROLE_MASTER; @@ -7238,13 +7252,16 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } -void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) +static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; struct l2cap_conn *conn; struct l2cap_chan *pchan; u8 dst_type; + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -7307,8 +7324,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon) return conn->disc_reason; } -void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) +static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -7331,13 +7351,13 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) } } -int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) +static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_chan *chan; if (!conn) - return 0; + return; BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt); @@ -7420,8 +7440,6 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } mutex_unlock(&conn->chan_lock); - - return 0; } int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) @@ -7529,6 +7547,13 @@ drop: return 0; } +static struct hci_cb l2cap_cb = { + .name = "L2CAP", + .connect_cfm = l2cap_connect_cfm, + .disconn_cfm = l2cap_disconn_cfm, + .security_cfm = l2cap_security_cfm, +}; + static int l2cap_debugfs_show(struct seq_file *f, void *p) { struct l2cap_chan *c; @@ -7570,6 +7595,8 @@ int __init l2cap_init(void) if (err < 0) return err; + hci_register_cb(&l2cap_cb); + if (IS_ERR_OR_NULL(bt_debugfs)) return 0; @@ -7587,6 +7614,7 @@ int __init l2cap_init(void) void l2cap_exit(void) { debugfs_remove(l2cap_debugfs); + hci_unregister_cb(&l2cap_cb); l2cap_cleanup_sockets(); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 60694f0f4c73..a7278f05eafb 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -944,8 +944,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, return err; } -static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; @@ -976,8 +976,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; } -static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1004,9 +1004,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); if (sock->type == SOCK_STREAM) - err = bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_stream_recvmsg(sock, msg, len, flags); else - err = bt_sock_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_recvmsg(sock, msg, len, flags); if (pi->chan->mode != L2CAP_MODE_ERTM) return err; @@ -1330,7 +1330,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, skb->priority = sk->sk_priority; - bt_cb(skb)->chan = chan; + bt_cb(skb)->l2cap.chan = chan; return skb; } @@ -1444,8 +1444,8 @@ static void l2cap_skb_msg_name(struct sk_buff *skb, void *msg_name, memset(la, 0, sizeof(struct sockaddr_l2)); la->l2_family = AF_BLUETOOTH; - la->l2_psm = bt_cb(skb)->psm; - bacpy(&la->l2_bdaddr, &bt_cb(skb)->bdaddr); + la->l2_psm = bt_cb(skb)->l2cap.psm; + bacpy(&la->l2_bdaddr, &bt_cb(skb)->l2cap.bdaddr); *msg_namelen = sizeof(struct sockaddr_l2); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9ec5390c85eb..7fd87e7135b5 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -29,14 +29,16 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/hci_sock.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> #include "hci_request.h" #include "smp.h" +#include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 8 +#define MGMT_REVISION 9 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -95,6 +97,11 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_OP_SET_PUBLIC_ADDRESS, MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + MGMT_OP_READ_EXT_INDEX_LIST, + MGMT_OP_READ_ADV_FEATURES, + MGMT_OP_ADD_ADVERTISING, + MGMT_OP_REMOVE_ADVERTISING, }; static const u16 mgmt_events[] = { @@ -127,6 +134,32 @@ static const u16 mgmt_events[] = { MGMT_EV_UNCONF_INDEX_ADDED, MGMT_EV_UNCONF_INDEX_REMOVED, MGMT_EV_NEW_CONFIG_OPTIONS, + MGMT_EV_EXT_INDEX_ADDED, + MGMT_EV_EXT_INDEX_REMOVED, + MGMT_EV_LOCAL_OOB_DATA_UPDATED, + MGMT_EV_ADVERTISING_ADDED, + MGMT_EV_ADVERTISING_REMOVED, +}; + +static const u16 mgmt_untrusted_commands[] = { + MGMT_OP_READ_INDEX_LIST, + MGMT_OP_READ_INFO, + MGMT_OP_READ_UNCONF_INDEX_LIST, + MGMT_OP_READ_CONFIG_INFO, + MGMT_OP_READ_EXT_INDEX_LIST, +}; + +static const u16 mgmt_untrusted_events[] = { + MGMT_EV_INDEX_ADDED, + MGMT_EV_INDEX_REMOVED, + MGMT_EV_NEW_SETTINGS, + MGMT_EV_CLASS_OF_DEV_CHANGED, + MGMT_EV_LOCAL_NAME_CHANGED, + MGMT_EV_UNCONF_INDEX_ADDED, + MGMT_EV_UNCONF_INDEX_REMOVED, + MGMT_EV_NEW_CONFIG_OPTIONS, + MGMT_EV_EXT_INDEX_ADDED, + MGMT_EV_EXT_INDEX_REMOVED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -134,17 +167,6 @@ static const u16 mgmt_events[] = { #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" -struct pending_cmd { - struct list_head list; - u16 opcode; - int index; - void *param; - size_t param_len; - struct sock *sk; - void *user_data; - int (*cmd_complete)(struct pending_cmd *cmd, u8 status); -}; - /* HCI to MGMT error code conversion table */ static u8 mgmt_status_table[] = { MGMT_STATUS_SUCCESS, @@ -218,98 +240,32 @@ static u8 mgmt_status(u8 hci_status) return MGMT_STATUS_FAILED; } -static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len, - struct sock *skip_sk) +static int mgmt_index_event(u16 event, struct hci_dev *hdev, void *data, + u16 len, int flag) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - - skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(event); - if (hdev) - hdr->index = cpu_to_le16(hdev->id); - else - hdr->index = cpu_to_le16(MGMT_INDEX_NONE); - hdr->len = cpu_to_le16(data_len); - - if (data) - memcpy(skb_put(skb, data_len), data, data_len); - - /* Time stamp */ - __net_timestamp(skb); - - hci_send_to_control(skb, skip_sk); - kfree_skb(skb); - - return 0; + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + flag, NULL); } -static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) +static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, + u16 len, int flag, struct sock *skip_sk) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_status *ev; - int err; - - BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); - - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); - hdr->index = cpu_to_le16(index); - hdr->len = cpu_to_le16(sizeof(*ev)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - ev->status = status; - ev->opcode = cpu_to_le16(cmd); - - err = sock_queue_rcv_skb(sk, skb); - if (err < 0) - kfree_skb(skb); - - return err; + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + flag, skip_sk); } -static int cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, - void *rp, size_t rp_len) +static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data, + u16 len, struct sock *skip_sk) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - int err; - - BT_DBG("sock %p", sk); - - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->index = cpu_to_le16(index); - hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); - - ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); - ev->opcode = cpu_to_le16(cmd); - ev->status = status; - - if (rp) - memcpy(ev->data, rp, rp_len); - - err = sock_queue_rcv_skb(sk, skb); - if (err < 0) - kfree_skb(skb); + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + HCI_MGMT_GENERIC_EVENTS, skip_sk); +} - return err; +static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, + struct sock *skip_sk) +{ + return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, + HCI_SOCK_TRUSTED, skip_sk); } static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, @@ -322,22 +278,28 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, rp.version = MGMT_VERSION; rp.revision = cpu_to_le16(MGMT_REVISION); - return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, - sizeof(rp)); + return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, + &rp, sizeof(rp)); } static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_commands *rp; - const u16 num_commands = ARRAY_SIZE(mgmt_commands); - const u16 num_events = ARRAY_SIZE(mgmt_events); - __le16 *opcode; + u16 num_commands, num_events; size_t rp_size; int i, err; BT_DBG("sock %p", sk); + if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) { + num_commands = ARRAY_SIZE(mgmt_commands); + num_events = ARRAY_SIZE(mgmt_events); + } else { + num_commands = ARRAY_SIZE(mgmt_untrusted_commands); + num_events = ARRAY_SIZE(mgmt_untrusted_events); + } + rp_size = sizeof(*rp) + ((num_commands + num_events) * sizeof(u16)); rp = kmalloc(rp_size, GFP_KERNEL); @@ -347,14 +309,26 @@ static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data, rp->num_commands = cpu_to_le16(num_commands); rp->num_events = cpu_to_le16(num_events); - for (i = 0, opcode = rp->opcodes; i < num_commands; i++, opcode++) - put_unaligned_le16(mgmt_commands[i], opcode); + if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) { + __le16 *opcode = rp->opcodes; + + for (i = 0; i < num_commands; i++, opcode++) + put_unaligned_le16(mgmt_commands[i], opcode); + + for (i = 0; i < num_events; i++, opcode++) + put_unaligned_le16(mgmt_events[i], opcode); + } else { + __le16 *opcode = rp->opcodes; + + for (i = 0; i < num_commands; i++, opcode++) + put_unaligned_le16(mgmt_untrusted_commands[i], opcode); - for (i = 0; i < num_events; i++, opcode++) - put_unaligned_le16(mgmt_events[i], opcode); + for (i = 0; i < num_events; i++, opcode++) + put_unaligned_le16(mgmt_untrusted_events[i], opcode); + } - err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_COMMANDS, 0, rp, - rp_size); + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_COMMANDS, 0, + rp, rp_size); kfree(rp); return err; @@ -376,7 +350,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (d->dev_type == HCI_BREDR && - !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) + !hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -389,9 +363,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (test_bit(HCI_SETUP, &d->dev_flags) || - test_bit(HCI_CONFIG, &d->dev_flags) || - test_bit(HCI_USER_CHANNEL, &d->dev_flags)) + if (hci_dev_test_flag(d, HCI_SETUP) || + hci_dev_test_flag(d, HCI_CONFIG) || + hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Devices marked as raw-only are neither configured @@ -401,7 +375,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, continue; if (d->dev_type == HCI_BREDR && - !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) { + !hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -412,8 +386,8 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, 0, rp, - rp_len); + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, + 0, rp, rp_len); kfree(rp); @@ -436,7 +410,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (d->dev_type == HCI_BREDR && - test_bit(HCI_UNCONFIGURED, &d->dev_flags)) + hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -449,9 +423,9 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (test_bit(HCI_SETUP, &d->dev_flags) || - test_bit(HCI_CONFIG, &d->dev_flags) || - test_bit(HCI_USER_CHANNEL, &d->dev_flags)) + if (hci_dev_test_flag(d, HCI_SETUP) || + hci_dev_test_flag(d, HCI_CONFIG) || + hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Devices marked as raw-only are neither configured @@ -461,7 +435,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, continue; if (d->dev_type == HCI_BREDR && - test_bit(HCI_UNCONFIGURED, &d->dev_flags)) { + hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -472,8 +446,84 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_UNCONF_INDEX_LIST, - 0, rp, rp_len); + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, + MGMT_OP_READ_UNCONF_INDEX_LIST, 0, rp, rp_len); + + kfree(rp); + + return err; +} + +static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_ext_index_list *rp; + struct hci_dev *d; + size_t rp_len; + u16 count; + int err; + + BT_DBG("sock %p", sk); + + read_lock(&hci_dev_list_lock); + + count = 0; + list_for_each_entry(d, &hci_dev_list, list) { + if (d->dev_type == HCI_BREDR || d->dev_type == HCI_AMP) + count++; + } + + rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count); + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + read_unlock(&hci_dev_list_lock); + return -ENOMEM; + } + + count = 0; + list_for_each_entry(d, &hci_dev_list, list) { + if (hci_dev_test_flag(d, HCI_SETUP) || + hci_dev_test_flag(d, HCI_CONFIG) || + hci_dev_test_flag(d, HCI_USER_CHANNEL)) + continue; + + /* Devices marked as raw-only are neither configured + * nor unconfigured controllers. + */ + if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + continue; + + if (d->dev_type == HCI_BREDR) { + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) + rp->entry[count].type = 0x01; + else + rp->entry[count].type = 0x00; + } else if (d->dev_type == HCI_AMP) { + rp->entry[count].type = 0x02; + } else { + continue; + } + + rp->entry[count].bus = d->bus; + rp->entry[count++].index = cpu_to_le16(d->id); + BT_DBG("Added hci%u", d->id); + } + + rp->num_controllers = cpu_to_le16(count); + rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count); + + read_unlock(&hci_dev_list_lock); + + /* If this command is called at least once, then all the + * default index and unconfigured index events are disabled + * and from now on only extended index events are used. + */ + hci_sock_set_flag(sk, HCI_MGMT_EXT_INDEX_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_INDEX_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); + + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, + MGMT_OP_READ_EXT_INDEX_LIST, 0, rp, rp_len); kfree(rp); @@ -483,7 +533,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, static bool is_configured(struct hci_dev *hdev) { if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && - !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags)) + !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) return false; if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) && @@ -498,7 +548,7 @@ static __le32 get_missing_options(struct hci_dev *hdev) u32 options = 0; if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && - !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags)) + !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) options |= MGMT_OPTION_EXTERNAL_CONFIG; if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) && @@ -512,16 +562,16 @@ static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = get_missing_options(hdev); - return mgmt_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, - sizeof(options), skip); + return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 options = get_missing_options(hdev); - return cmd_complete(sk, hdev->id, opcode, 0, &options, - sizeof(options)); + return mgmt_cmd_complete(sk, hdev->id, opcode, 0, &options, + sizeof(options)); } static int read_config_info(struct sock *sk, struct hci_dev *hdev, @@ -548,8 +598,8 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_READ_CONFIG_INFO, 0, &rp, - sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_CONFIG_INFO, 0, + &rp, sizeof(rp)); } static u32 get_supported_settings(struct hci_dev *hdev) @@ -582,6 +632,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_ADVERTISING; settings |= MGMT_SETTING_SECURE_CONN; settings |= MGMT_SETTING_PRIVACY; + settings |= MGMT_SETTING_STATIC_ADDRESS; } if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || @@ -598,45 +649,64 @@ static u32 get_current_settings(struct hci_dev *hdev) if (hdev_is_powered(hdev)) settings |= MGMT_SETTING_POWERED; - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_CONNECTABLE)) settings |= MGMT_SETTING_CONNECTABLE; - if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) settings |= MGMT_SETTING_FAST_CONNECTABLE; - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) settings |= MGMT_SETTING_DISCOVERABLE; - if (test_bit(HCI_BONDABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_BONDABLE)) settings |= MGMT_SETTING_BONDABLE; - if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) settings |= MGMT_SETTING_BREDR; - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) settings |= MGMT_SETTING_LE; - if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) settings |= MGMT_SETTING_LINK_SECURITY; - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) settings |= MGMT_SETTING_SSP; - if (test_bit(HCI_HS_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_HS_ENABLED)) settings |= MGMT_SETTING_HS; - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) settings |= MGMT_SETTING_ADVERTISING; - if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED)) settings |= MGMT_SETTING_SECURE_CONN; - if (test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS)) settings |= MGMT_SETTING_DEBUG_KEYS; - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) settings |= MGMT_SETTING_PRIVACY; + /* The current setting for static address has two purposes. The + * first is to indicate if the static address will be used and + * the second is to indicate if it is actually set. + * + * This means if the static address is not configured, this flag + * will never be set. If the address is configured, then if the + * address is actually used decides if the flag is set or not. + * + * For single mode LE only controllers and dual-mode controllers + * with BR/EDR disabled, the existence of the static address will + * be evaluated. + */ + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || + !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + if (bacmp(&hdev->static_addr, BDADDR_ANY)) + settings |= MGMT_SETTING_STATIC_ADDRESS; + } + return settings; } @@ -750,35 +820,19 @@ static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) return ptr; } -static struct pending_cmd *mgmt_pending_find(u16 opcode, struct hci_dev *hdev) +static struct mgmt_pending_cmd *pending_find(u16 opcode, struct hci_dev *hdev) { - struct pending_cmd *cmd; - - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { - if (cmd->opcode == opcode) - return cmd; - } - - return NULL; + return mgmt_pending_find(HCI_CHANNEL_CONTROL, opcode, hdev); } -static struct pending_cmd *mgmt_pending_find_data(u16 opcode, +static struct mgmt_pending_cmd *pending_find_data(u16 opcode, struct hci_dev *hdev, const void *data) { - struct pending_cmd *cmd; - - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { - if (cmd->user_data != data) - continue; - if (cmd->opcode == opcode) - return cmd; - } - - return NULL; + return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data); } -static u8 create_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { u8 ad_len = 0; size_t name_len; @@ -804,21 +858,36 @@ static u8 create_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) return ad_len; } -static void update_scan_rsp_data(struct hci_request *req) +static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + /* TODO: Set the appropriate entries based on advertising instance flags + * here once flags other than 0 are supported. + */ + memcpy(ptr, hdev->adv_instance.scan_rsp_data, + hdev->adv_instance.scan_rsp_len); + + return hdev->adv_instance.scan_rsp_len; +} + +static void update_scan_rsp_data_for_instance(struct hci_request *req, + u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_scan_rsp_data cp; u8 len; - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; memset(&cp, 0, sizeof(cp)); - len = create_scan_rsp_data(hdev, cp.data); + if (instance) + len = create_instance_scan_rsp_data(hdev, cp.data); + else + len = create_default_scan_rsp_data(hdev, cp.data); if (hdev->scan_rsp_data_len == len && - memcmp(cp.data, hdev->scan_rsp_data, len) == 0) + !memcmp(cp.data, hdev->scan_rsp_data, len)) return; memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); @@ -829,14 +898,33 @@ static void update_scan_rsp_data(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); } +static void update_scan_rsp_data(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 instance; + + /* The "Set Advertising" setting supersedes the "Add Advertising" + * setting. Here we set the scan response data based on which + * setting was set. When neither apply, default to the global settings, + * represented by instance "0". + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING)) + instance = 0x01; + else + instance = 0x00; + + update_scan_rsp_data_for_instance(req, instance); +} + static u8 get_adv_discov_flags(struct hci_dev *hdev) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; /* If there's a pending mgmt command the flags will not yet have * their final values, so check for this first. */ - cmd = mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); + cmd = pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); if (cmd) { struct mgmt_mode *cp = cmd->param; if (cp->val == 0x01) @@ -844,39 +932,131 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev) else if (cp->val == 0x02) return LE_AD_LIMITED; } else { - if (test_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) return LE_AD_LIMITED; - else if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) return LE_AD_GENERAL; } return 0; } -static u8 create_adv_data(struct hci_dev *hdev, u8 *ptr) +static u8 get_current_adv_instance(struct hci_dev *hdev) +{ + /* The "Set Advertising" setting supersedes the "Add Advertising" + * setting. Here we set the advertising data based on which + * setting was set. When neither apply, default to the global settings, + * represented by instance "0". + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return 0x01; + + return 0x00; +} + +static bool get_connectable(struct hci_dev *hdev) +{ + struct mgmt_pending_cmd *cmd; + + /* If there's a pending mgmt command the flag will not yet have + * it's final value, so check for this first. + */ + cmd = pending_find(MGMT_OP_SET_CONNECTABLE, hdev); + if (cmd) { + struct mgmt_mode *cp = cmd->param; + + return cp->val; + } + + return hci_dev_test_flag(hdev, HCI_CONNECTABLE); +} + +static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) +{ + u32 flags; + + if (instance > 0x01) + return 0; + + if (instance == 0x01) + return hdev->adv_instance.flags; + + /* Instance 0 always manages the "Tx Power" and "Flags" fields */ + flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; + + /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting corresponds + * to the "connectable" instance flag. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) + flags |= MGMT_ADV_FLAG_CONNECTABLE; + + return flags; +} + +static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) +{ + /* Ignore instance 0 and other unsupported instances */ + if (instance != 0x01) + return 0; + + /* TODO: Take into account the "appearance" and "local-name" flags here. + * These are currently being ignored as they are not supported. + */ + return hdev->adv_instance.scan_rsp_len; +} + +static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { u8 ad_len = 0, flags = 0; + u32 instance_flags = get_adv_instance_flags(hdev, instance); - flags |= get_adv_discov_flags(hdev); + /* The Add Advertising command allows userspace to set both the general + * and limited discoverable flags. + */ + if (instance_flags & MGMT_ADV_FLAG_DISCOV) + flags |= LE_AD_GENERAL; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - flags |= LE_AD_NO_BREDR; + if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) + flags |= LE_AD_LIMITED; - if (flags) { - BT_DBG("adv flags 0x%02x", flags); + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { + /* If a discovery flag wasn't provided, simply use the global + * settings. + */ + if (!flags) + flags |= get_adv_discov_flags(hdev); - ptr[0] = 2; - ptr[1] = EIR_FLAGS; - ptr[2] = flags; + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; - ad_len += 3; - ptr += 3; + /* If flags would still be empty, then there is no need to + * include the "Flags" AD field". + */ + if (flags) { + ptr[0] = 0x02; + ptr[1] = EIR_FLAGS; + ptr[2] = flags; + + ad_len += 3; + ptr += 3; + } } - if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) { - ptr[0] = 2; + if (instance) { + memcpy(ptr, hdev->adv_instance.adv_data, + hdev->adv_instance.adv_data_len); + + ad_len += hdev->adv_instance.adv_data_len; + ptr += hdev->adv_instance.adv_data_len; + } + + /* Provide Tx Power only if we can provide a valid value for it */ + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && + (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { + ptr[0] = 0x02; ptr[1] = EIR_TX_POWER; - ptr[2] = (u8) hdev->adv_tx_power; + ptr[2] = (u8)hdev->adv_tx_power; ad_len += 3; ptr += 3; @@ -885,19 +1065,20 @@ static u8 create_adv_data(struct hci_dev *hdev, u8 *ptr) return ad_len; } -static void update_adv_data(struct hci_request *req) +static void update_adv_data_for_instance(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; u8 len; - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; memset(&cp, 0, sizeof(cp)); - len = create_adv_data(hdev, cp.data); + len = create_instance_adv_data(hdev, instance, cp.data); + /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && memcmp(cp.data, hdev->adv_data, len) == 0) return; @@ -910,6 +1091,14 @@ static void update_adv_data(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } +static void update_adv_data(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 instance = get_current_adv_instance(hdev); + + update_adv_data_for_instance(req, instance); +} + int mgmt_update_adv_data(struct hci_dev *hdev) { struct hci_request req; @@ -979,10 +1168,10 @@ static void update_eir(struct hci_request *req) if (!lmp_ext_inq_capable(hdev)) return; - if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return; - if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return; memset(&cp, 0, sizeof(cp)); @@ -1018,17 +1207,17 @@ static void update_class(struct hci_request *req) if (!hdev_is_powered(hdev)) return; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return; - if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return; cod[0] = hdev->minor_class; cod[1] = hdev->major_class; cod[2] = get_service_classes(hdev); - if (test_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) cod[1] |= 0x20; if (memcmp(cod, hdev->dev_class, 3) == 0) @@ -1037,22 +1226,6 @@ static void update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static bool get_connectable(struct hci_dev *hdev) -{ - struct pending_cmd *cmd; - - /* If there's a pending mgmt command the flag will not yet have - * it's final value, so check for this first. - */ - cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); - if (cmd) { - struct mgmt_mode *cp = cmd->param; - return cp->val; - } - - return test_bit(HCI_CONNECTABLE, &hdev->dev_flags); -} - static void disable_advertising(struct hci_request *req) { u8 enable = 0x00; @@ -1066,11 +1239,13 @@ static void enable_advertising(struct hci_request *req) struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; + u8 instance; + u32 flags; if (hci_conn_num(hdev, LE_LINK) > 0) return; - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(req); /* Clear the HCI_LE_ADV bit temporarily so that the @@ -1078,9 +1253,16 @@ static void enable_advertising(struct hci_request *req) * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ - clear_bit(HCI_LE_ADV, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LE_ADV); + + instance = get_current_adv_instance(hdev); + flags = get_adv_instance_flags(hdev, instance); - connectable = get_connectable(hdev); + /* If the "connectable" instance flag was not set, then choose between + * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. + */ + connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || + get_connectable(hdev); /* Set require_privacy to true only when non-connectable * advertising is used. In that case it is fine to use a @@ -1092,7 +1274,14 @@ static void enable_advertising(struct hci_request *req) memset(&cp, 0, sizeof(cp)); cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); - cp.type = connectable ? LE_ADV_IND : LE_ADV_NONCONN_IND; + + if (connectable) + cp.type = LE_ADV_IND; + else if (get_adv_instance_scan_rsp_len(hdev, instance)) + cp.type = LE_ADV_SCAN_IND; + else + cp.type = LE_ADV_NONCONN_IND; + cp.own_address_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; @@ -1107,7 +1296,7 @@ static void service_cache_off(struct work_struct *work) service_cache.work); struct hci_request req; - if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) + if (!hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) return; hci_req_init(&req, hdev); @@ -1130,9 +1319,9 @@ static void rpa_expired(struct work_struct *work) BT_DBG(""); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); - if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING)) return; /* The generation of a new RPA and programming it into the @@ -1145,7 +1334,7 @@ static void rpa_expired(struct work_struct *work) static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) { - if (test_and_set_bit(HCI_MGMT, &hdev->dev_flags)) + if (hci_dev_test_and_set_flag(hdev, HCI_MGMT)) return; INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off); @@ -1156,7 +1345,7 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) * for mgmt we require user-space to explicitly enable * it */ - clear_bit(HCI_BONDABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_BONDABLE); } static int read_controller_info(struct sock *sk, struct hci_dev *hdev, @@ -1185,73 +1374,16 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_READ_INFO, 0, &rp, - sizeof(rp)); -} - -static void mgmt_pending_free(struct pending_cmd *cmd) -{ - sock_put(cmd->sk); - kfree(cmd->param); - kfree(cmd); -} - -static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, - struct hci_dev *hdev, void *data, - u16 len) -{ - struct pending_cmd *cmd; - - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); - if (!cmd) - return NULL; - - cmd->opcode = opcode; - cmd->index = hdev->id; - - cmd->param = kmemdup(data, len, GFP_KERNEL); - if (!cmd->param) { - kfree(cmd); - return NULL; - } - - cmd->param_len = len; - - cmd->sk = sk; - sock_hold(sk); - - list_add(&cmd->list, &hdev->mgmt_pending); - - return cmd; -} - -static void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, - void (*cb)(struct pending_cmd *cmd, - void *data), - void *data) -{ - struct pending_cmd *cmd, *tmp; - - list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { - if (opcode > 0 && cmd->opcode != opcode) - continue; - - cb(cmd, data); - } -} - -static void mgmt_pending_remove(struct pending_cmd *cmd) -{ - list_del(&cmd->list); - mgmt_pending_free(cmd); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_INFO, 0, &rp, + sizeof(rp)); } static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); - return cmd_complete(sk, hdev->id, opcode, 0, &settings, - sizeof(settings)); + return mgmt_cmd_complete(sk, hdev->id, opcode, 0, &settings, + sizeof(settings)); } static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) @@ -1272,9 +1404,10 @@ static bool hci_stop_discovery(struct hci_request *req) switch (hdev->discovery.state) { case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) { + if (test_bit(HCI_INQUIRY, &hdev->flags)) hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); - } else { + + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); hci_req_add_le_scan_disable(req); } @@ -1295,7 +1428,7 @@ static bool hci_stop_discovery(struct hci_request *req) default: /* Passive scanning */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { hci_req_add_le_scan_disable(req); return true; } @@ -1306,6 +1439,49 @@ static bool hci_stop_discovery(struct hci_request *req) return false; } +static void advertising_added(struct sock *sk, struct hci_dev *hdev, + u8 instance) +{ + struct mgmt_ev_advertising_added ev; + + ev.instance = instance; + + mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk); +} + +static void advertising_removed(struct sock *sk, struct hci_dev *hdev, + u8 instance) +{ + struct mgmt_ev_advertising_removed ev; + + ev.instance = instance; + + mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk); +} + +static void clear_adv_instance(struct hci_dev *hdev) +{ + struct hci_request req; + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return; + + if (hdev->adv_instance.timeout) + cancel_delayed_work(&hdev->adv_instance.timeout_exp); + + memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); + advertising_removed(NULL, hdev, 1); + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return; + + hci_req_init(&req, hdev); + disable_advertising(&req); + hci_req_run(&req, NULL); +} + static int clean_up_hci_state(struct hci_dev *hdev) { struct hci_request req; @@ -1321,7 +1497,10 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + if (hdev->adv_instance.timeout) + clear_adv_instance(hdev); + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(&req); discov_stopped = hci_stop_discovery(&req); @@ -1369,24 +1548,24 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; int err; BT_DBG("request for %s", hdev->name); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_POWERED, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_BUSY); goto failed; } - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { cancel_delayed_work(&hdev->power_off); if (cp->val) { @@ -1433,11 +1612,10 @@ failed: static int new_settings(struct hci_dev *hdev, struct sock *skip) { - __le32 ev; - - ev = cpu_to_le32(get_current_settings(hdev)); + __le32 ev = cpu_to_le32(get_current_settings(hdev)); - return mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), skip); + return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), skip); } int mgmt_new_settings(struct hci_dev *hdev) @@ -1451,7 +1629,7 @@ struct cmd_lookup { u8 mgmt_status; }; -static void settings_rsp(struct pending_cmd *cmd, void *data) +static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -1467,15 +1645,15 @@ static void settings_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_free(cmd); } -static void cmd_status_rsp(struct pending_cmd *cmd, void *data) +static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; - cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); mgmt_pending_remove(cmd); } -static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) +static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) { if (cmd->cmd_complete) { u8 *status = data; @@ -1489,23 +1667,23 @@ static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) cmd_status_rsp(cmd, data); } -static int generic_cmd_complete(struct pending_cmd *cmd, u8 status) +static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, - cmd->param, cmd->param_len); + return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, cmd->param_len); } -static int addr_cmd_complete(struct pending_cmd *cmd, u8 status) +static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - sizeof(struct mgmt_addr_info)); + return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, sizeof(struct mgmt_addr_info)); } static u8 mgmt_bredr_support(struct hci_dev *hdev) { if (!lmp_bredr_capable(hdev)) return MGMT_STATUS_NOT_SUPPORTED; - else if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + else if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return MGMT_STATUS_REJECTED; else return MGMT_STATUS_SUCCESS; @@ -1515,7 +1693,7 @@ static u8 mgmt_le_support(struct hci_dev *hdev) { if (!lmp_le_capable(hdev)) return MGMT_STATUS_NOT_SUPPORTED; - else if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + else if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return MGMT_STATUS_REJECTED; else return MGMT_STATUS_SUCCESS; @@ -1524,7 +1702,7 @@ static u8 mgmt_le_support(struct hci_dev *hdev) static void set_discoverable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; struct hci_request req; bool changed; @@ -1533,21 +1711,20 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); + cmd = pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); if (!cmd) goto unlock; if (status) { u8 mgmt_err = mgmt_status(status); - cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto remove_cmd; } cp = cmd->param; if (cp->val) { - changed = !test_and_set_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE); if (hdev->discov_timeout > 0) { int to = msecs_to_jiffies(hdev->discov_timeout * 1000); @@ -1555,8 +1732,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, to); } } else { - changed = test_and_clear_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE); } send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev); @@ -1585,7 +1761,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_discoverable *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u16 timeout; u8 scan; @@ -1593,14 +1769,14 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); timeout = __le16_to_cpu(cp->timeout); @@ -1609,27 +1785,27 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, */ if ((cp->val == 0x00 && timeout > 0) || (cp->val == 0x02 && timeout == 0)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev) && timeout > 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_NOT_POWERED); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || + pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_BUSY); goto failed; } - if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_CONNECTABLE)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_REJECTED); goto failed; } @@ -1640,8 +1816,8 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, * not a valid operation since it requires a timeout * and so no need to check HCI_LIMITED_DISCOVERABLE. */ - if (!!cp->val != test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) { - change_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + if (!!cp->val != hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) { + hci_dev_change_flag(hdev, HCI_DISCOVERABLE); changed = true; } @@ -1659,9 +1835,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, * value with the new value. And if only the timeout gets updated, * then no need for any HCI transactions. */ - if (!!cp->val == test_bit(HCI_DISCOVERABLE, &hdev->dev_flags) && - (cp->val == 0x02) == test_bit(HCI_LIMITED_DISCOVERABLE, - &hdev->dev_flags)) { + if (!!cp->val == hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && + (cp->val == 0x02) == hci_dev_test_flag(hdev, + HCI_LIMITED_DISCOVERABLE)) { cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = timeout; @@ -1690,16 +1866,16 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, /* Limited discoverable mode */ if (cp->val == 0x02) - set_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE); else - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_req_init(&req, hdev); /* The procedure for LE-only controllers is much simpler - just * update the advertising data. */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) goto update_ad; scan = SCAN_PAGE; @@ -1729,7 +1905,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, scan |= SCAN_INQUIRY; } else { - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); } hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); @@ -1752,7 +1928,7 @@ static void write_fast_connectable(struct hci_request *req, bool enable) struct hci_cp_write_page_scan_activity acp; u8 type; - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return; if (hdev->hci_ver < BLUETOOTH_VER_1_2) @@ -1784,7 +1960,7 @@ static void write_fast_connectable(struct hci_request *req, bool enable) static void set_connectable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; bool conn_changed, discov_changed; @@ -1792,26 +1968,26 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); + cmd = pending_find(MGMT_OP_SET_CONNECTABLE, hdev); if (!cmd) goto unlock; if (status) { u8 mgmt_err = mgmt_status(status); - cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); goto remove_cmd; } cp = cmd->param; if (cp->val) { - conn_changed = !test_and_set_bit(HCI_CONNECTABLE, - &hdev->dev_flags); + conn_changed = !hci_dev_test_and_set_flag(hdev, + HCI_CONNECTABLE); discov_changed = false; } else { - conn_changed = test_and_clear_bit(HCI_CONNECTABLE, - &hdev->dev_flags); - discov_changed = test_and_clear_bit(HCI_DISCOVERABLE, - &hdev->dev_flags); + conn_changed = hci_dev_test_and_clear_flag(hdev, + HCI_CONNECTABLE); + discov_changed = hci_dev_test_and_clear_flag(hdev, + HCI_DISCOVERABLE); } send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); @@ -1837,14 +2013,14 @@ static int set_connectable_update_settings(struct hci_dev *hdev, bool changed = false; int err; - if (!!val != test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) + if (!!val != hci_dev_test_flag(hdev, HCI_CONNECTABLE)) changed = true; if (val) { - set_bit(HCI_CONNECTABLE, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_CONNECTABLE); } else { - clear_bit(HCI_CONNECTABLE, &hdev->dev_flags); - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_CONNECTABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } err = send_settings_rsp(sk, MGMT_OP_SET_CONNECTABLE, hdev); @@ -1864,21 +2040,21 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 scan; int err; BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -1887,10 +2063,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || + pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_BUSY); goto failed; } @@ -1906,10 +2082,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, * by-product of disabling connectable, we need to update the * advertising flags. */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { if (!cp->val) { - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } update_adv_data(&req); } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { @@ -1938,17 +2114,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, } no_scan_update: - /* If we're going from non-connectable to connectable or - * vice-versa when fast connectable is enabled ensure that fast - * connectable gets disabled. write_fast_connectable won't do - * anything if the page scan parameters are already what they - * should be. - */ - if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) - write_fast_connectable(&req, false); - /* Update the advertising parameters if necessary */ - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) enable_advertising(&req); err = hci_req_run(&req, set_connectable_complete); @@ -1975,15 +2143,15 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) - changed = !test_and_set_bit(HCI_BONDABLE, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_BONDABLE); else - changed = test_and_clear_bit(HCI_BONDABLE, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_BONDABLE); err = send_settings_rsp(sk, MGMT_OP_SET_BONDABLE, hdev); if (err < 0) @@ -2001,7 +2169,7 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 val, status; int err; @@ -2009,21 +2177,20 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, status = mgmt_bredr_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, - status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + status); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { bool changed = false; - if (!!cp->val != test_bit(HCI_LINK_SECURITY, - &hdev->dev_flags)) { - change_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + if (!!cp->val != hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) { + hci_dev_change_flag(hdev, HCI_LINK_SECURITY); changed = true; } @@ -2037,9 +2204,9 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_LINK_SECURITY, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_LINK_SECURITY, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_BUSY); goto failed; } @@ -2070,7 +2237,7 @@ failed: static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 status; int err; @@ -2078,15 +2245,15 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) status = mgmt_bredr_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, status); if (!lmp_ssp_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -2094,16 +2261,16 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) bool changed; if (cp->val) { - changed = !test_and_set_bit(HCI_SSP_ENABLED, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, + HCI_SSP_ENABLED); } else { - changed = test_and_clear_bit(HCI_SSP_ENABLED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_SSP_ENABLED); if (!changed) - changed = test_and_clear_bit(HCI_HS_ENABLED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_HS_ENABLED); else - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev); @@ -2116,14 +2283,13 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_SSP, hdev) || - mgmt_pending_find(MGMT_OP_SET_HS, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_SSP, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_BUSY); goto failed; } - if (!!cp->val == test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + if (!!cp->val == hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev); goto failed; } @@ -2134,7 +2300,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - if (!cp->val && test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) + if (!cp->val && hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(cp->val), &cp->val); @@ -2160,32 +2326,38 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) status = mgmt_bredr_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status); if (!lmp_ssp_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_NOT_SUPPORTED); - if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); + if (pending_find(MGMT_OP_SET_SSP, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_BUSY); + goto unlock; + } + if (cp->val) { - changed = !test_and_set_bit(HCI_HS_ENABLED, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_HS_ENABLED); } else { if (hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_REJECTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_REJECTED); goto unlock; } - changed = test_and_clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_HS_ENABLED); } err = send_settings_rsp(sk, MGMT_OP_SET_HS, hdev); @@ -2226,7 +2398,7 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) * has actually been enabled. During power on, the * update in powered_update_hci will take care of it. */ - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { struct hci_request req; hci_req_init(&req, hdev); @@ -2244,7 +2416,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct hci_cp_write_le_host_supported hci_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; u8 val, enabled; @@ -2252,17 +2424,29 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_INVALID_PARAMS); + + /* Bluetooth single mode LE only controllers or dual-mode + * controllers configured as LE only devices, do not allow + * switching LE off. These have either LE enabled explicitly + * or BR/EDR has been previously switched off. + * + * When trying to enable an already enabled LE, then gracefully + * send a positive response. Trying to disable it however will + * result into rejection. + */ + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + if (cp->val == 0x01) + return send_settings_rsp(sk, MGMT_OP_SET_LE, hdev); - /* LE-only devices do not allow toggling LE on/off */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_REJECTED); + } hci_dev_lock(hdev); @@ -2272,13 +2456,13 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; - if (val != test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { - change_bit(HCI_LE_ENABLED, &hdev->dev_flags); + if (val != hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + hci_dev_change_flag(hdev, HCI_LE_ENABLED); changed = true; } - if (!val && test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + if (!val && hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + hci_dev_clear_flag(hdev, HCI_ADVERTISING); changed = true; } @@ -2292,10 +2476,10 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto unlock; } - if (mgmt_pending_find(MGMT_OP_SET_LE, hdev) || - mgmt_pending_find(MGMT_OP_SET_ADVERTISING, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_LE, hdev) || + pending_find(MGMT_OP_SET_ADVERTISING, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_BUSY); goto unlock; } @@ -2313,7 +2497,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_cp.le = val; hci_cp.simul = 0x00; } else { - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(&req); } @@ -2337,7 +2521,7 @@ unlock: */ static bool pending_eir_or_class(struct hci_dev *hdev) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { switch (cmd->opcode) { @@ -2373,16 +2557,16 @@ static u8 get_uuid_size(const u8 *uuid) static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; hci_dev_lock(hdev); - cmd = mgmt_pending_find(mgmt_op, hdev); + cmd = pending_find(mgmt_op, hdev); if (!cmd) goto unlock; - cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), - hdev->dev_class, 3); + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), hdev->dev_class, 3); mgmt_pending_remove(cmd); @@ -2400,7 +2584,7 @@ static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; struct bt_uuid *uuid; int err; @@ -2410,8 +2594,8 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, - MGMT_STATUS_BUSY); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, + MGMT_STATUS_BUSY); goto failed; } @@ -2437,8 +2621,8 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) if (err != -ENODATA) goto failed; - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, + hdev->dev_class, 3); goto failed; } @@ -2460,7 +2644,7 @@ static bool enable_service_cache(struct hci_dev *hdev) if (!hdev_is_powered(hdev)) return false; - if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { + if (!hci_dev_test_and_set_flag(hdev, HCI_SERVICE_CACHE)) { queue_delayed_work(hdev->workqueue, &hdev->service_cache, CACHE_TIMEOUT); return true; @@ -2480,7 +2664,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_uuid *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; struct hci_request req; @@ -2491,8 +2675,8 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, - MGMT_STATUS_BUSY); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, + MGMT_STATUS_BUSY); goto unlock; } @@ -2500,8 +2684,9 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, hci_uuids_clear(hdev); if (enable_service_cache(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, - 0, hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_UUID, + 0, hdev->dev_class, 3); goto unlock; } @@ -2520,8 +2705,8 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, } if (found == 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } @@ -2536,8 +2721,8 @@ update_class: if (err != -ENODATA) goto unlock; - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, + hdev->dev_class, 3); goto unlock; } @@ -2565,27 +2750,27 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_dev_class *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; BT_DBG("request for %s", hdev->name); if (!lmp_bredr_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_BUSY); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_BUSY); goto unlock; } if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } @@ -2593,14 +2778,14 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, hdev->minor_class = cp->minor; if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, + hdev->dev_class, 3); goto unlock; } hci_req_init(&req, hdev); - if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { + if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) { hci_dev_unlock(hdev); cancel_delayed_work_sync(&hdev->service_cache); hci_dev_lock(hdev); @@ -2614,8 +2799,8 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, if (err != -ENODATA) goto unlock; - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, - hdev->dev_class, 3); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, + hdev->dev_class, 3); goto unlock; } @@ -2645,15 +2830,15 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); if (!lmp_bredr_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { BT_ERR("load_link_keys: too big key_count value %u", key_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + key_count * @@ -2661,13 +2846,13 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, if (expected_len != len) { BT_ERR("load_link_keys: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); } if (cp->debug_keys != 0x00 && cp->debug_keys != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys, key_count); @@ -2676,8 +2861,9 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_link_key_info *key = &cp->keys[i]; if (key->addr.type != BDADDR_BREDR || key->type > 0x08) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); @@ -2685,11 +2871,10 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, hci_link_keys_clear(hdev); if (cp->debug_keys) - changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS); else - changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_KEEP_DEBUG_KEYS); if (changed) new_settings(hdev, NULL); @@ -2707,7 +2892,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, key->type, key->pin_len, NULL); } - cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 0, NULL, 0); + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 0, NULL, 0); hci_dev_unlock(hdev); @@ -2732,7 +2917,7 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_cp_unpair_device *cp = data; struct mgmt_rp_unpair_device rp; struct hci_cp_disconnect dc; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; @@ -2741,20 +2926,21 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); if (cp->disconnect != 0x00 && cp->disconnect != 0x01) - return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto unlock; } @@ -2804,8 +2990,9 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, } if (err < 0) { - err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, - MGMT_STATUS_NOT_PAIRED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_NOT_PAIRED, &rp, + sizeof(rp)); goto unlock; } @@ -2813,8 +3000,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, * link is requested. */ if (!conn) { - err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, 0, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, 0, + &rp, sizeof(rp)); device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, sk); goto unlock; } @@ -2844,7 +3031,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_disconnect *cp = data; struct mgmt_rp_disconnect rp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; @@ -2855,21 +3042,22 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto failed; } - if (mgmt_pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); + if (pending_find(MGMT_OP_DISCONNECT, hdev)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto failed; } @@ -2880,8 +3068,9 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_CONNECTED, &rp, + sizeof(rp)); goto failed; } @@ -2935,8 +3124,8 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, + MGMT_STATUS_NOT_POWERED); goto unlock; } @@ -2969,8 +3158,8 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, /* Recalculate length in case of filtered SCO connections, etc */ rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info)); - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp, - rp_len); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp, + rp_len); kfree(rp); @@ -2982,7 +3171,7 @@ unlock: static int send_pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_pin_code_neg_reply *cp) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; int err; cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, hdev, cp, @@ -3004,7 +3193,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; struct mgmt_cp_pin_code_reply *cp = data; struct hci_cp_pin_code_reply reply; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; int err; BT_DBG(""); @@ -3012,15 +3201,15 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_NOT_POWERED); goto failed; } conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); if (!conn) { - err = cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, - MGMT_STATUS_NOT_CONNECTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_NOT_CONNECTED); goto failed; } @@ -3033,8 +3222,8 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, err = send_pin_code_neg_reply(sk, hdev, &ncp); if (err >= 0) - err = cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_INVALID_PARAMS); goto failed; } @@ -3068,8 +3257,8 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) - return cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, - MGMT_STATUS_INVALID_PARAMS, NULL, 0); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS, NULL, 0); hci_dev_lock(hdev); @@ -3080,14 +3269,14 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, 0, NULL, - 0); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, 0, + NULL, 0); } -static struct pending_cmd *find_pairing(struct hci_conn *conn) +static struct mgmt_pending_cmd *find_pairing(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { if (cmd->opcode != MGMT_OP_PAIR_DEVICE) @@ -3102,7 +3291,7 @@ static struct pending_cmd *find_pairing(struct hci_conn *conn) return NULL; } -static int pairing_complete(struct pending_cmd *cmd, u8 status) +static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) { struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; @@ -3111,8 +3300,8 @@ static int pairing_complete(struct pending_cmd *cmd, u8 status) bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, + status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; @@ -3134,7 +3323,7 @@ static int pairing_complete(struct pending_cmd *cmd, u8 status) void mgmt_smp_complete(struct hci_conn *conn, bool complete) { u8 status = complete ? MGMT_STATUS_SUCCESS : MGMT_STATUS_FAILED; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; cmd = find_pairing(conn); if (cmd) { @@ -3145,7 +3334,7 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete) static void pairing_complete_cb(struct hci_conn *conn, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status %u", status); @@ -3161,7 +3350,7 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status) static void le_pairing_complete_cb(struct hci_conn *conn, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status %u", status); @@ -3183,7 +3372,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_pair_device *cp = data; struct mgmt_rp_pair_device rp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 sec_level, auth_type; struct hci_conn *conn; int err; @@ -3195,20 +3384,28 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); if (cp->io_cap > SMP_IO_KEYBOARD_DISPLAY) - return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); + goto unlock; + } + + if (hci_bdaddr_is_paired(hdev, &cp->addr.bdaddr, cp->addr.type)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_ALREADY_PAIRED, &rp, + sizeof(rp)); goto unlock; } @@ -3249,19 +3446,22 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, if (PTR_ERR(conn) == -EBUSY) status = MGMT_STATUS_BUSY; + else if (PTR_ERR(conn) == -EOPNOTSUPP) + status = MGMT_STATUS_NOT_SUPPORTED; + else if (PTR_ERR(conn) == -ECONNREFUSED) + status = MGMT_STATUS_REJECTED; else status = MGMT_STATUS_CONNECT_FAILED; - err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - status, &rp, - sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + status, &rp, sizeof(rp)); goto unlock; } if (conn->connect_cfm_cb) { hci_conn_drop(conn); - err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; } @@ -3305,7 +3505,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_addr_info *addr = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; @@ -3314,31 +3514,31 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED); goto unlock; } - cmd = mgmt_pending_find(MGMT_OP_PAIR_DEVICE, hdev); + cmd = pending_find(MGMT_OP_PAIR_DEVICE, hdev); if (!cmd) { - err = cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } conn = cmd->user_data; if (bacmp(&addr->bdaddr, &conn->dst) != 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED); mgmt_pending_remove(cmd); - err = cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0, - addr, sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0, + addr, sizeof(*addr)); unlock: hci_dev_unlock(hdev); return err; @@ -3348,16 +3548,16 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, struct mgmt_addr_info *addr, u16 mgmt_op, u16 hci_op, __le32 passkey) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_POWERED, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_POWERED, addr, + sizeof(*addr)); goto done; } @@ -3367,22 +3567,22 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr); if (!conn) { - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_CONNECTED, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_CONNECTED, addr, + sizeof(*addr)); goto done; } if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) { err = smp_user_confirm_reply(conn, mgmt_op, passkey); if (!err) - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_SUCCESS, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_SUCCESS, addr, + sizeof(*addr)); else - err = cmd_complete(sk, hdev->id, mgmt_op, - MGMT_STATUS_FAILED, addr, - sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_FAILED, addr, + sizeof(*addr)); goto done; } @@ -3434,8 +3634,8 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (len != sizeof(*cp)) - return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, + MGMT_STATUS_INVALID_PARAMS); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_REPLY, @@ -3491,24 +3691,24 @@ static void update_name(struct hci_request *req) static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); + cmd = pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); if (!cmd) goto unlock; cp = cmd->param; if (status) - cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, - mgmt_status(status)); + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, + mgmt_status(status)); else - cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, - cp, sizeof(*cp)); + mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + cp, sizeof(*cp)); mgmt_pending_remove(cmd); @@ -3520,7 +3720,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_local_name *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; @@ -3534,8 +3734,8 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) && !memcmp(hdev->short_name, cp->short_name, sizeof(hdev->short_name))) { - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, - data, len); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + data, len); goto failed; } @@ -3544,13 +3744,13 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (!hdev_is_powered(hdev)) { memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, - data, len); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + data, len); if (err < 0) goto failed; - err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len, - sk); + err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, + data, len, sk); goto failed; } @@ -3585,10 +3785,70 @@ failed: return err; } +static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, + u16 opcode, struct sk_buff *skb) +{ + struct mgmt_rp_read_local_oob_data mgmt_rp; + size_t rp_size = sizeof(mgmt_rp); + struct mgmt_pending_cmd *cmd; + + BT_DBG("%s status %u", hdev->name, status); + + cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev); + if (!cmd) + return; + + if (status || !skb) { + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + status ? mgmt_status(status) : MGMT_STATUS_FAILED); + goto remove; + } + + memset(&mgmt_rp, 0, sizeof(mgmt_rp)); + + if (opcode == HCI_OP_READ_LOCAL_OOB_DATA) { + struct hci_rp_read_local_oob_data *rp = (void *) skb->data; + + if (skb->len < sizeof(*rp)) { + mgmt_cmd_status(cmd->sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_FAILED); + goto remove; + } + + memcpy(mgmt_rp.hash192, rp->hash, sizeof(rp->hash)); + memcpy(mgmt_rp.rand192, rp->rand, sizeof(rp->rand)); + + rp_size -= sizeof(mgmt_rp.hash256) + sizeof(mgmt_rp.rand256); + } else { + struct hci_rp_read_local_oob_ext_data *rp = (void *) skb->data; + + if (skb->len < sizeof(*rp)) { + mgmt_cmd_status(cmd->sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_FAILED); + goto remove; + } + + memcpy(mgmt_rp.hash192, rp->hash192, sizeof(rp->hash192)); + memcpy(mgmt_rp.rand192, rp->rand192, sizeof(rp->rand192)); + + memcpy(mgmt_rp.hash256, rp->hash256, sizeof(rp->hash256)); + memcpy(mgmt_rp.rand256, rp->rand256, sizeof(rp->rand256)); + } + + mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_SUCCESS, &mgmt_rp, rp_size); + +remove: + mgmt_pending_remove(cmd); +} + static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("%s", hdev->name); @@ -3596,20 +3856,20 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_NOT_POWERED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_NOT_POWERED); goto unlock; } if (!lmp_ssp_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_NOT_SUPPORTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_NOT_SUPPORTED); goto unlock; } - if (mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_BUSY); goto unlock; } @@ -3619,12 +3879,14 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, goto unlock; } + hci_req_init(&req, hdev); + if (bredr_sc_enabled(hdev)) - err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_EXT_DATA, - 0, NULL); + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_EXT_DATA, 0, NULL); else - err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL); + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL); + err = hci_req_run_skb(&req, read_local_oob_data_complete); if (err < 0) mgmt_pending_remove(cmd); @@ -3642,9 +3904,10 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s ", hdev->name); if (!bdaddr_type_is_valid(addr->type)) - return cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, addr, - sizeof(*addr)); + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); hci_dev_lock(hdev); @@ -3653,10 +3916,10 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, u8 status; if (cp->addr.type != BDADDR_BREDR) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -3668,8 +3931,9 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, else status = MGMT_STATUS_SUCCESS; - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - status, &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, status, + &cp->addr, sizeof(cp->addr)); } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) { struct mgmt_cp_add_remote_oob_ext_data *cp = data; u8 *rand192, *hash192, *rand256, *hash256; @@ -3681,10 +3945,10 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, */ if (memcmp(cp->rand192, ZERO_KEY, 16) || memcmp(cp->hash192, ZERO_KEY, 16)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - addr, sizeof(*addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); goto unlock; } @@ -3724,12 +3988,13 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, else status = MGMT_STATUS_SUCCESS; - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - status, &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + status, &cp->addr, sizeof(cp->addr)); } else { BT_ERR("add_remote_oob_data: invalid length of %u bytes", len); - err = cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS); } unlock: @@ -3747,9 +4012,10 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (cp->addr.type != BDADDR_BREDR) - return cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); @@ -3766,100 +4032,136 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, status = MGMT_STATUS_SUCCESS; done: - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, - status, &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, + status, &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); return err; } -static bool trigger_discovery(struct hci_request *req, u8 *status) +static bool trigger_bredr_inquiry(struct hci_request *req, u8 *status) { struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - struct hci_cp_inquiry inq_cp; + struct hci_cp_inquiry cp; /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; + + *status = mgmt_bredr_support(hdev); + if (*status) + return false; + + if (hci_dev_test_flag(hdev, HCI_INQUIRY)) { + *status = MGMT_STATUS_BUSY; + return false; + } + + hci_inquiry_cache_flush(hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_BREDR_INQUIRY_LEN; + + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + return true; +} + +static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; u8 own_addr_type; int err; - switch (hdev->discovery.type) { - case DISCOV_TYPE_BREDR: - *status = mgmt_bredr_support(hdev); - if (*status) - return false; + *status = mgmt_le_support(hdev); + if (*status) + return false; - if (test_bit(HCI_INQUIRY, &hdev->flags)) { - *status = MGMT_STATUS_BUSY; + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { + /* Don't let discovery abort an outgoing connection attempt + * that's using directed advertising. + */ + if (hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + *status = MGMT_STATUS_REJECTED; return false; } - hci_inquiry_cache_flush(hdev); + disable_advertising(req); + } - memset(&inq_cp, 0, sizeof(inq_cp)); - memcpy(&inq_cp.lap, lap, sizeof(inq_cp.lap)); - inq_cp.length = DISCOV_BREDR_INQUIRY_LEN; - hci_req_add(req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp); - break; + /* If controller is scanning, it means the background scanning is + * running. Thus, we should temporarily stop it in order to set the + * discovery scanning parameters. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); - case DISCOV_TYPE_LE: - case DISCOV_TYPE_INTERLEAVED: - *status = mgmt_le_support(hdev); - if (*status) - return false; + /* All active scans will be done with either a resolvable private + * address (when privacy feature has been enabled) or non-resolvable + * private address. + */ + err = hci_update_random_address(req, true, &own_addr_type); + if (err < 0) { + *status = MGMT_STATUS_FAILED; + return false; + } - if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { - *status = MGMT_STATUS_NOT_SUPPORTED; + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(interval); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); + param_cp.own_address_type = own_addr_type; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); + + return true; +} + +static bool trigger_discovery(struct hci_request *req, u8 *status) +{ + struct hci_dev *hdev = req->hdev; + + switch (hdev->discovery.type) { + case DISCOV_TYPE_BREDR: + if (!trigger_bredr_inquiry(req, status)) return false; - } + break; - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) { - /* Don't let discovery abort an outgoing - * connection attempt that's using directed - * advertising. + case DISCOV_TYPE_INTERLEAVED: + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + /* During simultaneous discovery, we double LE scan + * interval. We must leave some time for the controller + * to do BR/EDR inquiry. */ - if (hci_conn_hash_lookup_state(hdev, LE_LINK, - BT_CONNECT)) { - *status = MGMT_STATUS_REJECTED; + if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT * 2, + status)) return false; - } - disable_advertising(req); - } - - /* If controller is scanning, it means the background scanning - * is running. Thus, we should temporarily stop it in order to - * set the discovery scanning parameters. - */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - hci_req_add_le_scan_disable(req); + if (!trigger_bredr_inquiry(req, status)) + return false; - memset(¶m_cp, 0, sizeof(param_cp)); + return true; + } - /* All active scans will be done with either a resolvable - * private address (when privacy feature has been enabled) - * or non-resolvable private address. - */ - err = hci_update_random_address(req, true, &own_addr_type); - if (err < 0) { - *status = MGMT_STATUS_FAILED; + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + *status = MGMT_STATUS_NOT_SUPPORTED; return false; } + /* fall through */ - param_cp.type = LE_SCAN_ACTIVE; - param_cp.interval = cpu_to_le16(DISCOV_LE_SCAN_INT); - param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); - param_cp.own_address_type = own_addr_type; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); + case DISCOV_TYPE_LE: + if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT, status)) + return false; break; default: @@ -3873,16 +4175,16 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) static void start_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; unsigned long timeout; BT_DBG("status %d", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); + cmd = pending_find(MGMT_OP_START_DISCOVERY, hdev); if (!cmd) - cmd = mgmt_pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); + cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); @@ -3904,7 +4206,18 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); break; case DISCOV_TYPE_INTERLEAVED: - timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); + /* When running simultaneous discovery, the LE scanning time + * should occupy the whole discovery time sine BR/EDR inquiry + * and LE scanning are scheduled by the controller. + * + * For interleaving discovery in comparison, BR/EDR inquiry + * and LE scanning are done sequentially with separate + * timeouts. + */ + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + else + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); break; case DISCOV_TYPE_BREDR: timeout = 0; @@ -3923,8 +4236,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, */ if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && - (hdev->discovery.uuid_count > 0 || - hdev->discovery.rssi != HCI_RSSI_INVALID)) { + hdev->discovery.result_filtering) { hdev->discovery.scan_start = jiffies; hdev->discovery.scan_duration = timeout; } @@ -3941,7 +4253,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 status; int err; @@ -3951,17 +4263,17 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_NOT_POWERED, - &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_POWERED, + &cp->type, sizeof(cp->type)); goto failed; } if (hdev->discovery.state != DISCOVERY_STOPPED || - test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_BUSY, &cp->type, - sizeof(cp->type)); + hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_BUSY, &cp->type, + sizeof(cp->type)); goto failed; } @@ -3984,8 +4296,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); if (!trigger_discovery(&req, &status)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + status, &cp->type, sizeof(cp->type)); mgmt_pending_remove(cmd); goto failed; } @@ -4003,17 +4315,18 @@ failed: return err; } -static int service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) +static int service_discovery_cmd_complete(struct mgmt_pending_cmd *cmd, + u8 status) { - return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, - cmd->param, 1); + return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, 1); } static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_service_discovery *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; const u16 max_uuid_count = ((U16_MAX - sizeof(*cp)) / 16); u16 uuid_count, expected_len; @@ -4025,19 +4338,19 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_NOT_POWERED, - &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_NOT_POWERED, + &cp->type, sizeof(cp->type)); goto failed; } if (hdev->discovery.state != DISCOVERY_STOPPED || - test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_BUSY, &cp->type, - sizeof(cp->type)); + hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_BUSY, &cp->type, + sizeof(cp->type)); goto failed; } @@ -4045,10 +4358,10 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, if (uuid_count > max_uuid_count) { BT_ERR("service_discovery: too big uuid_count value %u", uuid_count); - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_INVALID_PARAMS, &cp->type, - sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS, &cp->type, + sizeof(cp->type)); goto failed; } @@ -4056,10 +4369,10 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, if (expected_len != len) { BT_ERR("service_discovery: expected %u bytes, got %u bytes", expected_len, len); - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_INVALID_PARAMS, &cp->type, - sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS, &cp->type, + sizeof(cp->type)); goto failed; } @@ -4077,6 +4390,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, */ hci_discovery_filter_clear(hdev); + hdev->discovery.result_filtering = true; hdev->discovery.type = cp->type; hdev->discovery.rssi = cp->rssi; hdev->discovery.uuid_count = uuid_count; @@ -4085,10 +4399,10 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.uuids = kmemdup(cp->uuids, uuid_count * 16, GFP_KERNEL); if (!hdev->discovery.uuids) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - MGMT_STATUS_FAILED, - &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_FAILED, + &cp->type, sizeof(cp->type)); mgmt_pending_remove(cmd); goto failed; } @@ -4097,9 +4411,9 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); if (!trigger_discovery(&req, &status)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - status, &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + status, &cp->type, sizeof(cp->type)); mgmt_pending_remove(cmd); goto failed; } @@ -4119,13 +4433,13 @@ failed: static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status %d", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); + cmd = pending_find(MGMT_OP_STOP_DISCOVERY, hdev); if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); @@ -4141,7 +4455,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_stop_discovery *mgmt_cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; @@ -4150,16 +4464,16 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hci_discovery_active(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, - MGMT_STATUS_REJECTED, &mgmt_cp->type, - sizeof(mgmt_cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_REJECTED, &mgmt_cp->type, + sizeof(mgmt_cp->type)); goto unlock; } if (hdev->discovery.type != mgmt_cp->type) { - err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, - MGMT_STATUS_INVALID_PARAMS, &mgmt_cp->type, - sizeof(mgmt_cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS, + &mgmt_cp->type, sizeof(mgmt_cp->type)); goto unlock; } @@ -4185,8 +4499,8 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, /* If no HCI commands were sent we're done */ if (err == -ENODATA) { - err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, - &mgmt_cp->type, sizeof(mgmt_cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, + &mgmt_cp->type, sizeof(mgmt_cp->type)); hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } @@ -4207,17 +4521,17 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!hci_discovery_active(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, - MGMT_STATUS_FAILED, &cp->addr, - sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, + MGMT_STATUS_FAILED, &cp->addr, + sizeof(cp->addr)); goto failed; } e = hci_inquiry_cache_lookup_unknown(hdev, &cp->addr.bdaddr); if (!e) { - err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, - MGMT_STATUS_INVALID_PARAMS, &cp->addr, - sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, + MGMT_STATUS_INVALID_PARAMS, &cp->addr, + sizeof(cp->addr)); goto failed; } @@ -4229,8 +4543,8 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_inquiry_cache_update_resolve(hdev, e); } - err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, &cp->addr, - sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, + &cp->addr, sizeof(cp->addr)); failed: hci_dev_unlock(hdev); @@ -4247,9 +4561,9 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); @@ -4265,8 +4579,8 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, status = MGMT_STATUS_SUCCESS; done: - err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, - &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, + &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); @@ -4283,9 +4597,9 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); @@ -4301,8 +4615,8 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, status = MGMT_STATUS_SUCCESS; done: - err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, - &cp->addr, sizeof(cp->addr)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, + &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); @@ -4322,8 +4636,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, source = __le16_to_cpu(cp->source); if (source > 0x0002) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -4332,7 +4646,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, hdev->devid_product = __le16_to_cpu(cp->product); hdev->devid_version = __le16_to_cpu(cp->version); - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, + NULL, 0); hci_req_init(&req, hdev); update_eir(&req); @@ -4343,10 +4658,17 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } +static void enable_advertising_instance(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + BT_DBG("status %d", status); +} + static void set_advertising_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; + struct hci_request req; hci_dev_lock(hdev); @@ -4358,10 +4680,10 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, goto unlock; } - if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) - set_bit(HCI_ADVERTISING, &hdev->dev_flags); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + hci_dev_set_flag(hdev, HCI_ADVERTISING); else - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_ADVERTISING); mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, &match); @@ -4371,6 +4693,21 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, if (match.sk) sock_put(match.sk); + /* If "Set Advertising" was just disabled and instance advertising was + * set up earlier, then enable the advertising instance. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + goto unlock; + + hci_req_init(&req, hdev); + + update_adv_data(&req); + enable_advertising(&req); + + if (hci_req_run(&req, enable_advertising_instance) < 0) + BT_ERR("Failed to re-configure advertising"); + unlock: hci_dev_unlock(hdev); } @@ -4379,41 +4716,48 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; - u8 val, enabled, status; + u8 val, status; int err; BT_DBG("request for %s", hdev->name); status = mgmt_le_support(hdev); if (status) - return cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, - status); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + status); - if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, - MGMT_STATUS_INVALID_PARAMS); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); val = !!cp->val; - enabled = test_bit(HCI_ADVERTISING, &hdev->dev_flags); /* The following conditions are ones which mean that we should * not do any HCI communication but directly send a mgmt * response to user space (after toggling the flag if * necessary). */ - if (!hdev_is_powered(hdev) || val == enabled || + if (!hdev_is_powered(hdev) || + (val == hci_dev_test_flag(hdev, HCI_ADVERTISING) && + (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) || hci_conn_num(hdev, LE_LINK) > 0 || - (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE)) { - bool changed = false; + bool changed; - if (val != test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { - change_bit(HCI_ADVERTISING, &hdev->dev_flags); - changed = true; + if (cp->val) { + changed = !hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING); + if (cp->val == 0x02) + hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + else + hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + } else { + changed = hci_dev_test_and_clear_flag(hdev, HCI_ADVERTISING); + hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); } err = send_settings_rsp(sk, MGMT_OP_SET_ADVERTISING, hdev); @@ -4426,10 +4770,10 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - if (mgmt_pending_find(MGMT_OP_SET_ADVERTISING, hdev) || - mgmt_pending_find(MGMT_OP_SET_LE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_ADVERTISING, hdev) || + pending_find(MGMT_OP_SET_LE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_BUSY); goto unlock; } @@ -4441,10 +4785,19 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_init(&req, hdev); - if (val) - enable_advertising(&req); + if (cp->val == 0x02) + hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); else + hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + + if (val) { + /* Switch to instance "0" for the Set Advertising setting. */ + update_adv_data_for_instance(&req, 0); + update_scan_rsp_data_for_instance(&req, 0); + enable_advertising(&req); + } else { disable_advertising(&req); + } err = hci_req_run(&req, set_advertising_complete); if (err < 0) @@ -4464,34 +4817,38 @@ static int set_static_address(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_NOT_SUPPORTED); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_REJECTED); if (bacmp(&cp->bdaddr, BDADDR_ANY)) { if (!bacmp(&cp->bdaddr, BDADDR_NONE)) - return cmd_status(sk, hdev->id, - MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); /* Two most significant bits shall be set */ if ((cp->bdaddr.b[5] & 0xc0) != 0xc0) - return cmd_status(sk, hdev->id, - MGMT_OP_SET_STATIC_ADDRESS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); bacpy(&hdev->static_addr, &cp->bdaddr); - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, 0, NULL, 0); + err = send_settings_rsp(sk, MGMT_OP_SET_STATIC_ADDRESS, hdev); + if (err < 0) + goto unlock; - hci_dev_unlock(hdev); + err = new_settings(hdev, sk); +unlock: + hci_dev_unlock(hdev); return err; } @@ -4505,36 +4862,37 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_NOT_SUPPORTED); interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_INVALID_PARAMS); window = __le16_to_cpu(cp->window); if (window < 0x0004 || window > 0x4000) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_INVALID_PARAMS); if (window > interval) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); hdev->le_scan_interval = interval; hdev->le_scan_window = window; - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, 0, NULL, 0); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, 0, + NULL, 0); /* If background scan is running, restart it so new parameters are * loaded. */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->discovery.state == DISCOVERY_STOPPED) { struct hci_request req; @@ -4554,26 +4912,26 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, static void fast_connectable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); + cmd = pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); if (!cmd) goto unlock; if (status) { - cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - mgmt_status(status)); + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + mgmt_status(status)); } else { struct mgmt_mode *cp = cmd->param; if (cp->val) - set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_FAST_CONNECTABLE); else - clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE); send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); new_settings(hdev, cmd->sk); @@ -4589,40 +4947,40 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; BT_DBG("%s", hdev->name); - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) || + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || hdev->hci_ver < BLUETOOTH_VER_1_2) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_INVALID_PARAMS); - - if (!hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_NOT_POWERED); - - if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_BUSY); + goto unlock; + } + + if (!!cp->val == hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) { + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); goto unlock; } - if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) { + if (!hdev_is_powered(hdev)) { + hci_dev_change_flag(hdev, HCI_FAST_CONNECTABLE); err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); + new_settings(hdev, sk); goto unlock; } @@ -4639,8 +4997,8 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, err = hci_req_run(&req, fast_connectable_complete); if (err < 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_FAILED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_FAILED); mgmt_pending_remove(cmd); } @@ -4652,13 +5010,13 @@ unlock: static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_BREDR, hdev); + cmd = pending_find(MGMT_OP_SET_BREDR, hdev); if (!cmd) goto unlock; @@ -4668,9 +5026,9 @@ static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) /* We need to restore the flag if related HCI commands * failed. */ - clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); - cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); @@ -4685,41 +5043,41 @@ unlock: static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; BT_DBG("request for %s", hdev->name); if (!lmp_bredr_capable(hdev) || !lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_NOT_SUPPORTED); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_REJECTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (cp->val == test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (cp->val == hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); goto unlock; } if (!hdev_is_powered(hdev)) { if (!cp->val) { - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); - clear_bit(HCI_LINK_SECURITY, &hdev->dev_flags); - clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); + hci_dev_clear_flag(hdev, HCI_LINK_SECURITY); + hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE); + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } - change_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_change_flag(hdev, HCI_BREDR_ENABLED); err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); if (err < 0) @@ -4731,8 +5089,8 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* Reject disabling when powered on */ if (!cp->val) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_REJECTED); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); goto unlock; } else { /* When configuring a dual-mode controller to operate @@ -4749,18 +5107,18 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) * switching BR/EDR back on when secure connections has been * enabled is not a supported transaction. */ - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && (bacmp(&hdev->static_addr, BDADDR_ANY) || - test_bit(HCI_SC_ENABLED, &hdev->dev_flags))) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_REJECTED); + hci_dev_test_flag(hdev, HCI_SC_ENABLED))) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); goto unlock; } } - if (mgmt_pending_find(MGMT_OP_SET_BREDR, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_BREDR, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_BUSY); goto unlock; } @@ -4773,7 +5131,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* We need to flip the bit already here so that update_adv_data * generates the correct flags. */ - set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); hci_req_init(&req, hdev); @@ -4796,20 +5154,20 @@ unlock: static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; BT_DBG("%s status %u", hdev->name, status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + cmd = pending_find(MGMT_OP_SET_SECURE_CONN, hdev); if (!cmd) goto unlock; if (status) { - cmd_status(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(status)); + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); goto remove; } @@ -4817,16 +5175,16 @@ static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) switch (cp->val) { case 0x00: - clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SC_ENABLED); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); break; case 0x01: - set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ENABLED); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); break; case 0x02: - set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - set_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ENABLED); + hci_dev_set_flag(hdev, HCI_SC_ONLY); break; } @@ -4843,7 +5201,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 val; int err; @@ -4851,37 +5209,37 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for %s", hdev->name); if (!lmp_sc_capable(hdev) && - !test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_NOT_SUPPORTED); + !hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_NOT_SUPPORTED); - if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && lmp_sc_capable(hdev) && - !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_REJECTED); + !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) - return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || - !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { bool changed; if (cp->val) { - changed = !test_and_set_bit(HCI_SC_ENABLED, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, + HCI_SC_ENABLED); if (cp->val == 0x02) - set_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_SC_ONLY); else - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); } else { - changed = test_and_clear_bit(HCI_SC_ENABLED, - &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_SC_ENABLED); + hci_dev_clear_flag(hdev, HCI_SC_ONLY); } err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev); @@ -4894,16 +5252,16 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_BUSY); + if (pending_find(MGMT_OP_SET_SECURE_CONN, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_BUSY); goto failed; } val = !!cp->val; - if (val == test_bit(HCI_SC_ENABLED, &hdev->dev_flags) && - (cp->val == 0x02) == test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + if (val == hci_dev_test_flag(hdev, HCI_SC_ENABLED) && + (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_SC_ONLY)) { err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev); goto failed; } @@ -4937,27 +5295,26 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for %s", hdev->name); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) - changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS); else - changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_KEEP_DEBUG_KEYS); if (cp->val == 0x02) - use_changed = !test_and_set_bit(HCI_USE_DEBUG_KEYS, - &hdev->dev_flags); + use_changed = !hci_dev_test_and_set_flag(hdev, + HCI_USE_DEBUG_KEYS); else - use_changed = test_and_clear_bit(HCI_USE_DEBUG_KEYS, - &hdev->dev_flags); + use_changed = hci_dev_test_and_clear_flag(hdev, + HCI_USE_DEBUG_KEYS); if (hdev_is_powered(hdev) && use_changed && - test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { u8 mode = (cp->val == 0x02) ? 0x01 : 0x00; hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(mode), &mode); @@ -4985,32 +5342,32 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data, BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, + MGMT_STATUS_NOT_SUPPORTED); if (cp->privacy != 0x00 && cp->privacy != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, + MGMT_STATUS_INVALID_PARAMS); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, + MGMT_STATUS_REJECTED); hci_dev_lock(hdev); /* If user space supports this command it is also expected to * handle IRKs. Therefore, set the HCI_RPA_RESOLVING flag. */ - set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_RESOLVING); if (cp->privacy) { - changed = !test_and_set_bit(HCI_PRIVACY, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY); memcpy(hdev->irk, cp->irk, sizeof(hdev->irk)); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); } else { - changed = test_and_clear_bit(HCI_PRIVACY, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY); memset(hdev->irk, 0, sizeof(hdev->irk)); - clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); } err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev); @@ -5053,22 +5410,22 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, + MGMT_STATUS_NOT_SUPPORTED); irk_count = __le16_to_cpu(cp->irk_count); if (irk_count > max_irk_count) { BT_ERR("load_irks: too big irk_count value %u", irk_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info); if (expected_len != len) { BT_ERR("load_irks: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s irk_count %u", hdev->name, irk_count); @@ -5077,9 +5434,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, struct mgmt_irk_info *key = &cp->irks[i]; if (!irk_is_valid(key)) - return cmd_status(sk, hdev->id, - MGMT_OP_LOAD_IRKS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_LOAD_IRKS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); @@ -5099,9 +5456,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, BDADDR_ANY); } - set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_RPA_RESOLVING); - err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_IRKS, 0, NULL, 0); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_IRKS, 0, NULL, 0); hci_dev_unlock(hdev); @@ -5139,14 +5496,14 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for %s", hdev->name); if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { BT_ERR("load_ltks: too big key_count value %u", key_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + key_count * @@ -5154,8 +5511,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, if (expected_len != len) { BT_ERR("load_keys: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s key_count %u", hdev->name, key_count); @@ -5164,9 +5521,9 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, struct mgmt_ltk_info *key = &cp->keys[i]; if (!ltk_is_valid(key)) - return cmd_status(sk, hdev->id, - MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); @@ -5211,7 +5568,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key->rand); } - err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, NULL, 0); hci_dev_unlock(hdev); @@ -5219,7 +5576,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } -static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int conn_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_conn_info rp; @@ -5237,8 +5594,8 @@ static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) rp.max_tx_power = HCI_TX_POWER_INVALID; } - err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, + status, &rp, sizeof(rp)); hci_conn_drop(conn); hci_conn_put(conn); @@ -5250,7 +5607,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, u16 opcode) { struct hci_cp_read_rssi *cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; u16 handle; u8 status; @@ -5288,7 +5645,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, goto unlock; } - cmd = mgmt_pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn); + cmd = pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn); if (!cmd) goto unlock; @@ -5315,15 +5672,16 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) - return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto unlock; } @@ -5334,14 +5692,15 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_CONNECTED, &rp, + sizeof(rp)); goto unlock; } - if (mgmt_pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); + if (pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; } @@ -5361,7 +5720,7 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_request req; struct hci_cp_read_tx_power req_txp_cp; struct hci_cp_read_rssi req_rssi_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; hci_req_init(&req, hdev); req_rssi_cp.handle = cpu_to_le16(conn->handle); @@ -5409,8 +5768,8 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, rp.tx_power = conn->tx_power; rp.max_tx_power = conn->max_tx_power; - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); } unlock: @@ -5418,7 +5777,7 @@ unlock: return err; } -static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int clock_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_clock_info rp; @@ -5443,8 +5802,8 @@ static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) } complete: - err = cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, - sizeof(rp)); + err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + sizeof(rp)); if (conn) { hci_conn_drop(conn); @@ -5457,7 +5816,7 @@ complete: static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_cp_read_clock *hci_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_conn *conn; BT_DBG("%s status %u", hdev->name, status); @@ -5475,7 +5834,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) conn = NULL; } - cmd = mgmt_pending_find_data(MGMT_OP_GET_CLOCK_INFO, hdev, conn); + cmd = pending_find_data(MGMT_OP_GET_CLOCK_INFO, hdev, conn); if (!cmd) goto unlock; @@ -5492,7 +5851,7 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_cp_get_clock_info *cp = data; struct mgmt_rp_get_clock_info rp; struct hci_cp_read_clock hci_cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; struct hci_conn *conn; int err; @@ -5504,15 +5863,16 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, rp.addr.type = cp->addr.type; if (cp->addr.type != BDADDR_BREDR) - return cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, - MGMT_STATUS_INVALID_PARAMS, - &rp, sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, - MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_NOT_POWERED, &rp, + sizeof(rp)); goto unlock; } @@ -5520,10 +5880,10 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_GET_CLOCK_INFO, - MGMT_STATUS_NOT_CONNECTED, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_NOT_CONNECTED, + &rp, sizeof(rp)); goto unlock; } } else { @@ -5634,13 +5994,13 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_ADD_DEVICE, hdev); + cmd = pending_find(MGMT_OP_ADD_DEVICE, hdev); if (!cmd) goto unlock; @@ -5655,7 +6015,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5664,14 +6024,14 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (!bdaddr_type_is_valid(cp->addr.type) || !bacmp(&cp->addr.bdaddr, BDADDR_ANY)) - return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); if (cp->action != 0x00 && cp->action != 0x01 && cp->action != 0x02) - return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); hci_req_init(&req, hdev); @@ -5757,13 +6117,13 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); hci_dev_lock(hdev); - cmd = mgmt_pending_find(MGMT_OP_REMOVE_DEVICE, hdev); + cmd = pending_find(MGMT_OP_REMOVE_DEVICE, hdev); if (!cmd) goto unlock; @@ -5778,7 +6138,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct hci_request req; int err; @@ -5911,15 +6271,15 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, int i; if (!lmp_le_capable(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_NOT_SUPPORTED); param_count = __le16_to_cpu(cp->param_count); if (param_count > max_param_count) { BT_ERR("load_conn_param: too big param_count value %u", param_count); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + param_count * @@ -5927,8 +6287,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, if (expected_len != len) { BT_ERR("load_conn_param: expected %u bytes, got %u bytes", expected_len, len); - return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s param_count %u", hdev->name, param_count); @@ -5983,7 +6343,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); - return cmd_complete(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 0, NULL, 0); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 0, + NULL, 0); } static int set_external_config(struct sock *sk, struct hci_dev *hdev, @@ -5996,25 +6357,23 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_REJECTED); if (cp->config != 0x00 && cp->config != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_INVALID_PARAMS); if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); if (cp->config) - changed = !test_and_set_bit(HCI_EXT_CONFIGURED, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_EXT_CONFIGURED); else - changed = test_and_clear_bit(HCI_EXT_CONFIGURED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_EXT_CONFIGURED); err = send_options_rsp(sk, MGMT_OP_SET_EXTERNAL_CONFIG, hdev); if (err < 0) @@ -6025,12 +6384,12 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev, err = new_options(hdev, sk); - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) == is_configured(hdev)) { + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) == is_configured(hdev)) { mgmt_index_removed(hdev); - if (test_and_change_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { - set_bit(HCI_CONFIG, &hdev->dev_flags); - set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + if (hci_dev_test_and_change_flag(hdev, HCI_UNCONFIGURED)) { + hci_dev_set_flag(hdev, HCI_CONFIG); + hci_dev_set_flag(hdev, HCI_AUTO_OFF); queue_work(hdev->req_workqueue, &hdev->power_on); } else { @@ -6054,16 +6413,16 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); if (hdev_is_powered(hdev)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, - MGMT_STATUS_REJECTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_REJECTED); if (!bacmp(&cp->bdaddr, BDADDR_ANY)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, - MGMT_STATUS_INVALID_PARAMS); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); if (!hdev->set_bdaddr) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, - MGMT_STATUS_NOT_SUPPORTED); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); @@ -6077,16 +6436,16 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev, if (!changed) goto unlock; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) err = new_options(hdev, sk); if (is_configured(hdev)) { mgmt_index_removed(hdev); - clear_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_UNCONFIGURED); - set_bit(HCI_CONFIG, &hdev->dev_flags); - set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_set_flag(hdev, HCI_CONFIG); + hci_dev_set_flag(hdev, HCI_AUTO_OFF); queue_work(hdev->req_workqueue, &hdev->power_on); } @@ -6096,213 +6455,852 @@ unlock: return err; } -static const struct mgmt_handler { - int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, - u16 data_len); - bool var_len; - size_t data_len; -} mgmt_handlers[] = { - { NULL }, /* 0x0000 (no command) */ - { read_version, false, MGMT_READ_VERSION_SIZE }, - { read_commands, false, MGMT_READ_COMMANDS_SIZE }, - { read_index_list, false, MGMT_READ_INDEX_LIST_SIZE }, - { read_controller_info, false, MGMT_READ_INFO_SIZE }, - { set_powered, false, MGMT_SETTING_SIZE }, - { set_discoverable, false, MGMT_SET_DISCOVERABLE_SIZE }, - { set_connectable, false, MGMT_SETTING_SIZE }, - { set_fast_connectable, false, MGMT_SETTING_SIZE }, - { set_bondable, false, MGMT_SETTING_SIZE }, - { set_link_security, false, MGMT_SETTING_SIZE }, - { set_ssp, false, MGMT_SETTING_SIZE }, - { set_hs, false, MGMT_SETTING_SIZE }, - { set_le, false, MGMT_SETTING_SIZE }, - { set_dev_class, false, MGMT_SET_DEV_CLASS_SIZE }, - { set_local_name, false, MGMT_SET_LOCAL_NAME_SIZE }, - { add_uuid, false, MGMT_ADD_UUID_SIZE }, - { remove_uuid, false, MGMT_REMOVE_UUID_SIZE }, - { load_link_keys, true, MGMT_LOAD_LINK_KEYS_SIZE }, - { load_long_term_keys, true, MGMT_LOAD_LONG_TERM_KEYS_SIZE }, - { disconnect, false, MGMT_DISCONNECT_SIZE }, - { get_connections, false, MGMT_GET_CONNECTIONS_SIZE }, - { pin_code_reply, false, MGMT_PIN_CODE_REPLY_SIZE }, - { pin_code_neg_reply, false, MGMT_PIN_CODE_NEG_REPLY_SIZE }, - { set_io_capability, false, MGMT_SET_IO_CAPABILITY_SIZE }, - { pair_device, false, MGMT_PAIR_DEVICE_SIZE }, - { cancel_pair_device, false, MGMT_CANCEL_PAIR_DEVICE_SIZE }, - { unpair_device, false, MGMT_UNPAIR_DEVICE_SIZE }, - { user_confirm_reply, false, MGMT_USER_CONFIRM_REPLY_SIZE }, - { user_confirm_neg_reply, false, MGMT_USER_CONFIRM_NEG_REPLY_SIZE }, - { user_passkey_reply, false, MGMT_USER_PASSKEY_REPLY_SIZE }, - { user_passkey_neg_reply, false, MGMT_USER_PASSKEY_NEG_REPLY_SIZE }, - { read_local_oob_data, false, MGMT_READ_LOCAL_OOB_DATA_SIZE }, - { add_remote_oob_data, true, MGMT_ADD_REMOTE_OOB_DATA_SIZE }, - { remove_remote_oob_data, false, MGMT_REMOVE_REMOTE_OOB_DATA_SIZE }, - { start_discovery, false, MGMT_START_DISCOVERY_SIZE }, - { stop_discovery, false, MGMT_STOP_DISCOVERY_SIZE }, - { confirm_name, false, MGMT_CONFIRM_NAME_SIZE }, - { block_device, false, MGMT_BLOCK_DEVICE_SIZE }, - { unblock_device, false, MGMT_UNBLOCK_DEVICE_SIZE }, - { set_device_id, false, MGMT_SET_DEVICE_ID_SIZE }, - { set_advertising, false, MGMT_SETTING_SIZE }, - { set_bredr, false, MGMT_SETTING_SIZE }, - { set_static_address, false, MGMT_SET_STATIC_ADDRESS_SIZE }, - { set_scan_params, false, MGMT_SET_SCAN_PARAMS_SIZE }, - { set_secure_conn, false, MGMT_SETTING_SIZE }, - { set_debug_keys, false, MGMT_SETTING_SIZE }, - { set_privacy, false, MGMT_SET_PRIVACY_SIZE }, - { load_irks, true, MGMT_LOAD_IRKS_SIZE }, - { get_conn_info, false, MGMT_GET_CONN_INFO_SIZE }, - { get_clock_info, false, MGMT_GET_CLOCK_INFO_SIZE }, - { add_device, false, MGMT_ADD_DEVICE_SIZE }, - { remove_device, false, MGMT_REMOVE_DEVICE_SIZE }, - { load_conn_param, true, MGMT_LOAD_CONN_PARAM_SIZE }, - { read_unconf_index_list, false, MGMT_READ_UNCONF_INDEX_LIST_SIZE }, - { read_config_info, false, MGMT_READ_CONFIG_INFO_SIZE }, - { set_external_config, false, MGMT_SET_EXTERNAL_CONFIG_SIZE }, - { set_public_address, false, MGMT_SET_PUBLIC_ADDRESS_SIZE }, - { start_service_discovery,true, MGMT_START_SERVICE_DISCOVERY_SIZE }, -}; +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, + u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; -int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) + return eir_len; +} + +static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, + u16 opcode, struct sk_buff *skb) { - void *buf; - u8 *cp; - struct mgmt_hdr *hdr; - u16 opcode, index, len; - struct hci_dev *hdev = NULL; - const struct mgmt_handler *handler; + const struct mgmt_cp_read_local_oob_ext_data *mgmt_cp; + struct mgmt_rp_read_local_oob_ext_data *mgmt_rp; + u8 *h192, *r192, *h256, *r256; + struct mgmt_pending_cmd *cmd; + u16 eir_len; int err; - BT_DBG("got %zu bytes", msglen); + BT_DBG("%s status %u", hdev->name, status); - if (msglen < sizeof(*hdr)) - return -EINVAL; + cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev); + if (!cmd) + return; - buf = kmalloc(msglen, GFP_KERNEL); - if (!buf) - return -ENOMEM; + mgmt_cp = cmd->param; + + if (status) { + status = mgmt_status(status); + eir_len = 0; + + h192 = NULL; + r192 = NULL; + h256 = NULL; + r256 = NULL; + } else if (opcode == HCI_OP_READ_LOCAL_OOB_DATA) { + struct hci_rp_read_local_oob_data *rp; + + if (skb->len != sizeof(*rp)) { + status = MGMT_STATUS_FAILED; + eir_len = 0; + } else { + status = MGMT_STATUS_SUCCESS; + rp = (void *)skb->data; - if (memcpy_from_msg(buf, msg, msglen)) { - err = -EFAULT; + eir_len = 5 + 18 + 18; + h192 = rp->hash; + r192 = rp->rand; + h256 = NULL; + r256 = NULL; + } + } else { + struct hci_rp_read_local_oob_ext_data *rp; + + if (skb->len != sizeof(*rp)) { + status = MGMT_STATUS_FAILED; + eir_len = 0; + } else { + status = MGMT_STATUS_SUCCESS; + rp = (void *)skb->data; + + if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { + eir_len = 5 + 18 + 18; + h192 = NULL; + r192 = NULL; + } else { + eir_len = 5 + 18 + 18 + 18 + 18; + h192 = rp->hash192; + r192 = rp->rand192; + } + + h256 = rp->hash256; + r256 = rp->rand256; + } + } + + mgmt_rp = kmalloc(sizeof(*mgmt_rp) + eir_len, GFP_KERNEL); + if (!mgmt_rp) goto done; + + if (status) + goto send_rsp; + + eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + if (h192 && r192) { + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_HASH_C192, h192, 16); + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_RAND_R192, r192, 16); + } + + if (h256 && r256) { + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_HASH_C256, h256, 16); + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_RAND_R256, r256, 16); } - hdr = buf; - opcode = __le16_to_cpu(hdr->opcode); - index = __le16_to_cpu(hdr->index); - len = __le16_to_cpu(hdr->len); +send_rsp: + mgmt_rp->type = mgmt_cp->type; + mgmt_rp->eir_len = cpu_to_le16(eir_len); - if (len != msglen - sizeof(*hdr)) { - err = -EINVAL; + err = mgmt_cmd_complete(cmd->sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, status, + mgmt_rp, sizeof(*mgmt_rp) + eir_len); + if (err < 0 || status) goto done; + + hci_sock_set_flag(cmd->sk, HCI_MGMT_OOB_DATA_EVENTS); + + err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev, + mgmt_rp, sizeof(*mgmt_rp) + eir_len, + HCI_MGMT_OOB_DATA_EVENTS, cmd->sk); +done: + kfree(mgmt_rp); + mgmt_pending_remove(cmd); +} + +static int read_local_ssp_oob_req(struct hci_dev *hdev, struct sock *sk, + struct mgmt_cp_read_local_oob_ext_data *cp) +{ + struct mgmt_pending_cmd *cmd; + struct hci_request req; + int err; + + cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev, + cp, sizeof(*cp)); + if (!cmd) + return -ENOMEM; + + hci_req_init(&req, hdev); + + if (bredr_sc_enabled(hdev)) + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_EXT_DATA, 0, NULL); + else + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL); + + err = hci_req_run_skb(&req, read_local_oob_ext_data_complete); + if (err < 0) { + mgmt_pending_remove(cmd); + return err; } - if (index != MGMT_INDEX_NONE) { - hdev = hci_dev_get(index); - if (!hdev) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + return 0; +} + +static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_read_local_oob_ext_data *cp = data; + struct mgmt_rp_read_local_oob_ext_data *rp; + size_t rp_len; + u16 eir_len; + u8 status, flags, role, addr[7], hash[16], rand[16]; + int err; + + BT_DBG("%s", hdev->name); + + if (hdev_is_powered(hdev)) { + switch (cp->type) { + case BIT(BDADDR_BREDR): + status = mgmt_bredr_support(hdev); + if (status) + eir_len = 0; + else + eir_len = 5; + break; + case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)): + status = mgmt_le_support(hdev); + if (status) + eir_len = 0; + else + eir_len = 9 + 3 + 18 + 18 + 3; + break; + default: + status = MGMT_STATUS_INVALID_PARAMS; + eir_len = 0; + break; } + } else { + status = MGMT_STATUS_NOT_POWERED; + eir_len = 0; + } - if (test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + rp_len = sizeof(*rp) + eir_len; + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) + return -ENOMEM; + + if (status) + goto complete; + + hci_dev_lock(hdev); + + eir_len = 0; + switch (cp->type) { + case BIT(BDADDR_BREDR): + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { + err = read_local_ssp_oob_req(hdev, sk, cp); + hci_dev_unlock(hdev); + if (!err) + goto done; + + status = MGMT_STATUS_FAILED; + goto complete; + } else { + eir_len = eir_append_data(rp->eir, eir_len, + EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + } + break; + case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)): + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) && + smp_generate_oob(hdev, hash, rand) < 0) { + hci_dev_unlock(hdev); + status = MGMT_STATUS_FAILED; + goto complete; + } + + /* This should return the active RPA, but since the RPA + * is only programmed on demand, it is really hard to fill + * this in at the moment. For now disallow retrieving + * local out-of-band data when privacy is in use. + * + * Returning the identity address will not help here since + * pairing happens before the identity resolving key is + * known and thus the connection establishment happens + * based on the RPA and not the identity address. + */ + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { + hci_dev_unlock(hdev); + status = MGMT_STATUS_REJECTED; + goto complete; } - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && - opcode != MGMT_OP_READ_CONFIG_INFO && - opcode != MGMT_OP_SET_EXTERNAL_CONFIG && - opcode != MGMT_OP_SET_PUBLIC_ADDRESS) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + memcpy(addr, &hdev->static_addr, 6); + addr[6] = 0x01; + } else { + memcpy(addr, &hdev->bdaddr, 6); + addr[6] = 0x00; + } + + eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_BDADDR, + addr, sizeof(addr)); + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + role = 0x02; + else + role = 0x01; + + eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_ROLE, + &role, sizeof(role)); + + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED)) { + eir_len = eir_append_data(rp->eir, eir_len, + EIR_LE_SC_CONFIRM, + hash, sizeof(hash)); + + eir_len = eir_append_data(rp->eir, eir_len, + EIR_LE_SC_RANDOM, + rand, sizeof(rand)); } + + flags = get_adv_discov_flags(hdev); + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + + eir_len = eir_append_data(rp->eir, eir_len, EIR_FLAGS, + &flags, sizeof(flags)); + break; } - if (opcode >= ARRAY_SIZE(mgmt_handlers) || - mgmt_handlers[opcode].func == NULL) { - BT_DBG("Unknown op %u", opcode); - err = cmd_status(sk, index, opcode, - MGMT_STATUS_UNKNOWN_COMMAND); + hci_dev_unlock(hdev); + + hci_sock_set_flag(sk, HCI_MGMT_OOB_DATA_EVENTS); + + status = MGMT_STATUS_SUCCESS; + +complete: + rp->type = cp->type; + rp->eir_len = cpu_to_le16(eir_len); + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, + status, rp, sizeof(*rp) + eir_len); + if (err < 0 || status) goto done; + + err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev, + rp, sizeof(*rp) + eir_len, + HCI_MGMT_OOB_DATA_EVENTS, sk); + +done: + kfree(rp); + + return err; +} + +static u32 get_supported_adv_flags(struct hci_dev *hdev) +{ + u32 flags = 0; + + flags |= MGMT_ADV_FLAG_CONNECTABLE; + flags |= MGMT_ADV_FLAG_DISCOV; + flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; + flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) + flags |= MGMT_ADV_FLAG_TX_POWER; + + return flags; +} + +static int read_adv_features(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_adv_features *rp; + size_t rp_len; + int err; + bool instance; + u32 supported_flags; + + BT_DBG("%s", hdev->name); + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, + MGMT_STATUS_REJECTED); + + hci_dev_lock(hdev); + + rp_len = sizeof(*rp); + + /* Currently only one instance is supported, so just add 1 to the + * response length. + */ + instance = hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE); + if (instance) + rp_len++; + + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + hci_dev_unlock(hdev); + return -ENOMEM; } - if (hdev && (opcode <= MGMT_OP_READ_INDEX_LIST || - opcode == MGMT_OP_READ_UNCONF_INDEX_LIST)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + supported_flags = get_supported_adv_flags(hdev); + + rp->supported_flags = cpu_to_le32(supported_flags); + rp->max_adv_data_len = HCI_MAX_AD_LENGTH; + rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; + rp->max_instances = 1; + + /* Currently only one instance is supported, so simply return the + * current instance number. + */ + if (instance) { + rp->num_instances = 1; + rp->instance[0] = 1; + } else { + rp->num_instances = 0; } - if (!hdev && (opcode > MGMT_OP_READ_INDEX_LIST && - opcode != MGMT_OP_READ_UNCONF_INDEX_LIST)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + hci_dev_unlock(hdev); + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, + MGMT_STATUS_SUCCESS, rp, rp_len); + + kfree(rp); + + return err; +} + +static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, + u8 len, bool is_adv_data) +{ + u8 max_len = HCI_MAX_AD_LENGTH; + int i, cur_len; + bool flags_managed = false; + bool tx_power_managed = false; + u32 flags_params = MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS; + + if (is_adv_data && (adv_flags & flags_params)) { + flags_managed = true; + max_len -= 3; } - handler = &mgmt_handlers[opcode]; + if (is_adv_data && (adv_flags & MGMT_ADV_FLAG_TX_POWER)) { + tx_power_managed = true; + max_len -= 3; + } - if ((handler->var_len && len < handler->data_len) || - (!handler->var_len && len != handler->data_len)) { - err = cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_PARAMS); - goto done; + if (len > max_len) + return false; + + /* Make sure that the data is correctly formatted. */ + for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { + cur_len = data[i]; + + if (flags_managed && data[i + 1] == EIR_FLAGS) + return false; + + if (tx_power_managed && data[i + 1] == EIR_TX_POWER) + return false; + + /* If the current field length would exceed the total data + * length, then it's invalid. + */ + if (i + cur_len >= len) + return false; } - if (hdev) - mgmt_init_hdev(sk, hdev); + return true; +} + +static void add_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + struct mgmt_pending_cmd *cmd; + struct mgmt_rp_add_advertising rp; + + BT_DBG("status %d", status); + + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); + + if (status) { + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); + advertising_removed(cmd ? cmd->sk : NULL, hdev, 1); + } + + if (!cmd) + goto unlock; - cp = buf + sizeof(*hdr); + rp.instance = 0x01; + + if (status) + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + else + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), &rp, sizeof(rp)); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + +static void adv_timeout_expired(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_instance.timeout_exp.work); + + hdev->adv_instance.timeout = 0; + + hci_dev_lock(hdev); + clear_adv_instance(hdev); + hci_dev_unlock(hdev); +} + +static int add_advertising(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_add_advertising *cp = data; + struct mgmt_rp_add_advertising rp; + u32 flags; + u32 supported_flags; + u8 status; + u16 timeout; + int err; + struct mgmt_pending_cmd *cmd; + struct hci_request req; + + BT_DBG("%s", hdev->name); + + status = mgmt_le_support(hdev); + if (status) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + status); - err = handler->func(sk, hdev, cp, len); + flags = __le32_to_cpu(cp->flags); + timeout = __le16_to_cpu(cp->timeout); + + /* The current implementation only supports adding one instance and only + * a subset of the specified flags. + */ + supported_flags = get_supported_adv_flags(hdev); + if (cp->instance != 0x01 || (flags & ~supported_flags)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (timeout && !hdev_is_powered(hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_REJECTED); + goto unlock; + } + + if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || + pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || + pending_find(MGMT_OP_SET_LE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_BUSY); + goto unlock; + } + + if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, + cp->scan_rsp_len, false)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + goto unlock; + } + + INIT_DELAYED_WORK(&hdev->adv_instance.timeout_exp, adv_timeout_expired); + + hdev->adv_instance.flags = flags; + hdev->adv_instance.adv_data_len = cp->adv_data_len; + hdev->adv_instance.scan_rsp_len = cp->scan_rsp_len; + + if (cp->adv_data_len) + memcpy(hdev->adv_instance.adv_data, cp->data, cp->adv_data_len); + + if (cp->scan_rsp_len) + memcpy(hdev->adv_instance.scan_rsp_data, + cp->data + cp->adv_data_len, cp->scan_rsp_len); + + if (hdev->adv_instance.timeout) + cancel_delayed_work(&hdev->adv_instance.timeout_exp); + + hdev->adv_instance.timeout = timeout; + + if (timeout) + queue_delayed_work(hdev->workqueue, + &hdev->adv_instance.timeout_exp, + msecs_to_jiffies(timeout * 1000)); + + if (!hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING_INSTANCE)) + advertising_added(sk, hdev, 1); + + /* If the HCI_ADVERTISING flag is set or the device isn't powered then + * we have no HCI communication to make. Simply return. + */ + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + rp.instance = 0x01; + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + goto unlock; + } + + /* We're good to go, update advertising data, parameters, and start + * advertising. + */ + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_ADVERTISING, hdev, data, + data_len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_req_init(&req, hdev); + + update_adv_data(&req); + update_scan_rsp_data(&req); + enable_advertising(&req); + + err = hci_req_run(&req, add_advertising_complete); if (err < 0) - goto done; + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); - err = msglen; + return err; +} -done: - if (hdev) - hci_dev_put(hdev); +static void remove_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + struct mgmt_pending_cmd *cmd; + struct mgmt_rp_remove_advertising rp; + + BT_DBG("status %d", status); + + hci_dev_lock(hdev); + + /* A failure status here only means that we failed to disable + * advertising. Otherwise, the advertising instance has been removed, + * so report success. + */ + cmd = pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev); + if (!cmd) + goto unlock; + + rp.instance = 1; + + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, MGMT_STATUS_SUCCESS, + &rp, sizeof(rp)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + +static int remove_advertising(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_remove_advertising *cp = data; + struct mgmt_rp_remove_advertising rp; + int err; + struct mgmt_pending_cmd *cmd; + struct hci_request req; + + BT_DBG("%s", hdev->name); + + /* The current implementation only allows modifying instance no 1. A + * value of 0 indicates that all instances should be cleared. + */ + if (cp->instance > 1) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || + pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || + pending_find(MGMT_OP_SET_LE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_BUSY); + goto unlock; + } + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + goto unlock; + } + + if (hdev->adv_instance.timeout) + cancel_delayed_work(&hdev->adv_instance.timeout_exp); + + memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); + + advertising_removed(sk, hdev, 1); + + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + + /* If the HCI_ADVERTISING flag is set or the device isn't powered then + * we have no HCI communication to make. Simply return. + */ + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + rp.instance = 1; + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADVERTISING, hdev, data, + data_len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_req_init(&req, hdev); + disable_advertising(&req); + + err = hci_req_run(&req, remove_advertising_complete); + if (err < 0) + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); - kfree(buf); return err; } +static const struct hci_mgmt_handler mgmt_handlers[] = { + { NULL }, /* 0x0000 (no command) */ + { read_version, MGMT_READ_VERSION_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_commands, MGMT_READ_COMMANDS_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_index_list, MGMT_READ_INDEX_LIST_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_controller_info, MGMT_READ_INFO_SIZE, + HCI_MGMT_UNTRUSTED }, + { set_powered, MGMT_SETTING_SIZE }, + { set_discoverable, MGMT_SET_DISCOVERABLE_SIZE }, + { set_connectable, MGMT_SETTING_SIZE }, + { set_fast_connectable, MGMT_SETTING_SIZE }, + { set_bondable, MGMT_SETTING_SIZE }, + { set_link_security, MGMT_SETTING_SIZE }, + { set_ssp, MGMT_SETTING_SIZE }, + { set_hs, MGMT_SETTING_SIZE }, + { set_le, MGMT_SETTING_SIZE }, + { set_dev_class, MGMT_SET_DEV_CLASS_SIZE }, + { set_local_name, MGMT_SET_LOCAL_NAME_SIZE }, + { add_uuid, MGMT_ADD_UUID_SIZE }, + { remove_uuid, MGMT_REMOVE_UUID_SIZE }, + { load_link_keys, MGMT_LOAD_LINK_KEYS_SIZE, + HCI_MGMT_VAR_LEN }, + { load_long_term_keys, MGMT_LOAD_LONG_TERM_KEYS_SIZE, + HCI_MGMT_VAR_LEN }, + { disconnect, MGMT_DISCONNECT_SIZE }, + { get_connections, MGMT_GET_CONNECTIONS_SIZE }, + { pin_code_reply, MGMT_PIN_CODE_REPLY_SIZE }, + { pin_code_neg_reply, MGMT_PIN_CODE_NEG_REPLY_SIZE }, + { set_io_capability, MGMT_SET_IO_CAPABILITY_SIZE }, + { pair_device, MGMT_PAIR_DEVICE_SIZE }, + { cancel_pair_device, MGMT_CANCEL_PAIR_DEVICE_SIZE }, + { unpair_device, MGMT_UNPAIR_DEVICE_SIZE }, + { user_confirm_reply, MGMT_USER_CONFIRM_REPLY_SIZE }, + { user_confirm_neg_reply, MGMT_USER_CONFIRM_NEG_REPLY_SIZE }, + { user_passkey_reply, MGMT_USER_PASSKEY_REPLY_SIZE }, + { user_passkey_neg_reply, MGMT_USER_PASSKEY_NEG_REPLY_SIZE }, + { read_local_oob_data, MGMT_READ_LOCAL_OOB_DATA_SIZE }, + { add_remote_oob_data, MGMT_ADD_REMOTE_OOB_DATA_SIZE, + HCI_MGMT_VAR_LEN }, + { remove_remote_oob_data, MGMT_REMOVE_REMOTE_OOB_DATA_SIZE }, + { start_discovery, MGMT_START_DISCOVERY_SIZE }, + { stop_discovery, MGMT_STOP_DISCOVERY_SIZE }, + { confirm_name, MGMT_CONFIRM_NAME_SIZE }, + { block_device, MGMT_BLOCK_DEVICE_SIZE }, + { unblock_device, MGMT_UNBLOCK_DEVICE_SIZE }, + { set_device_id, MGMT_SET_DEVICE_ID_SIZE }, + { set_advertising, MGMT_SETTING_SIZE }, + { set_bredr, MGMT_SETTING_SIZE }, + { set_static_address, MGMT_SET_STATIC_ADDRESS_SIZE }, + { set_scan_params, MGMT_SET_SCAN_PARAMS_SIZE }, + { set_secure_conn, MGMT_SETTING_SIZE }, + { set_debug_keys, MGMT_SETTING_SIZE }, + { set_privacy, MGMT_SET_PRIVACY_SIZE }, + { load_irks, MGMT_LOAD_IRKS_SIZE, + HCI_MGMT_VAR_LEN }, + { get_conn_info, MGMT_GET_CONN_INFO_SIZE }, + { get_clock_info, MGMT_GET_CLOCK_INFO_SIZE }, + { add_device, MGMT_ADD_DEVICE_SIZE }, + { remove_device, MGMT_REMOVE_DEVICE_SIZE }, + { load_conn_param, MGMT_LOAD_CONN_PARAM_SIZE, + HCI_MGMT_VAR_LEN }, + { read_unconf_index_list, MGMT_READ_UNCONF_INDEX_LIST_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_config_info, MGMT_READ_CONFIG_INFO_SIZE, + HCI_MGMT_UNCONFIGURED | + HCI_MGMT_UNTRUSTED }, + { set_external_config, MGMT_SET_EXTERNAL_CONFIG_SIZE, + HCI_MGMT_UNCONFIGURED }, + { set_public_address, MGMT_SET_PUBLIC_ADDRESS_SIZE, + HCI_MGMT_UNCONFIGURED }, + { start_service_discovery, MGMT_START_SERVICE_DISCOVERY_SIZE, + HCI_MGMT_VAR_LEN }, + { read_local_oob_ext_data, MGMT_READ_LOCAL_OOB_EXT_DATA_SIZE }, + { read_ext_index_list, MGMT_READ_EXT_INDEX_LIST_SIZE, + HCI_MGMT_NO_HDEV | + HCI_MGMT_UNTRUSTED }, + { read_adv_features, MGMT_READ_ADV_FEATURES_SIZE }, + { add_advertising, MGMT_ADD_ADVERTISING_SIZE, + HCI_MGMT_VAR_LEN }, + { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, +}; + void mgmt_index_added(struct hci_dev *hdev) { - if (hdev->dev_type != HCI_BREDR) - return; + struct mgmt_ev_ext_index ev; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) - mgmt_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, NULL); - else - mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); + switch (hdev->dev_type) { + case HCI_BREDR: + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, + NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; + } + break; + case HCI_AMP: + ev.type = 0x02; + break; + default: + return; + } + + ev.bus = hdev->bus; + + mgmt_index_event(MGMT_EV_EXT_INDEX_ADDED, hdev, &ev, sizeof(ev), + HCI_MGMT_EXT_INDEX_EVENTS); } void mgmt_index_removed(struct hci_dev *hdev) { + struct mgmt_ev_ext_index ev; u8 status = MGMT_STATUS_INVALID_INDEX; - if (hdev->dev_type != HCI_BREDR) + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + switch (hdev->dev_type) { + case HCI_BREDR: + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, + NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; + } + break; + case HCI_AMP: + ev.type = 0x02; + break; + default: return; + } - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + ev.bus = hdev->bus; - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) - mgmt_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, NULL); - else - mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); + mgmt_index_event(MGMT_EV_EXT_INDEX_REMOVED, hdev, &ev, sizeof(ev), + HCI_MGMT_EXT_INDEX_EVENTS); } /* This function requires the caller holds hdev->lock */ @@ -6367,7 +7365,7 @@ static int powered_update_hci(struct hci_dev *hdev) hci_req_init(&req, hdev); - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && !lmp_host_ssp_capable(hdev)) { u8 mode = 0x01; @@ -6381,7 +7379,7 @@ static int powered_update_hci(struct hci_dev *hdev) } } - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && lmp_bredr_capable(hdev)) { struct hci_cp_write_le_host_supported cp; @@ -6402,24 +7400,28 @@ static int powered_update_hci(struct hci_dev *hdev) * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. */ - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { update_adv_data(&req); update_scan_rsp_data(&req); } - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) enable_advertising(&req); restart_le_actions(&req); } - link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(link_sec), &link_sec); if (lmp_bredr_capable(hdev)) { - write_fast_connectable(&req, false); + if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) + write_fast_connectable(&req, true); + else + write_fast_connectable(&req, false); __hci_update_page_scan(&req); update_class(&req); update_name(&req); @@ -6435,7 +7437,7 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) u8 status, zero_cod[] = { 0, 0, 0 }; int err; - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_MGMT)) return 0; if (powered) { @@ -6456,7 +7458,7 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) * been triggered, potentially causing misleading DISCONNECTED * status responses. */ - if (test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) status = MGMT_STATUS_INVALID_INDEX; else status = MGMT_STATUS_NOT_POWERED; @@ -6464,8 +7466,8 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), NULL); new_settings: err = new_settings(hdev, match.sk); @@ -6478,10 +7480,10 @@ new_settings: void mgmt_set_powered_failed(struct hci_dev *hdev, int err) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 status; - cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev); + cmd = pending_find(MGMT_OP_SET_POWERED, hdev); if (!cmd) return; @@ -6490,7 +7492,7 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err) else status = MGMT_STATUS_FAILED; - cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status); + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status); mgmt_pending_remove(cmd); } @@ -6506,17 +7508,23 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) * of a timeout triggered from general discoverable, it is * safe to unconditionally clear the flag. */ - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); - clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_req_init(&req, hdev); - if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { u8 scan = SCAN_PAGE; hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); } update_class(&req); - update_adv_data(&req); + + /* Advertising instances don't use the global discoverable setting, so + * only update AD if advertising was enabled using Set Advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + update_adv_data(&req); + hci_req_run(&req, NULL); hdev->discov_timeout = 0; @@ -6654,7 +7662,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); - ev.key.master = csrk->master; + ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL); @@ -6681,17 +7689,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, mgmt_event(MGMT_EV_NEW_CONN_PARAM, hdev, &ev, sizeof(ev), NULL); } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u32 flags, u8 *name, u8 name_len) { @@ -6729,7 +7726,7 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, sizeof(*ev) + eir_len, NULL); } -static void disconnect_rsp(struct pending_cmd *cmd, void *data) +static void disconnect_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct sock **sk = data; @@ -6741,7 +7738,7 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_remove(cmd); } -static void unpair_device_rsp(struct pending_cmd *cmd, void *data) +static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct hci_dev *hdev = data; struct mgmt_cp_unpair_device *cp = cmd->param; @@ -6754,10 +7751,10 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data) bool mgmt_powering_down(struct hci_dev *hdev) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; - cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev); + cmd = pending_find(MGMT_OP_SET_POWERED, hdev); if (!cmd) return false; @@ -6809,12 +7806,12 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, { u8 bdaddr_type = link_to_bdaddr(link_type, addr_type); struct mgmt_cp_disconnect *cp; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, hdev); - cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, hdev); + cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) return; @@ -6864,9 +7861,9 @@ void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure) void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; - cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, hdev); + cmd = pending_find(MGMT_OP_PIN_CODE_REPLY, hdev); if (!cmd) return; @@ -6877,9 +7874,9 @@ void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; - cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, hdev); + cmd = pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, hdev); if (!cmd) return; @@ -6922,9 +7919,9 @@ static int user_pairing_resp_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status, u8 opcode) { - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; - cmd = mgmt_pending_find(opcode, hdev); + cmd = pending_find(opcode, hdev); if (!cmd) return -ENOENT; @@ -6983,7 +7980,7 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status) { struct mgmt_ev_auth_failed ev; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; u8 status = mgmt_status(hci_status); bacpy(&ev.addr.bdaddr, &conn->dst); @@ -7014,11 +8011,9 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) } if (test_bit(HCI_AUTH, &hdev->flags)) - changed = !test_and_set_bit(HCI_LINK_SECURITY, - &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_LINK_SECURITY); else - changed = test_and_clear_bit(HCI_LINK_SECURITY, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, &match); @@ -7054,9 +8049,9 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) if (status) { u8 mgmt_err = mgmt_status(status); - if (enable && test_and_clear_bit(HCI_SSP_ENABLED, - &hdev->dev_flags)) { - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + if (enable && hci_dev_test_and_clear_flag(hdev, + HCI_SSP_ENABLED)) { + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); new_settings(hdev, NULL); } @@ -7066,14 +8061,14 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) } if (enable) { - changed = !test_and_set_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); } else { - changed = test_and_clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); if (!changed) - changed = test_and_clear_bit(HCI_HS_ENABLED, - &hdev->dev_flags); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_HS_ENABLED); else - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); @@ -7086,8 +8081,8 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_init(&req, hdev); - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { - if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { + if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(enable), &enable); update_eir(&req); @@ -7098,7 +8093,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_run(&req, NULL); } -static void sk_lookup(struct pending_cmd *cmd, void *data) +static void sk_lookup(struct mgmt_pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -7118,8 +8113,8 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) - mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3, - NULL); + mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + dev_class, 3, NULL); if (match.sk) sock_put(match.sk); @@ -7128,7 +8123,7 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) { struct mgmt_cp_set_local_name ev; - struct pending_cmd *cmd; + struct mgmt_pending_cmd *cmd; if (status) return; @@ -7137,55 +8132,19 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH); - cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); + cmd = pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); if (!cmd) { memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); /* If this is a HCI command related to powering on the * HCI dev don't send any mgmt signals. */ - if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) + if (pending_find(MGMT_OP_SET_POWERED, hdev)) return; } - mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); -} - -void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, - u8 *rand192, u8 *hash256, u8 *rand256, - u8 status) -{ - struct pending_cmd *cmd; - - BT_DBG("%s status %u", hdev->name, status); - - cmd = mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev); - if (!cmd) - return; - - if (status) { - cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - mgmt_status(status)); - } else { - struct mgmt_rp_read_local_oob_data rp; - size_t rp_size = sizeof(rp); - - memcpy(rp.hash192, hash192, sizeof(rp.hash192)); - memcpy(rp.rand192, rand192, sizeof(rp.rand192)); - - if (bredr_sc_enabled(hdev) && hash256 && rand256) { - memcpy(rp.hash256, hash256, sizeof(rp.hash256)); - memcpy(rp.rand256, rand256, sizeof(rp.rand256)); - } else { - rp_size -= sizeof(rp.hash256) + sizeof(rp.rand256); - } - - cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, rp_size); - } - - mgmt_pending_remove(cmd); + mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) @@ -7258,7 +8217,7 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) static void restart_le_scan(struct hci_dev *hdev) { /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, @@ -7270,14 +8229,58 @@ static void restart_le_scan(struct hci_dev *hdev) DISCOV_LE_RESTART_DELAY); } +static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, + u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) +{ + /* If a RSSI threshold has been specified, and + * HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, then all results with + * a RSSI smaller than the RSSI threshold will be dropped. If the quirk + * is set, let it through for further processing, as we might need to + * restart the scan. + * + * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, + * the results are also dropped. + */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + (rssi == HCI_RSSI_INVALID || + (rssi < hdev->discovery.rssi && + !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) + return false; + + if (hdev->discovery.uuid_count != 0) { + /* If a list of UUIDs is provided in filter, results with no + * matching UUID should be dropped. + */ + if (!eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, + hdev->discovery.uuids) && + !eir_has_uuids(scan_rsp, scan_rsp_len, + hdev->discovery.uuid_count, + hdev->discovery.uuids)) + return false; + } + + /* If duplicate filtering does not report RSSI changes, then restart + * scanning to ensure updated result with updated RSSI values. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) { + restart_le_scan(hdev); + + /* Validate RSSI value against the RSSI threshold once more. */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + rssi < hdev->discovery.rssi) + return false; + } + + return true; +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { char buf[512]; - struct mgmt_ev_device_found *ev = (void *) buf; + struct mgmt_ev_device_found *ev = (void *)buf; size_t ev_size; - bool match; /* Don't send events for a non-kernel initiated discovery. With * LE one exception is if we have pend_le_reports > 0 in which @@ -7290,21 +8293,12 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } - /* When using service discovery with a RSSI threshold, then check - * if such a RSSI threshold is specified. If a RSSI threshold has - * been specified, and HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, - * then all results with a RSSI smaller than the RSSI threshold will be - * dropped. If the quirk is set, let it through for further processing, - * as we might need to restart the scan. - * - * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, - * the results are also dropped. - */ - if (hdev->discovery.rssi != HCI_RSSI_INVALID && - (rssi == HCI_RSSI_INVALID || - (rssi < hdev->discovery.rssi && - !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) - return; + if (hdev->discovery.result_filtering) { + /* We are using service discovery */ + if (!is_filter_match(hdev, rssi, eir, eir_len, scan_rsp, + scan_rsp_len)) + return; + } /* Make sure that the buffer is big enough. The 5 extra bytes * are for the potential CoD field. @@ -7331,87 +8325,17 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, ev->rssi = rssi; ev->flags = cpu_to_le32(flags); - if (eir_len > 0) { - /* When using service discovery and a list of UUID is - * provided, results with no matching UUID should be - * dropped. In case there is a match the result is - * kept and checking possible scan response data - * will be skipped. - */ - if (hdev->discovery.uuid_count > 0) { - match = eir_has_uuids(eir, eir_len, - hdev->discovery.uuid_count, - hdev->discovery.uuids); - /* If duplicate filtering does not report RSSI changes, - * then restart scanning to ensure updated result with - * updated RSSI values. - */ - if (match && test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, - &hdev->quirks)) - restart_le_scan(hdev); - } else { - match = true; - } - - if (!match && !scan_rsp_len) - return; - + if (eir_len > 0) /* Copy EIR or advertising data into event */ memcpy(ev->eir, eir, eir_len); - } else { - /* When using service discovery and a list of UUID is - * provided, results with empty EIR or advertising data - * should be dropped since they do not match any UUID. - */ - if (hdev->discovery.uuid_count > 0 && !scan_rsp_len) - return; - - match = false; - } if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV)) eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); - if (scan_rsp_len > 0) { - /* When using service discovery and a list of UUID is - * provided, results with no matching UUID should be - * dropped if there is no previous match from the - * advertising data. - */ - if (hdev->discovery.uuid_count > 0) { - if (!match && !eir_has_uuids(scan_rsp, scan_rsp_len, - hdev->discovery.uuid_count, - hdev->discovery.uuids)) - return; - - /* If duplicate filtering does not report RSSI changes, - * then restart scanning to ensure updated result with - * updated RSSI values. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, - &hdev->quirks)) - restart_le_scan(hdev); - } - + if (scan_rsp_len > 0) /* Append scan response data to event */ memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len); - } else { - /* When using service discovery and a list of UUID is - * provided, results with empty scan response and no - * previous matched advertising data should be dropped. - */ - if (hdev->discovery.uuid_count > 0 && !match) - return; - } - - /* Validate the reported RSSI value against the RSSI threshold once more - * incase HCI_QUIRK_STRICT_DUPLICATE_FILTER forced a restart of LE - * scanning. - */ - if (hdev->discovery.rssi != HCI_RSSI_INVALID && - rssi < hdev->discovery.rssi) - return; ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); ev_size = sizeof(*ev) + eir_len + scan_rsp_len; @@ -7464,10 +8388,28 @@ void mgmt_reenable_advertising(struct hci_dev *hdev) { struct hci_request req; - if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) return; hci_req_init(&req, hdev); enable_advertising(&req); hci_req_run(&req, adv_enable_complete); } + +static struct hci_mgmt_chan chan = { + .channel = HCI_CHANNEL_CONTROL, + .handler_count = ARRAY_SIZE(mgmt_handlers), + .handlers = mgmt_handlers, + .hdev_init = mgmt_init_hdev, +}; + +int mgmt_init(void) +{ + return hci_mgmt_chan_register(&chan); +} + +void mgmt_exit(void) +{ + hci_mgmt_chan_unregister(&chan); +} diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c new file mode 100644 index 000000000000..8c30c7eb8bef --- /dev/null +++ b/net/bluetooth/mgmt_util.c @@ -0,0 +1,210 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2015 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/mgmt.h> + +#include "mgmt_util.h" + +int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, + void *data, u16 data_len, int flag, struct sock *skip_sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(event); + if (hdev) + hdr->index = cpu_to_le16(hdev->id); + else + hdr->index = cpu_to_le16(MGMT_INDEX_NONE); + hdr->len = cpu_to_le16(data_len); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + /* Time stamp */ + __net_timestamp(skb); + + hci_send_to_channel(channel, skb, flag, skip_sk); + kfree_skb(skb); + + return 0; +} + +int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_status *ev; + int err; + + BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(sizeof(*ev)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + ev->status = status; + ev->opcode = cpu_to_le16(cmd); + + err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + + return err; +} + +int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, + void *rp, size_t rp_len) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + int err; + + BT_DBG("sock %p", sk); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); + + ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); + ev->opcode = cpu_to_le16(cmd); + ev->status = status; + + if (rp) + memcpy(ev->data, rp, rp_len); + + err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + + return err; +} + +struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, + struct hci_dev *hdev) +{ + struct mgmt_pending_cmd *cmd; + + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + if (hci_sock_get_channel(cmd->sk) != channel) + continue; + if (cmd->opcode == opcode) + return cmd; + } + + return NULL; +} + +struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel, + u16 opcode, + struct hci_dev *hdev, + const void *data) +{ + struct mgmt_pending_cmd *cmd; + + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + if (cmd->user_data != data) + continue; + if (cmd->opcode == opcode) + return cmd; + } + + return NULL; +} + +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, + void (*cb)(struct mgmt_pending_cmd *cmd, void *data), + void *data) +{ + struct mgmt_pending_cmd *cmd, *tmp; + + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { + if (opcode > 0 && cmd->opcode != opcode) + continue; + + cb(cmd, data); + } +} + +struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, + struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_pending_cmd *cmd; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->opcode = opcode; + cmd->index = hdev->id; + + cmd->param = kmemdup(data, len, GFP_KERNEL); + if (!cmd->param) { + kfree(cmd); + return NULL; + } + + cmd->param_len = len; + + cmd->sk = sk; + sock_hold(sk); + + list_add(&cmd->list, &hdev->mgmt_pending); + + return cmd; +} + +void mgmt_pending_free(struct mgmt_pending_cmd *cmd) +{ + sock_put(cmd->sk); + kfree(cmd->param); + kfree(cmd); +} + +void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) +{ + list_del(&cmd->list); + mgmt_pending_free(cmd); +} diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h new file mode 100644 index 000000000000..6559f189213c --- /dev/null +++ b/net/bluetooth/mgmt_util.h @@ -0,0 +1,53 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2015 Intel Coropration + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +struct mgmt_pending_cmd { + struct list_head list; + u16 opcode; + int index; + void *param; + size_t param_len; + struct sock *sk; + void *user_data; + int (*cmd_complete)(struct mgmt_pending_cmd *cmd, u8 status); +}; + +int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, + void *data, u16 data_len, int flag, struct sock *skip_sk); +int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status); +int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, + void *rp, size_t rp_len); + +struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, + struct hci_dev *hdev); +struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel, + u16 opcode, + struct hci_dev *hdev, + const void *data); +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, + void (*cb)(struct mgmt_pending_cmd *cmd, void *data), + void *data); +struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, + struct hci_dev *hdev, + void *data, u16 len); +void mgmt_pending_free(struct mgmt_pending_cmd *cmd); +void mgmt_pending_remove(struct mgmt_pending_cmd *cmd); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 3c6d2c8ac1a4..825e8fb5114b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -549,8 +549,8 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * return 0; } -static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -615,8 +615,8 @@ done: return sent; } -static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -627,7 +627,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return 0; } - len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags); + len = bt_sock_stream_recvmsg(sock, msg, size, flags); lock_sock(sk); if (!(flags & MSG_PEEK) && len > 0) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 76321b546e84..4322c833e748 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -688,8 +688,8 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len return 0; } -static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; int err; @@ -758,8 +758,8 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting) } } -static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct sco_pinfo *pi = sco_pi(sk); @@ -777,7 +777,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); - return bt_sock_recvmsg(iocb, sock, msg, len, flags); + return bt_sock_recvmsg(sock, msg, len, flags); } static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) @@ -1083,9 +1083,13 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -void sco_connect_cfm(struct hci_conn *hcon, __u8 status) +static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); + if (!status) { struct sco_conn *conn; @@ -1096,8 +1100,11 @@ void sco_connect_cfm(struct hci_conn *hcon, __u8 status) sco_conn_del(hcon, bt_to_errno(status)); } -void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) +static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1122,6 +1129,12 @@ drop: return 0; } +static struct hci_cb sco_cb = { + .name = "SCO", + .connect_cfm = sco_connect_cfm, + .disconn_cfm = sco_disconn_cfm, +}; + static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; @@ -1203,6 +1216,8 @@ int __init sco_init(void) BT_INFO("SCO socket layer initialized"); + hci_register_cb(&sco_cb); + if (IS_ERR_OR_NULL(bt_debugfs)) return 0; @@ -1216,12 +1231,14 @@ error: return err; } -void __exit sco_exit(void) +void sco_exit(void) { bt_procfs_cleanup(&init_net, "sco"); debugfs_remove(sco_debugfs); + hci_unregister_cb(&sco_cb); + bt_sock_unregister(BTPROTO_SCO); proto_unregister(&sco_proto); diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index 378f4064952c..dc688f13e496 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include <linux/debugfs.h> + #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -154,6 +156,21 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], return 0; } +static char test_ecdh_buffer[32]; + +static ssize_t test_ecdh_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(user_buf, count, ppos, test_ecdh_buffer, + strlen(test_ecdh_buffer)); +} + +static const struct file_operations test_ecdh_fops = { + .open = simple_open, + .read = test_ecdh_read, + .llseek = default_llseek, +}; + static int __init test_ecdh(void) { ktime_t calltime, delta, rettime; @@ -165,19 +182,19 @@ static int __init test_ecdh(void) err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); if (err) { BT_ERR("ECDH sample 1 failed"); - return err; + goto done; } err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); if (err) { BT_ERR("ECDH sample 2 failed"); - return err; + goto done; } err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); if (err) { BT_ERR("ECDH sample 3 failed"); - return err; + goto done; } rettime = ktime_get(); @@ -186,7 +203,17 @@ static int __init test_ecdh(void) BT_INFO("ECDH test passed in %llu usecs", duration); - return 0; +done: + if (!err) + snprintf(test_ecdh_buffer, sizeof(test_ecdh_buffer), + "PASS (%llu usecs)\n", duration); + else + snprintf(test_ecdh_buffer, sizeof(test_ecdh_buffer), "FAIL\n"); + + debugfs_create_file("selftest_ecdh", 0444, bt_debugfs, NULL, + &test_ecdh_fops); + + return err; } #else diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index c09a821f381d..1ab3dc9c8f99 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -52,7 +52,7 @@ #define SMP_TIMEOUT msecs_to_jiffies(30000) -#define AUTH_REQ_MASK(dev) (test_bit(HCI_SC_ENABLED, &(dev)->dev_flags) ? \ +#define AUTH_REQ_MASK(dev) (hci_dev_test_flag(dev, HCI_SC_ENABLED) ? \ 0x1f : 0x07) #define KEY_DIST_MASK 0x07 @@ -70,7 +70,19 @@ enum { SMP_FLAG_DEBUG_KEY, SMP_FLAG_WAIT_USER, SMP_FLAG_DHKEY_PENDING, - SMP_FLAG_OOB, + SMP_FLAG_REMOTE_OOB, + SMP_FLAG_LOCAL_OOB, +}; + +struct smp_dev { + /* Secure Connections OOB data */ + u8 local_pk[64]; + u8 local_sk[32]; + u8 local_rand[16]; + bool debug_key; + + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; }; struct smp_chan { @@ -84,7 +96,8 @@ struct smp_chan { u8 rrnd[16]; /* SMP Pairing Random (remote) */ u8 pcnf[16]; /* SMP Pairing Confirm */ u8 tk[16]; /* SMP Temporary Key */ - u8 rr[16]; + u8 rr[16]; /* Remote OOB ra/rb value */ + u8 lr[16]; /* Local OOB ra/rb value */ u8 enc_key_size; u8 remote_key_dist; bdaddr_t id_addr; @@ -478,18 +491,18 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], const bdaddr_t *bdaddr) { struct l2cap_chan *chan = hdev->smp_data; - struct crypto_blkcipher *tfm; + struct smp_dev *smp; u8 hash[3]; int err; if (!chan || !chan->data) return false; - tfm = chan->data; + smp = chan->data; BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk); - err = smp_ah(tfm, irk, &bdaddr->b[3], hash); + err = smp_ah(smp->tfm_aes, irk, &bdaddr->b[3], hash); if (err) return false; @@ -499,20 +512,20 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) { struct l2cap_chan *chan = hdev->smp_data; - struct crypto_blkcipher *tfm; + struct smp_dev *smp; int err; if (!chan || !chan->data) return -EOPNOTSUPP; - tfm = chan->data; + smp = chan->data; get_random_bytes(&rpa->b[3], 3); rpa->b[5] &= 0x3f; /* Clear two most significant bits */ rpa->b[5] |= 0x40; /* Set second most significant bit */ - err = smp_ah(tfm, irk, &rpa->b[3], rpa->b); + err = smp_ah(smp->tfm_aes, irk, &rpa->b[3], rpa->b); if (err < 0) return err; @@ -521,6 +534,53 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) return 0; } +int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) +{ + struct l2cap_chan *chan = hdev->smp_data; + struct smp_dev *smp; + int err; + + if (!chan || !chan->data) + return -EOPNOTSUPP; + + smp = chan->data; + + if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { + BT_DBG("Using debug keys"); + memcpy(smp->local_pk, debug_pk, 64); + memcpy(smp->local_sk, debug_sk, 32); + smp->debug_key = true; + } else { + while (true) { + /* Generate local key pair for Secure Connections */ + if (!ecc_make_key(smp->local_pk, smp->local_sk)) + return -EIO; + + /* This is unlikely, but we need to check that + * we didn't accidentially generate a debug key. + */ + if (memcmp(smp->local_sk, debug_sk, 32)) + break; + } + smp->debug_key = false; + } + + SMP_DBG("OOB Public Key X: %32phN", smp->local_pk); + SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32); + SMP_DBG("OOB Private Key: %32phN", smp->local_sk); + + get_random_bytes(smp->local_rand, 16); + + err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->local_pk, + smp->local_rand, 0, hash); + if (err < 0) + return err; + + memcpy(rand, smp->local_rand, 16); + + return 0; +} + static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data) { struct l2cap_chan *chan = conn->smp; @@ -589,7 +649,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, struct hci_dev *hdev = hcon->hdev; u8 local_dist = 0, remote_dist = 0, oob_flag = SMP_OOB_NOT_PRESENT; - if (test_bit(HCI_BONDABLE, &conn->hcon->hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_BONDABLE)) { local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; authreq |= SMP_AUTH_BONDING; @@ -597,18 +657,18 @@ static void build_pairing_cmd(struct l2cap_conn *conn, authreq &= ~SMP_AUTH_BONDING; } - if (test_bit(HCI_RPA_RESOLVING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_RPA_RESOLVING)) remote_dist |= SMP_DIST_ID_KEY; - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) local_dist |= SMP_DIST_ID_KEY; - if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags) && + if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) && (authreq & SMP_AUTH_SC)) { struct oob_data *oob_data; u8 bdaddr_type; - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { local_dist |= SMP_DIST_LINK_KEY; remote_dist |= SMP_DIST_LINK_KEY; } @@ -621,10 +681,12 @@ static void build_pairing_cmd(struct l2cap_conn *conn, oob_data = hci_find_remote_oob_data(hdev, &hcon->dst, bdaddr_type); if (oob_data && oob_data->present) { - set_bit(SMP_FLAG_OOB, &smp->flags); + set_bit(SMP_FLAG_REMOTE_OOB, &smp->flags); oob_flag = SMP_OOB_PRESENT; memcpy(smp->rr, oob_data->rand256, 16); memcpy(smp->pcnf, oob_data->hash256, 16); + SMP_DBG("OOB Remote Confirmation: %16phN", smp->pcnf); + SMP_DBG("OOB Remote Random: %16phN", smp->rr); } } else { @@ -681,9 +743,9 @@ static void smp_chan_destroy(struct l2cap_conn *conn) complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags); mgmt_smp_complete(hcon, complete); - kfree(smp->csrk); - kfree(smp->slave_csrk); - kfree(smp->link_key); + kzfree(smp->csrk); + kzfree(smp->slave_csrk); + kzfree(smp->link_key); crypto_free_blkcipher(smp->tfm_aes); crypto_free_hash(smp->tfm_cmac); @@ -692,7 +754,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn) * support hasn't been explicitly enabled. */ if (smp->ltk && smp->ltk->type == SMP_LTK_P256_DEBUG && - !test_bit(HCI_KEEP_DEBUG_KEYS, &hcon->hdev->dev_flags)) { + !hci_dev_test_flag(hcon->hdev, HCI_KEEP_DEBUG_KEYS)) { list_del_rcu(&smp->ltk->list); kfree_rcu(smp->ltk, rcu); smp->ltk = NULL; @@ -717,7 +779,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn) } chan->data = NULL; - kfree(smp); + kzfree(smp); hci_conn_drop(hcon); } @@ -818,6 +880,12 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, return 0; } + /* If this function is used for SC -> legacy fallback we + * can only recover the just-works case. + */ + if (test_bit(SMP_FLAG_SC, &smp->flags)) + return -EINVAL; + /* Not Just Works/Confirm results in MITM Authentication */ if (smp->method != JUST_CFM) { set_bit(SMP_FLAG_MITM_AUTH, &smp->flags); @@ -1052,7 +1120,7 @@ static void smp_notify_keys(struct l2cap_conn *conn) /* Don't keep debug keys around if the relevant * flag is not set. */ - if (!test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS) && key->type == HCI_LK_DEBUG_COMBINATION) { list_del_rcu(&key->list); kfree_rcu(key, rcu); @@ -1097,13 +1165,13 @@ static void sc_generate_link_key(struct smp_chan *smp) return; if (smp_h6(smp->tfm_cmac, smp->tk, tmp1, smp->link_key)) { - kfree(smp->link_key); + kzfree(smp->link_key); smp->link_key = NULL; return; } if (smp_h6(smp->tfm_cmac, smp->link_key, lebr, smp->link_key)) { - kfree(smp->link_key); + kzfree(smp->link_key); smp->link_key = NULL; return; } @@ -1252,7 +1320,10 @@ static void smp_distribute_keys(struct smp_chan *smp) csrk = kzalloc(sizeof(*csrk), GFP_KERNEL); if (csrk) { - csrk->master = 0x00; + if (hcon->sec_level > BT_SECURITY_MEDIUM) + csrk->type = MGMT_CSRK_LOCAL_AUTHENTICATED; + else + csrk->type = MGMT_CSRK_LOCAL_UNAUTHENTICATED; memcpy(csrk->val, sign.csrk, sizeof(csrk->val)); } smp->slave_csrk = csrk; @@ -1297,7 +1368,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) smp->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(smp->tfm_aes)) { BT_ERR("Unable to create ECB crypto context"); - kfree(smp); + kzfree(smp); return NULL; } @@ -1305,7 +1376,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) if (IS_ERR(smp->tfm_cmac)) { BT_ERR("Unable to create CMAC crypto context"); crypto_free_blkcipher(smp->tfm_aes); - kfree(smp); + kzfree(smp); return NULL; } @@ -1601,15 +1672,15 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp, struct hci_dev *hdev = conn->hcon->hdev; u8 local_dist = 0, remote_dist = 0; - if (test_bit(HCI_BONDABLE, &hdev->dev_flags)) { + if (hci_dev_test_flag(hdev, HCI_BONDABLE)) { local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; } - if (test_bit(HCI_RPA_RESOLVING, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_RPA_RESOLVING)) remote_dist |= SMP_DIST_ID_KEY; - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) local_dist |= SMP_DIST_ID_KEY; if (!rsp) { @@ -1661,22 +1732,29 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) /* We didn't start the pairing, so match remote */ auth = req->auth_req & AUTH_REQ_MASK(hdev); - if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BONDABLE) && (auth & SMP_AUTH_BONDING)) return SMP_PAIRING_NOTSUPP; - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC)) + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC)) return SMP_AUTH_REQUIREMENTS; smp->preq[0] = SMP_CMD_PAIRING_REQ; memcpy(&smp->preq[1], req, sizeof(*req)); skb_pull(skb, sizeof(*req)); + /* If the remote side's OOB flag is set it means it has + * successfully received our local OOB data - therefore set the + * flag to indicate that local OOB is in use. + */ + if (req->oob_flag == SMP_OOB_PRESENT) + set_bit(SMP_FLAG_LOCAL_OOB, &smp->flags); + /* SMP over BR/EDR requires special treatment */ if (conn->hcon->type == ACL_LINK) { /* We must have a BR/EDR SC link */ if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) && - !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + !hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return SMP_CROSS_TRANSP_NOT_ALLOWED; set_bit(SMP_FLAG_SC, &smp->flags); @@ -1734,14 +1812,19 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) clear_bit(SMP_FLAG_INITIATOR, &smp->flags); + /* Strictly speaking we shouldn't allow Pairing Confirm for the + * SC case, however some implementations incorrectly copy RFU auth + * req bits from our security request, which may create a false + * positive SC enablement. + */ + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); + if (test_bit(SMP_FLAG_SC, &smp->flags)) { SMP_ALLOW_CMD(smp, SMP_CMD_PUBLIC_KEY); /* Clear bits which are generated but not distributed */ smp->remote_key_dist &= ~SMP_SC_NO_DIST; /* Wait for Public Key from Initiating Device */ return 0; - } else { - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); } /* Request setup of TK */ @@ -1758,7 +1841,26 @@ static u8 sc_send_public_key(struct smp_chan *smp) BT_DBG(""); - if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) { + if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) { + struct l2cap_chan *chan = hdev->smp_data; + struct smp_dev *smp_dev; + + if (!chan || !chan->data) + return SMP_UNSPECIFIED; + + smp_dev = chan->data; + + memcpy(smp->local_pk, smp_dev->local_pk, 64); + memcpy(smp->local_sk, smp_dev->local_sk, 32); + memcpy(smp->lr, smp_dev->local_rand, 16); + + if (smp_dev->debug_key) + set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); + + goto done; + } + + if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { BT_DBG("Using debug keys"); memcpy(smp->local_pk, debug_pk, 64); memcpy(smp->local_sk, debug_sk, 32); @@ -1777,8 +1879,9 @@ static u8 sc_send_public_key(struct smp_chan *smp) } } +done: SMP_DBG("Local Public Key X: %32phN", smp->local_pk); - SMP_DBG("Local Public Key Y: %32phN", &smp->local_pk[32]); + SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32); SMP_DBG("Local Private Key: %32phN", smp->local_sk); smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk); @@ -1813,9 +1916,16 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) auth = rsp->auth_req & AUTH_REQ_MASK(hdev); - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC)) + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC)) return SMP_AUTH_REQUIREMENTS; + /* If the remote side's OOB flag is set it means it has + * successfully received our local OOB data - therefore set the + * flag to indicate that local OOB is in use. + */ + if (rsp->oob_flag == SMP_OOB_PRESENT) + set_bit(SMP_FLAG_LOCAL_OOB, &smp->flags); + smp->prsp[0] = SMP_CMD_PAIRING_RSP; memcpy(&smp->prsp[1], rsp, sizeof(*rsp)); @@ -1882,10 +1992,6 @@ static u8 sc_check_confirm(struct smp_chan *smp) BT_DBG(""); - /* Public Key exchange must happen before any other steps */ - if (!test_bit(SMP_FLAG_REMOTE_PK, &smp->flags)) - return SMP_UNSPECIFIED; - if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); @@ -1898,6 +2004,47 @@ static u8 sc_check_confirm(struct smp_chan *smp) return 0; } +/* Work-around for some implementations that incorrectly copy RFU bits + * from our security request and thereby create the impression that + * we're doing SC when in fact the remote doesn't support it. + */ +static int fixup_sc_false_positive(struct smp_chan *smp) +{ + struct l2cap_conn *conn = smp->conn; + struct hci_conn *hcon = conn->hcon; + struct hci_dev *hdev = hcon->hdev; + struct smp_cmd_pairing *req, *rsp; + u8 auth; + + /* The issue is only observed when we're in slave role */ + if (hcon->out) + return SMP_UNSPECIFIED; + + if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { + BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode"); + return SMP_UNSPECIFIED; + } + + BT_ERR("Trying to fall back to legacy SMP"); + + req = (void *) &smp->preq[1]; + rsp = (void *) &smp->prsp[1]; + + /* Rebuild key dist flags which may have been cleared for SC */ + smp->remote_key_dist = (req->init_key_dist & rsp->resp_key_dist); + + auth = req->auth_req & AUTH_REQ_MASK(hdev); + + if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) { + BT_ERR("Failed to fall back to legacy SMP"); + return SMP_UNSPECIFIED; + } + + clear_bit(SMP_FLAG_SC, &smp->flags); + + return 0; +} + static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) { struct l2cap_chan *chan = conn->smp; @@ -1911,8 +2058,19 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf)); skb_pull(skb, sizeof(smp->pcnf)); - if (test_bit(SMP_FLAG_SC, &smp->flags)) - return sc_check_confirm(smp); + if (test_bit(SMP_FLAG_SC, &smp->flags)) { + int ret; + + /* Public Key exchange must happen before any other steps */ + if (test_bit(SMP_FLAG_REMOTE_PK, &smp->flags)) + return sc_check_confirm(smp); + + BT_ERR("Unexpected SMP Pairing Confirm"); + + ret = fixup_sc_false_positive(smp); + if (ret) + return ret; + } if (conn->hcon->out) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), @@ -1923,8 +2081,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) if (test_bit(SMP_FLAG_TK_VALID, &smp->flags)) return smp_confirm(smp); - else - set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); + + set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); return 0; } @@ -2083,7 +2241,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) auth = rp->auth_req & AUTH_REQ_MASK(hdev); - if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC)) + if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC)) return SMP_AUTH_REQUIREMENTS; if (hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT) @@ -2104,7 +2262,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!smp) return SMP_UNSPECIFIED; - if (!test_bit(HCI_BONDABLE, &hcon->hdev->dev_flags) && + if (!hci_dev_test_flag(hdev, HCI_BONDABLE) && (auth & SMP_AUTH_BONDING)) return SMP_PAIRING_NOTSUPP; @@ -2138,7 +2296,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) chan = conn->smp; - if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) + if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) @@ -2167,7 +2325,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq = seclevel_to_authreq(sec_level); - if (test_bit(HCI_SC_ENABLED, &hcon->hdev->dev_flags)) + if (hci_dev_test_flag(hcon->hdev, HCI_SC_ENABLED)) authreq |= SMP_AUTH_SC; /* Require MITM if IO Capability allows or the security level @@ -2352,7 +2510,10 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) csrk = kzalloc(sizeof(*csrk), GFP_KERNEL); if (csrk) { - csrk->master = 0x01; + if (conn->hcon->sec_level > BT_SECURITY_MEDIUM) + csrk->type = MGMT_CSRK_REMOTE_AUTHENTICATED; + else + csrk->type = MGMT_CSRK_REMOTE_UNAUTHENTICATED; memcpy(csrk->val, rp->csrk, sizeof(csrk->val)); } smp->csrk = csrk; @@ -2368,7 +2529,8 @@ static u8 sc_select_method(struct smp_chan *smp) struct smp_cmd_pairing *local, *remote; u8 local_mitm, remote_mitm, local_io, remote_io, method; - if (test_bit(SMP_FLAG_OOB, &smp->flags)) + if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags) || + test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) return REQ_OOB; /* The preq/prsp contain the raw Pairing Request/Response PDUs @@ -2422,6 +2584,16 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) memcpy(smp->remote_pk, key, 64); + if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags)) { + err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->remote_pk, + smp->rr, 0, cfm.confirm_val); + if (err) + return SMP_UNSPECIFIED; + + if (memcmp(cfm.confirm_val, smp->pcnf, 16)) + return SMP_CONFIRM_FAILED; + } + /* Non-initiating device sends its public key after receiving * the key from the initiating device. */ @@ -2432,7 +2604,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk); - SMP_DBG("Remote Public Key Y: %32phN", &smp->remote_pk[32]); + SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32); if (!ecdh_shared_secret(smp->remote_pk, smp->local_sk, smp->dhkey)) return SMP_UNSPECIFIED; @@ -2470,14 +2642,6 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->remote_pk, - smp->rr, 0, cfm.confirm_val); - if (err) - return SMP_UNSPECIFIED; - - if (memcmp(cfm.confirm_val, smp->pcnf, 16)) - return SMP_CONFIRM_FAILED; - if (hcon->out) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); @@ -2550,6 +2714,8 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) put_unaligned_le32(hcon->passkey_notify, r); + else if (smp->method == REQ_OOB) + memcpy(r, smp->lr, 16); err = smp_f6(smp->tfm_cmac, smp->mackey, smp->rrnd, smp->prnd, r, io_cap, remote_addr, local_addr, e); @@ -2600,7 +2766,7 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb) if (skb->len < 1) return -EILSEQ; - if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) { + if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) { reason = SMP_PAIRING_NOTSUPP; goto done; } @@ -2738,16 +2904,16 @@ static void bredr_pairing(struct l2cap_chan *chan) return; /* Secure Connections support must be enabled */ - if (!test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_SC_ENABLED)) return; /* BR/EDR must use Secure Connections for SMP */ if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) && - !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + !hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return; /* If our LE support is not enabled don't do anything */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return; /* Don't bother if remote LE support is not enabled */ @@ -2851,7 +3017,7 @@ static struct sk_buff *smp_alloc_skb_cb(struct l2cap_chan *chan, return ERR_PTR(-ENOMEM); skb->priority = HCI_PRIO_MAX; - bt_cb(skb)->chan = chan; + bt_cb(skb)->l2cap.chan = chan; return skb; } @@ -2924,51 +3090,63 @@ static const struct l2cap_ops smp_root_chan_ops = { static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) { struct l2cap_chan *chan; - struct crypto_blkcipher *tfm_aes; + struct smp_dev *smp; + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; if (cid == L2CAP_CID_SMP_BREDR) { - tfm_aes = NULL; + smp = NULL; goto create_chan; } - tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, 0); + smp = kzalloc(sizeof(*smp), GFP_KERNEL); + if (!smp) + return ERR_PTR(-ENOMEM); + + tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm_aes)) { - BT_ERR("Unable to create crypto context"); + BT_ERR("Unable to create ECB crypto context"); + kzfree(smp); return ERR_CAST(tfm_aes); } + tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_cmac)) { + BT_ERR("Unable to create CMAC crypto context"); + crypto_free_blkcipher(tfm_aes); + kzfree(smp); + return ERR_CAST(tfm_cmac); + } + + smp->tfm_aes = tfm_aes; + smp->tfm_cmac = tfm_cmac; + create_chan: chan = l2cap_chan_create(); if (!chan) { - crypto_free_blkcipher(tfm_aes); + if (smp) { + crypto_free_blkcipher(smp->tfm_aes); + crypto_free_hash(smp->tfm_cmac); + kzfree(smp); + } return ERR_PTR(-ENOMEM); } - chan->data = tfm_aes; + chan->data = smp; l2cap_add_scid(chan, cid); l2cap_chan_set_defaults(chan); if (cid == L2CAP_CID_SMP) { - /* If usage of static address is forced or if the devices - * does not have a public address, then listen on the static - * address. - * - * In case BR/EDR has been disabled on a dual-mode controller - * and a static address has been configued, then listen on - * the static address instead. - */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && - bacmp(&hdev->static_addr, BDADDR_ANY))) { - bacpy(&chan->src, &hdev->static_addr); - chan->src_type = BDADDR_LE_RANDOM; - } else { - bacpy(&chan->src, &hdev->bdaddr); + u8 bdaddr_type; + + hci_copy_identity_address(hdev, &chan->src, &bdaddr_type); + + if (bdaddr_type == ADDR_LE_DEV_PUBLIC) chan->src_type = BDADDR_LE_PUBLIC; - } + else + chan->src_type = BDADDR_LE_RANDOM; } else { bacpy(&chan->src, &hdev->bdaddr); chan->src_type = BDADDR_BREDR; @@ -2987,14 +3165,18 @@ create_chan: static void smp_del_chan(struct l2cap_chan *chan) { - struct crypto_blkcipher *tfm_aes; + struct smp_dev *smp; BT_DBG("chan %p", chan); - tfm_aes = chan->data; - if (tfm_aes) { + smp = chan->data; + if (smp) { chan->data = NULL; - crypto_free_blkcipher(tfm_aes); + if (smp->tfm_aes) + crypto_free_blkcipher(smp->tfm_aes); + if (smp->tfm_cmac) + crypto_free_hash(smp->tfm_cmac); + kzfree(smp); } l2cap_chan_put(chan); @@ -3007,7 +3189,7 @@ static ssize_t force_bredr_smp_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -3029,7 +3211,7 @@ static ssize_t force_bredr_smp_write(struct file *file, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + if (enable == hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return -EALREADY; if (enable) { @@ -3048,7 +3230,7 @@ static ssize_t force_bredr_smp_write(struct file *file, smp_del_chan(chan); } - change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags); + hci_dev_change_flag(hdev, HCI_FORCE_BREDR_SMP); return count; } @@ -3367,6 +3549,21 @@ static int __init test_h6(struct crypto_hash *tfm_cmac) return 0; } +static char test_smp_buffer[32]; + +static ssize_t test_smp_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(user_buf, count, ppos, test_smp_buffer, + strlen(test_smp_buffer)); +} + +static const struct file_operations test_smp_fops = { + .open = simple_open, + .read = test_smp_read, + .llseek = default_llseek, +}; + static int __init run_selftests(struct crypto_blkcipher *tfm_aes, struct crypto_hash *tfm_cmac) { @@ -3379,49 +3576,49 @@ static int __init run_selftests(struct crypto_blkcipher *tfm_aes, err = test_ah(tfm_aes); if (err) { BT_ERR("smp_ah test failed"); - return err; + goto done; } err = test_c1(tfm_aes); if (err) { BT_ERR("smp_c1 test failed"); - return err; + goto done; } err = test_s1(tfm_aes); if (err) { BT_ERR("smp_s1 test failed"); - return err; + goto done; } err = test_f4(tfm_cmac); if (err) { BT_ERR("smp_f4 test failed"); - return err; + goto done; } err = test_f5(tfm_cmac); if (err) { BT_ERR("smp_f5 test failed"); - return err; + goto done; } err = test_f6(tfm_cmac); if (err) { BT_ERR("smp_f6 test failed"); - return err; + goto done; } err = test_g2(tfm_cmac); if (err) { BT_ERR("smp_g2 test failed"); - return err; + goto done; } err = test_h6(tfm_cmac); if (err) { BT_ERR("smp_h6 test failed"); - return err; + goto done; } rettime = ktime_get(); @@ -3430,7 +3627,17 @@ static int __init run_selftests(struct crypto_blkcipher *tfm_aes, BT_INFO("SMP test passed in %llu usecs", duration); - return 0; +done: + if (!err) + snprintf(test_smp_buffer, sizeof(test_smp_buffer), + "PASS (%llu usecs)\n", duration); + else + snprintf(test_smp_buffer, sizeof(test_smp_buffer), "FAIL\n"); + + debugfs_create_file("selftest_smp", 0444, bt_debugfs, NULL, + &test_smp_fops); + + return err; } int __init bt_selftest_smp(void) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 60c5b73fcb4b..6cf872563ea7 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -188,6 +188,7 @@ int smp_user_confirm_reply(struct hci_conn *conn, u16 mgmt_op, __le32 passkey); bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], const bdaddr_t *bdaddr); int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa); +int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]); int smp_register(struct hci_dev *hdev); void smp_unregister(struct hci_dev *hdev); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ffd379db5938..4ff77a16956c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -25,6 +25,9 @@ #define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) +const struct nf_br_ops __rcu *nf_br_ops __read_mostly; +EXPORT_SYMBOL_GPL(nf_br_ops); + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -33,16 +36,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + const struct nf_br_ops *nf_ops; u16 vid = 0; rcu_read_lock(); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { - br_nf_pre_routing_finish_bridge_slow(skb); + nf_ops = rcu_dereference(nf_br_ops); + if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) { rcu_read_unlock(); return NETDEV_TX_OK; } -#endif u64_stats_update_begin(&brstats->syncp); brstats->tx_packets++; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f96933a823e3..e97572b5d2cc 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,11 +35,9 @@ static inline int should_deliver(const struct net_bridge_port *p, p->state == BR_STATE_FORWARDING; } -int br_dev_queue_push_xmit(struct sk_buff *skb) +int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) { - /* ip_fragment doesn't copy the MAC header */ - if (nf_bridge_maybe_copy_header(skb) || - !is_skb_forwardable(skb->dev, skb)) { + if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); @@ -51,9 +49,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); -int br_forward_finish(struct sk_buff *skb) +int br_forward_finish(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb, + NULL, skb->dev, br_dev_queue_push_xmit); } @@ -77,7 +76,8 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) return; } - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, br_forward_finish); } @@ -98,7 +98,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb, + indev, skb->dev, br_forward_finish); } @@ -188,6 +189,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; + if ((p->flags & BR_PROXYARP_WIFI) && + BR_INPUT_SKB_CB(skb)->proxyarp_replied) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2aa7be3a847..f921a5dce22d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -55,12 +55,13 @@ static int br_pass_frame_up(struct sk_buff *skb) if (!skb) return NET_RX_DROP; - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, - netif_receive_skb); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, + indev, NULL, + netif_receive_skb_sk); } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, - u16 vid) + u16 vid, struct net_bridge_port *p) { struct net_device *dev = br->dev; struct neighbour *n; @@ -68,6 +69,8 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, u8 *arpptr, *sha; __be32 sip, tip; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + if (dev->flags & IFF_NOARP) return; @@ -105,16 +108,19 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } f = __br_fdb_get(br, n->ha, vid); - if (f) + if (f && ((p->flags & BR_PROXYARP) || + (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, sha, n->ha, sha); + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } neigh_release(n); } } /* note: already called with rcu_read_lock */ -int br_handle_frame_finish(struct sk_buff *skb) +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = br_port_get_rcu(skb->dev); @@ -153,12 +159,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) { - if (IS_ENABLED(CONFIG_INET) && - p->flags & BR_PROXYARP && - skb->protocol == htons(ETH_P_ARP)) - br_do_proxy_arp(skb, br, vid); + if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) + br_do_proxy_arp(skb, br, vid, p); + if (is_broadcast_ether_addr(dest)) { skb2 = skb; unicast = false; } else if (is_multicast_ether_addr(dest)) { @@ -204,7 +208,7 @@ drop: EXPORT_SYMBOL_GPL(br_handle_frame_finish); /* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct sk_buff *skb) +static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -274,8 +278,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) } /* Deliver packet to local host only */ - if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) { + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, + skb->dev, NULL, br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ } else { *pskb = skb; @@ -299,7 +303,8 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb, + skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c465876c7861..4b6722f8f179 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -814,7 +814,8 @@ static void __br_multicast_send_query(struct net_bridge *br, if (port) { skb->dev = port->dev; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 0ee453fad3de..ab55e2472beb 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -37,17 +37,16 @@ #include <net/route.h> #include <net/netfilter/br_netfilter.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + #include <asm/uaccess.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif -#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ - (skb->nf_bridge->data))->daddr.ipv4) -#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr) -#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr) - #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; @@ -112,6 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +/* largest possible L2 header, see br_nf_dev_queue_xmit() */ +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +struct brnf_frag_data { + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; + u8 encap_size; + u8 size; +}; + +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); +#endif + +static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { struct net_bridge_port *port; @@ -154,6 +171,18 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) return nf_bridge; } +static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __cpu_to_be16(ETH_P_8021Q): + return VLAN_HLEN; + case __cpu_to_be16(ETH_P_PPP_SES): + return PPPOE_SES_HLEN; + default: + return 0; + } +} + static inline void nf_bridge_push_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); @@ -178,14 +207,6 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) skb->network_header += len; } -static inline void nf_bridge_save_header(struct sk_buff *skb) -{ - int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - - skb_copy_from_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); -} - /* When handing a packet over to the IP layer * check whether we have a skb that is in the * expected format @@ -239,17 +260,31 @@ drop: return -1; } +static void nf_bridge_update_protocol(struct sk_buff *skb) +{ + switch (skb->nf_bridge->orig_proto) { + case BRNF_PROTO_8021Q: + skb->protocol = htons(ETH_P_8021Q); + break; + case BRNF_PROTO_PPPOE: + skb->protocol = htons(ETH_P_PPP_SES); + break; + case BRNF_PROTO_UNCHANGED: + break; + } +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ -static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) +static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; @@ -263,7 +298,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -274,9 +310,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) +static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct neighbour *neigh; struct dst_entry *dst; @@ -286,12 +321,13 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) dst = skb_dst(skb); neigh = dst_neigh_lookup_skb(dst, skb); if (neigh) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; if (neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; - ret = br_handle_frame_finish(skb); + ret = br_handle_frame_finish(sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and @@ -299,7 +335,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, + nf_bridge->neigh_header, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; @@ -314,6 +350,22 @@ free_skb: return 0; } +static bool dnat_took_place(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_untracked(ct)) + return false; + + return test_bit(IPS_DST_NAT_BIT, &ct->status); +#else + return false; +#endif +} + /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * @@ -352,11 +404,11 @@ free_skb: * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. */ -static int br_nf_pre_routing_finish(struct sk_buff *skb) +static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; int frag_max_size; @@ -364,9 +416,9 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) frag_max_size = IPCB(skb)->frag_max_size; BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { @@ -405,7 +457,7 @@ bridged_dnat: nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - skb, skb->dev, NULL, + sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -425,7 +477,8 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -448,20 +501,21 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct /* Some common code for IPv4/IPv6 */ static struct net_device *setup_pre_routing(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = brnf_get_logical_dev(skb, skb->dev); + if (skb->protocol == htons(ETH_P_8021Q)) - nf_bridge->mask |= BRNF_8021Q; + nf_bridge->orig_proto = BRNF_PROTO_8021Q; else if (skb->protocol == htons(ETH_P_PPP_SES)) - nf_bridge->mask |= BRNF_PPPoE; + nf_bridge->orig_proto = BRNF_PROTO_PPPOE; /* Must drop socket now because of tproxy. */ skb_orphan(skb); @@ -527,9 +581,7 @@ bad: * to ip6tables, which doesn't support NAT, so things are fairly simple. */ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct ipv6hdr *hdr; u32 pkt_len; @@ -563,7 +615,8 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, return NF_DROP; skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, br_nf_pre_routing_finish_ipv6); return NF_STOLEN; @@ -577,9 +630,7 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, * address to be able to detect DNAT afterwards. */ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct net_bridge_port *p; struct net_bridge *br; @@ -588,7 +639,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, if (unlikely(!pskb_may_pull(skb, len))) return NF_DROP; - p = br_port_get_rcu(in); + p = br_port_get_rcu(state->in); if (p == NULL) return NF_DROP; br = p->br; @@ -598,7 +649,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); + return br_nf_pre_routing_ipv6(ops, skb, state); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -617,10 +668,11 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_DROP; if (!setup_pre_routing(skb)) return NF_DROP; - store_orig_dstaddr(skb); + skb->protocol = htons(ETH_P_IP); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; @@ -636,25 +688,30 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, * prevent this from happening. */ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { br_drop_fake_rtable(skb); return NF_ACCEPT; } /* PF_BRIDGE/FORWARD *************************************************/ -static int br_nf_forward_finish(struct sk_buff *skb) +static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { + int frag_max_size; + + if (skb->protocol == htons(ETH_P_IP)) { + frag_max_size = IPCB(skb)->frag_max_size; + BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + } + in = nf_bridge->physindev; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge_update_protocol(skb); } else { @@ -662,8 +719,8 @@ static int br_nf_forward_finish(struct sk_buff *skb) } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, - skb->dev, br_forward_finish, 1); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, + in, skb->dev, br_forward_finish, 1); return 0; } @@ -675,9 +732,7 @@ static int br_nf_forward_finish(struct sk_buff *skb) * bridge ports. */ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; struct net_device *parent; @@ -691,7 +746,11 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (!nf_bridge_unshare(skb)) return NF_DROP; - parent = bridge_parent(out); + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) + return NF_DROP; + + parent = bridge_parent(state->out); if (!parent) return NF_DROP; @@ -704,40 +763,42 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, nf_bridge_pull_encap_header(skb); - nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } - if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) - return NF_DROP; + if (pf == NFPROTO_IPV4) { + int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; + + if (br_parse_ip_options(skb)) + return NF_DROP; + + IPCB(skb)->frag_max_size = frag_max; + } - /* The physdev module checks on this */ - nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent, - br_nf_forward_finish); + NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, + brnf_get_logical_dev(skb, state->in), + parent, br_nf_forward_finish); return NF_STOLEN; } static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct net_bridge_port *p; struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); - p = br_port_get_rcu(out); + p = br_port_get_rcu(state->out); if (p == NULL) return NF_ACCEPT; br = p->br; @@ -756,55 +817,93 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, nf_bridge_push_encap_header(skb); return NF_ACCEPT; } - *d = (struct net_device *)in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, - (struct net_device *)out, br_nf_forward_finish); + *d = state->in; + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, + state->in, state->out, br_nf_forward_finish); return NF_STOLEN; } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) -static int br_nf_dev_queue_xmit(struct sk_buff *skb) +static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) +{ + struct brnf_frag_data *data; + int err; + + data = this_cpu_ptr(&brnf_frag_data_storage); + err = skb_cow_head(skb, data->size); + + if (err) { + kfree_skb(skb); + return 0; + } + + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); + __skb_push(skb, data->encap_size); + + return br_dev_queue_push_xmit(sk, skb); +} + +static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; int frag_max_size; + unsigned int mtu_reserved; + + if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) + return br_dev_queue_push_xmit(sk, skb); + mtu_reserved = nf_bridge_mtu_reduction(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->protocol == htons(ETH_P_IP) && - skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && - !skb_is_gso(skb)) { + if (skb->len + mtu_reserved > skb->dev->mtu) { + struct brnf_frag_data *data; + frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; - ret = ip_fragment(skb, br_dev_queue_push_xmit); - } else - ret = br_dev_queue_push_xmit(skb); + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); + } else { + ret = br_dev_queue_push_xmit(sk, skb); + } return ret; } #else -static int br_nf_dev_queue_xmit(struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { - return br_dev_queue_push_xmit(skb); + return br_dev_queue_push_xmit(sk, skb); } #endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; - if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) + /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in + * on a bridge, but was delivered locally and is now being routed: + * + * POST_ROUTING was already invoked from the ip stack. + */ + if (!nf_bridge || !nf_bridge->physoutdev) return NF_ACCEPT; if (!realoutdev) @@ -821,17 +920,17 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge_pull_encap_header(skb); - nf_bridge_save_header(skb); if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, + NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, + NULL, realoutdev, br_nf_dev_queue_xmit); return NF_STOLEN; @@ -842,9 +941,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, * for the second time. */ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (skb->nf_bridge && !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { @@ -854,6 +951,44 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, return NF_ACCEPT; } +/* This is called when br_netfilter has called into iptables/netfilter, + * and DNAT has taken place on a bridge-forwarded packet. + * + * neigh->output has created a new MAC header, with local br0 MAC + * as saddr. + * + * This restores the original MAC saddr of the bridged packet + * before invoking bridge forward logic to transmit the packet. + */ +static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + skb_pull(skb, ETH_HLEN); + nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; + + BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN - ETH_ALEN); + skb->dev = nf_bridge->physindev; + br_handle_frame_finish(NULL, skb); +} + +static int br_nf_dev_xmit(struct sk_buff *skb) +{ + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return 1; + } + return 0; +} + +static const struct nf_br_ops br_ops = { + .br_dev_xmit_hook = br_nf_dev_xmit, +}; + void br_netfilter_enable(void) { } @@ -991,12 +1126,14 @@ static int __init br_netfilter_init(void) return -ENOMEM; } #endif + RCU_INIT_POINTER(nf_br_ops, &br_ops); printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; } static void __exit br_netfilter_fini(void) { + RCU_INIT_POINTER(nf_br_ops, NULL); nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); #ifdef CONFIG_SYSCTL unregister_net_sysctl_table(brnf_sysctl_header); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4fbcea0e7ecb..0e4ddb81610d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -22,6 +22,85 @@ #include "br_private.h" #include "br_private_stp.h" +static int br_get_num_vlan_infos(const struct net_port_vlans *pv, + u32 filter_mask) +{ + u16 vid_range_start = 0, vid_range_end = 0; + u16 vid_range_flags = 0; + u16 pvid, vid, flags; + int num_vlans = 0; + + if (filter_mask & RTEXT_FILTER_BRVLAN) + return pv->num_vlans; + + if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) + return 0; + + /* Count number of vlan info's + */ + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + flags = 0; + if (vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((vid - vid_range_end) == 1 && + flags == vid_range_flags) { + vid_range_end = vid; + continue; + } else { + if ((vid_range_end - vid_range_start) > 0) + num_vlans += 2; + else + num_vlans += 1; + } +initvars: + vid_range_start = vid; + vid_range_end = vid; + vid_range_flags = flags; + } + + if (vid_range_start != 0) { + if ((vid_range_end - vid_range_start) > 0) + num_vlans += 2; + else + num_vlans += 1; + } + + return num_vlans; +} + +static size_t br_get_link_af_size_filtered(const struct net_device *dev, + u32 filter_mask) +{ + struct net_port_vlans *pv; + int num_vlan_infos; + + rcu_read_lock(); + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + pv = NULL; + if (pv) + num_vlan_infos = br_get_num_vlan_infos(pv, filter_mask); + else + num_vlan_infos = 0; + rcu_read_unlock(); + + if (!num_vlan_infos) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); +} + static inline size_t br_port_info_size(void) { return nla_total_size(1) /* IFLA_BRPORT_STATE */ @@ -36,7 +115,7 @@ static inline size_t br_port_info_size(void) + 0; } -static inline size_t br_nlmsg_size(void) +static inline size_t br_nlmsg_size(struct net_device *dev, u32 filter_mask) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -45,7 +124,9 @@ static inline size_t br_nlmsg_size(void) + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(br_port_info_size()); /* IFLA_PROTINFO */ + + nla_total_size(br_port_info_size()) /* IFLA_PROTINFO */ + + nla_total_size(br_get_link_af_size_filtered(dev, + filter_mask)); /* IFLA_AF_SPEC */ } static int br_port_fill_attrs(struct sk_buff *skb, @@ -62,7 +143,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || - nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP))) + nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || + nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, + !!(p->flags & BR_PROXYARP_WIFI))) return -EMSGSIZE; return 0; @@ -222,8 +305,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, nla_put_u8(skb, IFLA_OPERSTATE, operstate) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; if (event == RTM_NEWLINK && port) { @@ -280,6 +363,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; if (!port) return; @@ -288,11 +372,11 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); - skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC); + skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC); if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -471,6 +555,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); + br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); @@ -648,6 +733,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 }, [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 }, [IFLA_BR_MAX_AGE] = { .type = NLA_U32 }, + [IFLA_BR_AGEING_TIME] = { .type = NLA_U32 }, + [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, + [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -677,6 +765,24 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } + if (data[IFLA_BR_AGEING_TIME]) { + u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); + + br->ageing_time = clock_t_to_jiffies(ageing_time); + } + + if (data[IFLA_BR_STP_STATE]) { + u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]); + + br_stp_set_enabled(br, stp_enabled); + } + + if (data[IFLA_BR_PRIORITY]) { + u32 priority = nla_get_u16(data[IFLA_BR_PRIORITY]); + + br_stp_set_bridge_priority(br, priority); + } + return 0; } @@ -685,6 +791,9 @@ static size_t br_get_size(const struct net_device *brdev) return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ nla_total_size(sizeof(u32)) + /* IFLA_BR_HELLO_TIME */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MAX_AGE */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_AGEING_TIME */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */ 0; } @@ -694,10 +803,16 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u32 forward_delay = jiffies_to_clock_t(br->forward_delay); u32 hello_time = jiffies_to_clock_t(br->hello_time); u32 age_time = jiffies_to_clock_t(br->max_age); + u32 ageing_time = jiffies_to_clock_t(br->ageing_time); + u32 stp_enabled = br->stp_enabled; + u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || - nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time)) + nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) || + nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) || + nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) || + nla_put_u16(skb, IFLA_BR_PRIORITY, priority)) return -EMSGSIZE; return 0; diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 387cb3bd017c..20cbb727df4d 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -54,7 +54,6 @@ static unsigned int fake_mtu(const struct dst_entry *dst) static struct dst_ops fake_dst_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, .redirect = fake_redirect, .cow_metrics = fake_cow_metrics, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index de0919975a25..6ca0251cb478 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -305,6 +305,7 @@ struct br_input_skb_cb { #endif u16 frag_max_size; + bool proxyarp_replied; #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; @@ -409,10 +410,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -int br_dev_queue_push_xmit(struct sk_buff *skb); +int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); -int br_forward_finish(struct sk_buff *skb); +int br_forward_finish(struct sock *sk, struct sk_buff *skb); void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2, bool unicast); @@ -430,7 +431,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); void br_manage_promisc(struct net_bridge *br); /* br_input.c */ -int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) @@ -762,6 +763,11 @@ static inline int br_vlan_enabled(struct net_bridge *br) } #endif +struct nf_br_ops { + int (*br_dev_xmit_hook)(struct sk_buff *skb); +}; +extern const struct nf_br_ops __rcu *nf_br_ops; + /* br_netfilter.c */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) int br_nf_core_init(void); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index bdb459d21ad8..534fc4cd263e 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -54,8 +54,9 @@ static void br_send_bpdu(struct net_bridge_port *p, skb_reset_mac_header(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - dev_queue_xmit); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, + dev_queue_xmit_sk); } static inline void br_set_ticks(unsigned char *dest, int j) diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2de5d91199e8..4905845a94e9 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); +BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -215,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_fast_leave, #endif &brport_attr_proxyarp, + &brport_attr_proxyarp_wifi, NULL }; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index ce205aabf9c5..8a3f63b2e807 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -58,20 +58,18 @@ static const struct ebt_table frame_filter = { static unsigned int ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(in)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->in)->xt.frame_filter); } static unsigned int ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(out)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index a0ac2984fb6c..c5ef5b1ab678 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -58,20 +58,18 @@ static struct ebt_table frame_nat = { static unsigned int ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(in)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->in)->xt.frame_nat); } static unsigned int ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(out)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 19473a9371b8..a343e62442b1 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -67,47 +67,43 @@ EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out) + const struct nf_hook_state *state) { if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, ops, skb, in, out); + nft_set_pktinfo_ipv4(pkt, ops, skb, state); else - nft_set_pktinfo(pkt, ops, skb, in, out); + nft_set_pktinfo(pkt, ops, skb, state); } static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out) + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, ops, skb, in, out) == 0) + nft_set_pktinfo_ipv6(pkt, ops, skb, state) == 0) return; #endif - nft_set_pktinfo(pkt, ops, skb, in, out); + nft_set_pktinfo(pkt, ops, skb, state); } static unsigned int nft_do_chain_bridge(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state); break; default: - nft_set_pktinfo(&pkt, ops, skb, in, out); + nft_set_pktinfo(&pkt, ops, skb, state); break; } diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 4f02109d708f..a21269b83f16 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -19,12 +19,12 @@ #include "../br_private.h" static void nft_meta_bridge_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct net_device *in = pkt->in, *out = pkt->out; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; const struct net_bridge_port *p; switch (priv->key) { @@ -40,12 +40,12 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, goto out; } - strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data)); + strncpy((char *)dest, p->br->dev->name, IFNAMSIZ); return; out: - return nft_meta_get_eval(expr, data, pkt); + return nft_meta_get_eval(expr, regs, pkt); err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, @@ -53,27 +53,21 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); - int err; + unsigned int len; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_BRI_IIFNAME: case NFT_META_BRI_OIFNAME: + len = IFNAMSIZ; break; default: return nft_meta_get_init(ctx, expr, tb); } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - - return 0; + priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); } static struct nft_expr_type nft_meta_bridge_type; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 3244aead0926..858d848564ee 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -21,6 +21,7 @@ #include <net/ip.h> #include <net/ip6_checksum.h> #include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv6.h> #include "../br_private.h" static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, @@ -36,7 +37,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } -static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) + * or the bridge port (NF_BRIDGE PREROUTING). + */ +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; struct iphdr *niph; @@ -65,11 +71,12 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } -static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, - u8 code) +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct iphdr *niph; @@ -77,8 +84,9 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, unsigned int len; void *payload; __wsum csum; + u8 proto; - if (!nft_bridge_iphdr_validate(oldskb)) + if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; /* IP header checks: fragment. */ @@ -91,7 +99,17 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, if (!pskb_may_pull(oldskb, len)) return; - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) + return; + + if (ip_hdr(oldskb)->protocol == IPPROTO_TCP || + ip_hdr(oldskb)->protocol == IPPROTO_UDP) + proto = ip_hdr(oldskb)->protocol; + else + proto = 0; + + if (!skb_csum_unnecessary(oldskb) && + nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) return; nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + @@ -120,11 +138,13 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_br_send_v6_tcp_reset(struct net *net, - struct sk_buff *oldskb, int hook) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; const struct tcphdr *oth; @@ -152,12 +172,37 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); +} + +static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto = ip6h->nexthdr; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + if (ip6h->payload_len && + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } static void nft_reject_br_send_v6_unreach(struct net *net, - struct sk_buff *oldskb, int hook, - u8 code) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct ipv6hdr *nip6h; @@ -176,6 +221,9 @@ static void nft_reject_br_send_v6_unreach(struct net *net, if (!pskb_may_pull(oldskb, len)) return; + if (!reject6_br_csum_ok(oldskb, hook)) + return; + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + LL_MAX_HEADER + len, GFP_ATOMIC); if (!nskb) @@ -205,12 +253,12 @@ static void nft_reject_br_send_v6_unreach(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_bridge_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); @@ -224,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->skb, + nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmp_code(priv->icmp_code)); break; @@ -242,16 +290,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmpv6_code(priv->icmp_code)); break; @@ -262,7 +310,7 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, break; } out: - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; } static int nft_reject_bridge_validate(const struct nft_ctx *ctx, @@ -323,6 +371,8 @@ static int nft_reject_bridge_dump(struct sk_buff *skb, if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index a6e2da0bc718..4ec0c803aef1 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -271,8 +271,8 @@ static void caif_check_flow_release(struct sock *sk) * Copied from unix_dgram_recvmsg, but removed credit checks, * changed locking, address handling and added MSG_TRUNC. */ -static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) +static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, + size_t len, int flags) { struct sock *sk = sock->sk; @@ -343,9 +343,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) * Copied from unix_stream_recvmsg, but removed credit checks, * changed locking calls, changed address handling. */ -static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int copied = 0; @@ -511,8 +510,8 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, } /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ -static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -586,8 +585,8 @@ err: * Changed removed permission handling and added waiting for flow on * and other minor adaptations. */ -static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); diff --git a/net/can/bcm.c b/net/can/bcm.c index ee9ffd956552..b523453585be 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -328,7 +328,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, * containing the interface index. */ - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can)); + sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; @@ -1231,8 +1231,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) /* * bcm_sendmsg - process BCM commands (opcodes) from the userspace */ -static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); @@ -1535,8 +1534,8 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, return 0; } -static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/can/raw.c b/net/can/raw.c index 00c13ef23661..31b9748cbb4e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -74,6 +74,12 @@ MODULE_ALIAS("can-proto-1"); * storing the single filter in dfilter, to avoid using dynamic memory. */ +struct uniqframe { + ktime_t tstamp; + const struct sk_buff *skb; + unsigned int join_rx_count; +}; + struct raw_sock { struct sock sk; int bound; @@ -82,10 +88,12 @@ struct raw_sock { int loopback; int recv_own_msgs; int fd_frames; + int join_filters; int count; /* number of active filters */ struct can_filter dfilter; /* default/single filter */ struct can_filter *filter; /* pointer to filter(s) */ can_err_mask_t err_mask; + struct uniqframe __percpu *uniq; }; /* @@ -95,8 +103,8 @@ struct raw_sock { */ static inline unsigned int *raw_flags(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) + - sizeof(unsigned int))); + sock_skb_cb_check_size(sizeof(struct sockaddr_can) + + sizeof(unsigned int)); /* return pointer after struct sockaddr_can */ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]); @@ -123,6 +131,26 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (!ro->fd_frames && oskb->len != CAN_MTU) return; + /* eliminate multiple filter matches for the same skb */ + if (this_cpu_ptr(ro->uniq)->skb == oskb && + ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + if (ro->join_filters) { + this_cpu_inc(ro->uniq->join_rx_count); + /* drop frame until all enabled filters matched */ + if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count) + return; + } else { + return; + } + } else { + this_cpu_ptr(ro->uniq)->skb = oskb; + this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->join_rx_count = 1; + /* drop first frame to check all enabled filters? */ + if (ro->join_filters && ro->count > 1) + return; + } + /* clone the given skb to be able to enqueue it into the rcv queue */ skb = skb_clone(oskb, GFP_ATOMIC); if (!skb) @@ -135,7 +163,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) * containing the interface index. */ - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can)); + sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; @@ -296,6 +324,12 @@ static int raw_init(struct sock *sk) ro->loopback = 1; ro->recv_own_msgs = 0; ro->fd_frames = 0; + ro->join_filters = 0; + + /* alloc_percpu provides zero'ed memory */ + ro->uniq = alloc_percpu(struct uniqframe); + if (unlikely(!ro->uniq)) + return -ENOMEM; /* set notifier */ ro->notifier.notifier_call = raw_notifier; @@ -339,6 +373,7 @@ static int raw_release(struct socket *sock) ro->ifindex = 0; ro->bound = 0; ro->count = 0; + free_percpu(ro->uniq); sock_orphan(sk); sock->sk = NULL; @@ -583,6 +618,15 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, break; + case CAN_RAW_JOIN_FILTERS: + if (optlen != sizeof(ro->join_filters)) + return -EINVAL; + + if (copy_from_user(&ro->join_filters, optval, optlen)) + return -EFAULT; + + break; + default: return -ENOPROTOOPT; } @@ -647,6 +691,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, val = &ro->fd_frames; break; + case CAN_RAW_JOIN_FILTERS: + if (len > sizeof(int)) + len = sizeof(int); + val = &ro->join_filters; + break; + default: return -ENOPROTOOPT; } @@ -658,8 +708,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -728,8 +777,8 @@ send_failed: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/compat.c b/net/compat.c index f7bd286a8280..5cfd26a0006f 100644 --- a/net/compat.c +++ b/net/compat.c @@ -31,10 +31,10 @@ #include <asm/uaccess.h> #include <net/compat.h> -ssize_t get_compat_msghdr(struct msghdr *kmsg, - struct compat_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec **iov) +int get_compat_msghdr(struct msghdr *kmsg, + struct compat_msghdr __user *umsg, + struct sockaddr __user **save_addr, + struct iovec **iov) { compat_uptr_t uaddr, uiov, tmp3; compat_size_t nr_segs; @@ -79,13 +79,11 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; - err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE, - compat_ptr(uiov), nr_segs, - UIO_FASTIOV, *iov, iov); - if (err >= 0) - iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE, - *iov, nr_segs, err); - return err; + kmsg->msg_iocb = NULL; + + return compat_import_iovec(save_addr ? READ : WRITE, + compat_ptr(uiov), nr_segs, + UIO_FASTIOV, iov, &kmsg->msg_iter); } /* Bleech... */ @@ -515,25 +513,25 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, struct compat_group_req { __u32 gr_interface; struct __kernel_sockaddr_storage gr_group - __attribute__ ((aligned(4))); + __aligned(4); } __packed; struct compat_group_source_req { __u32 gsr_interface; struct __kernel_sockaddr_storage gsr_group - __attribute__ ((aligned(4))); + __aligned(4); struct __kernel_sockaddr_storage gsr_source - __attribute__ ((aligned(4))); + __aligned(4); } __packed; struct compat_group_filter { __u32 gf_interface; struct __kernel_sockaddr_storage gf_group - __attribute__ ((aligned(4))); + __aligned(4); __u32 gf_fmode; __u32 gf_numsrc; struct __kernel_sockaddr_storage gf_slist[1] - __attribute__ ((aligned(4))); + __aligned(4); } __packed; #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ diff --git a/net/core/datagram.c b/net/core/datagram.c index df493d68330c..b80fb91bb3f7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -673,7 +673,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (!chunk) return 0; - if (iov_iter_count(&msg->msg_iter) < chunk) { + if (msg_data_left(msg) < chunk) { if (__skb_checksum_complete(skb)) goto csum_error; if (skb_copy_datagram_msg(skb, hlen, msg, chunk)) diff --git a/net/core/dev.c b/net/core/dev.c index 45109b70664e..af4a1b0adc10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -660,6 +660,27 @@ __setup("netdev=", netdev_boot_setup); *******************************************************************************/ /** + * dev_get_iflink - get 'iflink' value of a interface + * @dev: targeted interface + * + * Indicates the ifindex the interface is linked to. + * Physical interfaces have the same 'ifindex' and 'iflink' values. + */ + +int dev_get_iflink(const struct net_device *dev) +{ + if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) + return dev->netdev_ops->ndo_get_iflink(dev); + + /* If dev->rtnl_link_ops is set, it's a virtual interface. */ + if (dev->rtnl_link_ops) + return 0; + + return dev->ifindex; +} +EXPORT_SYMBOL(dev_get_iflink); + +/** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace * @name: name to find @@ -1385,7 +1406,7 @@ static int __dev_close(struct net_device *dev) return retval; } -static int dev_close_many(struct list_head *head) +int dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1399,11 +1420,13 @@ static int dev_close_many(struct list_head *head) list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); - list_del_init(&dev->close_list); + if (unlink) + list_del_init(&dev->close_list); } return 0; } +EXPORT_SYMBOL(dev_close_many); /** * dev_close - shutdown an interface. @@ -1420,7 +1443,7 @@ int dev_close(struct net_device *dev) LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_close_many(&single); + dev_close_many(&single, true); list_del(&single); } return 0; @@ -1607,6 +1630,22 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +#ifdef CONFIG_NET_CLS_ACT +static struct static_key ingress_needed __read_mostly; + +void net_inc_ingress_queue(void) +{ + static_key_slow_inc(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_inc_ingress_queue); + +void net_dec_ingress_queue(void) +{ + static_key_slow_dec(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_dec_ingress_queue); +#endif + static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL /* We are not allowed to call static_key_slow_dec() from irq context @@ -1694,6 +1733,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_scrub_packet(skb, true); + skb->priority = 0; skb->protocol = eth_type_trans(skb, dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -1737,7 +1777,8 @@ static inline int deliver_skb(struct sk_buff *skb, static inline void deliver_ptype_list_skb(struct sk_buff *skb, struct packet_type **pt, - struct net_device *dev, __be16 type, + struct net_device *orig_dev, + __be16 type, struct list_head *ptype_list) { struct packet_type *ptype, *pt_prev = *pt; @@ -1746,7 +1787,7 @@ static inline void deliver_ptype_list_skb(struct sk_buff *skb, if (ptype->type != type) continue; if (pt_prev) - deliver_skb(skb, pt_prev, dev); + deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } *pt = pt_prev; @@ -2559,12 +2600,26 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return features; +} +EXPORT_SYMBOL(passthru_features_check); + +static netdev_features_t dflt_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return vlan_features_check(skb, features); +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; - __be16 protocol = skb->protocol; if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; @@ -2576,34 +2631,17 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!skb_vlan_tag_present(skb)) { - if (unlikely(protocol == htons(ETH_P_8021Q) || - protocol == htons(ETH_P_8021AD))) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else { - goto finalize; - } - } - - features = netdev_intersect_features(features, - dev->vlan_features | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) + if (skb_vlan_tagged(skb)) features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | + dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); -finalize: if (dev->netdev_ops->ndo_features_check) features &= dev->netdev_ops->ndo_features_check(skb, dev, features); + else + features &= dflt_features_check(skb, dev, features); return harmonize_features(skb, features); } @@ -2857,7 +2895,7 @@ EXPORT_SYMBOL(xmit_recursion); * dev_loopback_xmit - loop back @skb * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sk_buff *skb) +int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -2995,11 +3033,11 @@ out: return rc; } -int dev_queue_xmit(struct sk_buff *skb) +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit); +EXPORT_SYMBOL(dev_queue_xmit_sk); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3525,7 +3563,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) - goto out; + return skb; if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); @@ -3539,8 +3577,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return NULL; } -out: - skb->tc_verd = 0; return skb; } #endif @@ -3676,12 +3712,15 @@ another_round: skip_taps: #ifdef CONFIG_NET_CLS_ACT - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto unlock; + if (static_key_false(&ingress_needed)) { + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto unlock; + } + + skb->tc_verd = 0; ncls: #endif - if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; @@ -3831,13 +3870,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb(struct sk_buff *skb) +int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb); +EXPORT_SYMBOL(netif_receive_skb_sk); /* Network device is going away, flush any packets still pending * Called with irqs disabled. @@ -5914,6 +5953,24 @@ int dev_get_phys_port_id(struct net_device *dev, EXPORT_SYMBOL(dev_get_phys_port_id); /** + * dev_get_phys_port_name - Get device physical port name + * @dev: device + * @name: port name + * + * Get device physical port name + */ +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_phys_port_name) + return -EOPNOTSUPP; + return ops->ndo_get_phys_port_name(dev, name, len); +} +EXPORT_SYMBOL(dev_get_phys_port_name); + +/** * dev_new_index - allocate an ifindex * @net: the applicable net namespace * @@ -5970,7 +6027,7 @@ static void rollback_registered_many(struct list_head *head) /* If device is running, close it first. */ list_for_each_entry(dev, head, unreg_list) list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head); + dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ @@ -6297,8 +6354,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); - dev->iflink = -1; - ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out; @@ -6328,9 +6383,6 @@ int register_netdevice(struct net_device *dev) else if (__dev_get_by_index(net, dev->ifindex)) goto err_uninit; - if (dev->iflink == -1) - dev->iflink = dev->ifindex; - /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). */ @@ -6843,8 +6895,6 @@ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - release_net(dev_net(dev)); - netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); @@ -7045,12 +7095,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_net_set(dev, net); /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev->iflink == dev->ifindex); + if (__dev_get_by_index(net, dev->ifindex)) dev->ifindex = dev_new_index(net); - if (iflink) - dev->iflink = dev->ifindex; - } /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index aa378ecef186..1d00b8922902 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -790,7 +790,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ops->get_rxfh_indir_size) dev_indir_size = ops->get_rxfh_indir_size(dev); if (ops->get_rxfh_key_size) - dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); + dev_key_size = ops->get_rxfh_key_size(dev); if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) return -EFAULT; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e4fdc9dfb2c7..9a12668f7d62 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -31,7 +31,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; - r->fr_net = hold_net(ops->fro_net); + r->fr_net = ops->fro_net; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -116,7 +116,6 @@ static int __fib_rules_register(struct fib_rules_ops *ops) if (ops->family == o->family) goto errout; - hold_net(net); list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: @@ -160,15 +159,6 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } } -static void fib_rules_put_rcu(struct rcu_head *head) -{ - struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu); - struct net *net = ops->fro_net; - - release_net(net); - kfree(ops); -} - void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; @@ -178,7 +168,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops) spin_unlock(&net->rules_mod_lock); fib_rules_cleanup_ops(ops); - call_rcu(&ops->rcu, fib_rules_put_rcu); + kfree_rcu(ops, rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); @@ -303,7 +293,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) err = -ENOMEM; goto errout; } - rule->fr_net = hold_net(net); + rule->fr_net = net; if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); @@ -423,7 +413,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) return 0; errout_free: - release_net(rule->fr_net); kfree(rule); errout: rules_ops_put(ops); @@ -492,6 +481,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) goto errout; } + if (ops->delete) { + err = ops->delete(rule); + if (err) + goto errout; + } + list_del_rcu(&rule->list); if (rule->action == FR_ACT_GOTO) { @@ -517,8 +512,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); - if (ops->delete) - ops->delete(rule); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); diff --git a/net/core/filter.c b/net/core/filter.c index f6bdc2b1ba01..b669e75d2b36 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -150,10 +150,62 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return prandom_u32(); } +static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (skb_field) { + case SKF_AD_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; + + case SKF_AD_PKTTYPE: + *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); +#endif + break; + + case SKF_AD_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, queue_mapping)); + break; + + case SKF_AD_VLAN_TAG: + case SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_tci)); + if (skb_field == SKF_AD_VLAN_TAG) { + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, + ~VLAN_TAG_PRESENT); + } else { + /* dst_reg >>= 12 */ + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12); + /* dst_reg &= 1 */ + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); + } + break; + } + + return insn - insn_buf; +} + static bool convert_bpf_extensions(struct sock_filter *fp, struct bpf_insn **insnp) { struct bpf_insn *insn = *insnp; + u32 cnt; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: @@ -167,13 +219,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - PKT_TYPE_OFFSET()); - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); -#ifdef __BIG_ENDIAN_BITFIELD - insn++; - *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5); -#endif + cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_IFINDEX: @@ -197,10 +244,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - - *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, mark)); + cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_RXHASH: @@ -211,29 +256,30 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, queue_mapping)); + cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: + cnt = convert_skb_access(SKF_AD_VLAN_TAG, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; + break; + case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; + break; + + case SKF_AD_OFF + SKF_AD_VLAN_TPID: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); - /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ + /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, vlan_tci)); - if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, - ~VLAN_TAG_PRESENT); - } else { - /* A >>= 12 */ - *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); - /* A &= 1 */ - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); - } + offsetof(struct sk_buff, vlan_proto)); + /* A = ntohs(A) [emitting a nop or swap16] */ + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); break; case SKF_AD_OFF + SKF_AD_PAY_OFFSET: @@ -814,7 +860,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp) static void __bpf_prog_release(struct bpf_prog *prog) { - if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { + if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); } else { bpf_release_orig_filter(prog); @@ -1019,6 +1065,32 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +{ + struct sk_filter *fp, *old_fp; + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) + return -ENOMEM; + + fp->prog = prog; + atomic_set(&fp->refcnt, 0); + + if (!sk_filter_charge(sk, fp)) { + kfree(fp); + return -ENOMEM; + } + + old_fp = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + rcu_assign_pointer(sk->sk_filter, fp); + + if (old_fp) + sk_filter_uncharge(sk, old_fp); + + return 0; +} + /** * sk_attach_filter - attach a socket filter * @fprog: the filter program @@ -1031,7 +1103,6 @@ EXPORT_SYMBOL_GPL(bpf_prog_destroy); */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct sk_filter *fp, *old_fp; unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); struct bpf_prog *prog; @@ -1068,36 +1139,20 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - fp = kmalloc(sizeof(*fp), GFP_KERNEL); - if (!fp) { + err = __sk_attach_prog(prog, sk); + if (err < 0) { __bpf_prog_release(prog); - return -ENOMEM; - } - fp->prog = prog; - - atomic_set(&fp->refcnt, 0); - - if (!sk_filter_charge(sk, fp)) { - __sk_filter_release(fp); - return -ENOMEM; + return err; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - rcu_assign_pointer(sk->sk_filter, fp); - - if (old_fp) - sk_filter_uncharge(sk, old_fp); - return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); -#ifdef CONFIG_BPF_SYSCALL int sk_attach_bpf(u32 ufd, struct sock *sk) { - struct sk_filter *fp, *old_fp; struct bpf_prog *prog; + int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; @@ -1106,40 +1161,168 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { - /* valid fd, but invalid program type */ + if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); return -EINVAL; } - fp = kmalloc(sizeof(*fp), GFP_KERNEL); - if (!fp) { + err = __sk_attach_prog(prog, sk); + if (err < 0) { bpf_prog_put(prog); - return -ENOMEM; + return err; } - fp->prog = prog; - atomic_set(&fp->refcnt, 0); + return 0; +} - if (!sk_filter_charge(sk, fp)) { - __sk_filter_release(fp); - return -ENOMEM; +#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) + +static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + unsigned int offset = (unsigned int) r2; + void *from = (void *) (long) r3; + unsigned int len = (unsigned int) r4; + char buf[16]; + void *ptr; + + /* bpf verifier guarantees that: + * 'from' pointer points to bpf program stack + * 'len' bytes of it were initialized + * 'len' > 0 + * 'skb' is a valid pointer to 'struct sk_buff' + * + * so check for invalid 'offset' and too large 'len' + */ + if (unlikely(offset > 0xffff || len > sizeof(buf))) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, len, buf); + if (unlikely(!ptr)) + return -EFAULT; + + if (BPF_RECOMPUTE_CSUM(flags)) + skb_postpull_rcsum(skb, ptr, len); + + memcpy(ptr, from, len); + + if (ptr == buf) + /* skb_store_bits cannot return -EFAULT here */ + skb_store_bits(skb, offset, ptr, len); + + if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + return 0; +} + +const struct bpf_func_proto bpf_skb_store_bytes_proto = { + .func = bpf_skb_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, + .arg5_type = ARG_ANYTHING, +}; + +#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) +#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) + +static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + __sum16 sum, *ptr; + + if (unlikely(offset > 0xffff)) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); + if (unlikely(!ptr)) + return -EFAULT; + + switch (BPF_HEADER_FIELD_SIZE(flags)) { + case 2: + csum_replace2(ptr, from, to); + break; + case 4: + csum_replace4(ptr, from, to); + break; + default: + return -EINVAL; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - rcu_assign_pointer(sk->sk_filter, fp); + if (ptr == &sum) + /* skb_store_bits guaranteed to not return -EFAULT here */ + skb_store_bits(skb, offset, ptr, sizeof(sum)); - if (old_fp) - sk_filter_uncharge(sk, old_fp); + return 0; +} + +const struct bpf_func_proto bpf_l3_csum_replace_proto = { + .func = bpf_l3_csum_replace, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + __sum16 sum, *ptr; + + if (unlikely(offset > 0xffff)) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); + if (unlikely(!ptr)) + return -EFAULT; + + switch (BPF_HEADER_FIELD_SIZE(flags)) { + case 2: + inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); + break; + case 4: + inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo); + break; + default: + return -EINVAL; + } + + if (ptr == &sum) + /* skb_store_bits guaranteed to not return -EFAULT here */ + skb_store_bits(skb, offset, ptr, sizeof(sum)); return 0; } -/* allow socket filters to call - * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() - */ -static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id) +const struct bpf_func_proto bpf_l4_csum_replace_proto = { + .func = bpf_l4_csum_replace, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto * +sk_filter_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1148,39 +1331,144 @@ static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return NULL; } } -static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type) +static const struct bpf_func_proto * +tc_cls_act_func_proto(enum bpf_func_id func_id) { - /* skb fields cannot be accessed yet */ - return false; + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_l3_csum_replace: + return &bpf_l3_csum_replace_proto; + case BPF_FUNC_l4_csum_replace: + return &bpf_l4_csum_replace_proto; + default: + return sk_filter_func_proto(func_id); + } +} + +static bool sk_filter_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* check bounds */ + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + /* all __sk_buff fields are __u32 */ + if (size != 4) + return false; + + return true; +} + +static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, len): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, len)); + break; + + case offsetof(struct __sk_buff, protocol): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, protocol)); + break; + + case offsetof(struct __sk_buff, vlan_proto): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_proto)); + break; + + case offsetof(struct __sk_buff, priority): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); + break; + + case offsetof(struct __sk_buff, mark): + return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, pkt_type): + return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, queue_mapping): + return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_present): + return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_tci): + return convert_skb_access(SKF_AD_VLAN_TAG, + dst_reg, src_reg, insn); + } + + return insn - insn_buf; } -static struct bpf_verifier_ops sock_filter_ops = { - .get_func_proto = sock_filter_func_proto, - .is_valid_access = sock_filter_is_valid_access, +static const struct bpf_verifier_ops sk_filter_ops = { + .get_func_proto = sk_filter_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, +}; + +static const struct bpf_verifier_ops tc_cls_act_ops = { + .get_func_proto = tc_cls_act_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; -static struct bpf_prog_type_list tl = { - .ops = &sock_filter_ops, +static struct bpf_prog_type_list sk_filter_type __read_mostly = { + .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; -static int __init register_sock_filter_ops(void) +static struct bpf_prog_type_list sched_cls_type __read_mostly = { + .ops = &tc_cls_act_ops, + .type = BPF_PROG_TYPE_SCHED_CLS, +}; + +static struct bpf_prog_type_list sched_act_type __read_mostly = { + .ops = &tc_cls_act_ops, + .type = BPF_PROG_TYPE_SCHED_ACT, +}; + +static int __init register_sk_filter_ops(void) { - bpf_register_prog_type(&tl); + bpf_register_prog_type(&sk_filter_type); + bpf_register_prog_type(&sched_cls_type); + bpf_register_prog_type(&sched_act_type); + return 0; } -late_initcall(register_sock_filter_ops); -#else -int sk_attach_bpf(u32 ufd, struct sock *sk) -{ - return -EOPNOTSUPP; -} -#endif +late_initcall(register_sk_filter_ops); + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 49a9e3e06c08..982861607f88 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -40,7 +40,7 @@ static DEFINE_SPINLOCK(lweventlist_lock); static unsigned char default_operstate(const struct net_device *dev) { if (!netif_carrier_ok(dev)) - return (dev->ifindex != dev->iflink ? + return (dev->ifindex != dev_get_iflink(dev) ? IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN); if (netif_dormant(dev)) @@ -89,7 +89,7 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (!netif_running(dev)) return false; - if (dev->ifindex != dev->iflink) + if (dev->ifindex != dev_get_iflink(dev)) return true; if (dev->priv_flags & IFF_TEAM_PORT) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 70fe9e10ac86..3de654256028 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -397,25 +397,15 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n; - int key_len = tbl->key_len; - u32 hash_val; - struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { - if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { - if (!atomic_inc_not_zero(&n->refcnt)) - n = NULL; - NEIGH_CACHE_STAT_INC(tbl, hits); - break; - } + n = __neigh_lookup_noref(tbl, pkey, dev); + if (n) { + if (!atomic_inc_not_zero(&n->refcnt)) + n = NULL; + NEIGH_CACHE_STAT_INC(tbl, hits); } rcu_read_unlock_bh(); @@ -601,7 +591,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; - write_pnet(&n->net, hold_net(net)); + write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -610,7 +600,6 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (tbl->pconstructor && tbl->pconstructor(n)) { if (dev) dev_put(dev); - release_net(net); kfree(n); n = NULL; goto out; @@ -644,7 +633,6 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); return 0; } @@ -667,7 +655,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); continue; } @@ -830,10 +817,9 @@ out: static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES); - if (!(n->nud_state & NUD_PROBE)) - max_probes += NEIGH_VAR(p, MCAST_PROBES); - return max_probes; + return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + + (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) : + NEIGH_VAR(p, MCAST_PROBES)); } static void neigh_invalidate(struct neighbour *neigh) @@ -1263,10 +1249,10 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, EXPORT_SYMBOL(neigh_event_ns); /* called with read_lock_bh(&n->lock); */ -static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) +static void neigh_hh_init(struct neighbour *n) { - struct net_device *dev = dst->dev; - __be16 prot = dst->ops->protocol; + struct net_device *dev = n->dev; + __be16 prot = n->tbl->protocol; struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); @@ -1280,43 +1266,19 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) write_unlock_bh(&n->lock); } -/* This function can be used in contexts, where only old dev_queue_xmit - * worked, f.e. if you want to override normal output path (eql, shaper), - * but resolution is not made yet. - */ - -int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - - __skb_pull(skb, skb_network_offset(skb)); - - if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, - skb->len) < 0 && - dev_rebuild_header(skb)) - return 0; - - return dev_queue_xmit(skb); -} -EXPORT_SYMBOL(neigh_compat_output); - /* Slow and careful. */ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); int rc = 0; - if (!dst) - goto discard; - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; unsigned int seq; if (dev->header_ops->cache && !neigh->hh.hh_len) - neigh_hh_init(neigh, dst); + neigh_hh_init(neigh); do { __skb_pull(skb, skb_network_offset(skb)); @@ -1332,8 +1294,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) } out: return rc; -discard: - neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1464,11 +1424,10 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); p->dev = dev; - write_pnet(&p->net, hold_net(net)); + write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - release_net(net); dev_put(dev); kfree(p); return NULL; @@ -1508,7 +1467,6 @@ EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { - release_net(neigh_parms_net(parms)); kfree(parms); } @@ -1783,6 +1741,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) NEIGH_VAR(parms, UCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_PROBES, NEIGH_VAR(parms, MCAST_PROBES)) || + nla_put_u32(skb, NDTPA_MCAST_REPROBES, + NEIGH_VAR(parms, MCAST_REPROBES)) || nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || @@ -1942,6 +1902,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_APP_PROBES] = { .type = NLA_U32 }, [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, @@ -2042,6 +2003,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) NEIGH_VAR_SET(p, MCAST_PROBES, nla_get_u32(tbp[i])); break; + case NDTPA_MCAST_REPROBES: + NEIGH_VAR_SET(p, MCAST_REPROBES, + nla_get_u32(tbp[i])); + break; case NDTPA_BASE_REACHABLE_TIME: NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, nla_get_msecs(tbp[i])); @@ -2427,6 +2392,40 @@ void __neigh_for_each_release(struct neigh_table *tbl, } EXPORT_SYMBOL(__neigh_for_each_release); +int neigh_xmit(int index, struct net_device *dev, + const void *addr, struct sk_buff *skb) +{ + int err = -EAFNOSUPPORT; + if (likely(index < NEIGH_NR_TABLES)) { + struct neigh_table *tbl; + struct neighbour *neigh; + + tbl = neigh_tables[index]; + if (!tbl) + goto out; + neigh = __neigh_lookup_noref(tbl, addr, dev); + if (!neigh) + neigh = __neigh_create(tbl, addr, dev, false); + err = PTR_ERR(neigh); + if (IS_ERR(neigh)) + goto out_kfree_skb; + err = neigh->output(neigh, skb); + } + else if (index == NEIGH_LINK_TABLE) { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + addr, NULL, skb->len); + if (err < 0) + goto out_kfree_skb; + err = dev_queue_xmit(skb); + } +out: + return err; +out_kfree_skb: + kfree_skb(skb); + goto out; +} +EXPORT_SYMBOL(neigh_xmit); + #ifdef CONFIG_PROC_FS static struct neighbour *neigh_get_first(struct seq_file *seq) @@ -2994,6 +2993,7 @@ static struct neigh_sysctl_table { NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f2aa73bfb0e4..4238d6da5c60 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/jiffies.h> #include <linux/pm_runtime.h> +#include <linux/of.h> #include "net-sysfs.h" @@ -108,11 +109,19 @@ NETDEVICE_SHOW_RO(dev_id, fmt_hex); NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); -NETDEVICE_SHOW_RO(iflink, fmt_dec); NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t iflink_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct net_device *ndev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, dev_get_iflink(ndev)); +} +static DEVICE_ATTR_RO(iflink); + static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) { return sprintf(buf, fmt_dec, dev->name_assign_type); @@ -417,6 +426,28 @@ static ssize_t phys_port_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_port_id); +static ssize_t phys_port_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) { + char name[IFNAMSIZ]; + + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sprintf(buf, "%s\n", name); + } + rtnl_unlock(); + + return ret; +} +static DEVICE_ATTR_RO(phys_port_name); + static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -464,6 +495,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_tx_queue_len.attr, &dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, + &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, NULL, }; @@ -950,6 +982,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } +#ifdef CONFIG_XPS +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + +static ssize_t show_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + return sprintf(buf, "%lu\n", queue->tx_maxrate); +} + +static ssize_t set_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + int err, index = get_netdev_queue_index(queue); + u32 rate = 0; + + err = kstrtou32(buf, 10, &rate); + if (err < 0) + return err; + + if (!rtnl_trylock()) + return restart_syscall(); + + err = -EOPNOTSUPP; + if (dev->netdev_ops->ndo_set_tx_maxrate) + err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + + rtnl_unlock(); + if (!err) { + queue->tx_maxrate = rate; + return len; + } + return err; +} + +static struct netdev_queue_attribute queue_tx_maxrate = + __ATTR(tx_maxrate, S_IRUGO | S_IWUSR, + show_tx_maxrate, set_tx_maxrate); +#endif + static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); @@ -1064,18 +1150,6 @@ static struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static unsigned int get_netdev_queue_index(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - unsigned int i; - - i = queue - dev->_tx; - BUG_ON(i >= dev->num_tx_queues); - - return i; -} - - static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { @@ -1152,6 +1226,7 @@ static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, + &queue_tx_maxrate.attr, #endif NULL }; @@ -1374,6 +1449,30 @@ static struct class net_class = { .namespace = net_namespace, }; +#ifdef CONFIG_OF_NET +static int of_dev_node_match(struct device *dev, const void *data) +{ + int ret = 0; + + if (dev->parent) + ret = dev->parent->of_node == data; + + return ret == 0 ? dev->of_node == data : ret; +} + +struct net_device *of_find_net_device_by_node(struct device_node *np) +{ + struct device *dev; + + dev = class_find_device(&net_class, NULL, np, of_dev_node_match); + if (!dev) + return NULL; + + return to_net_dev(dev); +} +EXPORT_SYMBOL(of_find_net_device_by_node); +#endif + /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 70d3450588b2..a3abb719221f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -148,9 +148,11 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, + int id); static int alloc_netid(struct net *net, struct net *peer, int reqid) { - int min = 0, max = 0; + int min = 0, max = 0, id; ASSERT_RTNL(); @@ -159,7 +161,11 @@ static int alloc_netid(struct net *net, struct net *peer, int reqid) max = reqid + 1; } - return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); + id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); + if (id >= 0) + rtnl_net_notifyid(net, peer, RTM_NEWNSID, id); + + return id; } /* This function is used by idr_for_each(). If net is equal to peer, the @@ -238,10 +244,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->user_ns = user_ns; idr_init(&net->netns_ids); -#ifdef NETNS_REFCNT_DEBUG - atomic_set(&net->use_count, 0); -#endif - list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) @@ -296,13 +298,6 @@ out_free: static void net_free(struct net *net) { -#ifdef NETNS_REFCNT_DEBUG - if (unlikely(atomic_read(&net->use_count) != 0)) { - pr_emerg("network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } -#endif kfree(rcu_access_pointer(net->gen)); kmem_cache_free(net_cachep, net); } @@ -370,8 +365,10 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id = __peernet2id(tmp, net, false); - if (id >= 0) + if (id >= 0) { + rtnl_net_notifyid(tmp, net, RTM_DELNSID, id); idr_remove(&tmp->netns_ids, id); + } } idr_destroy(&net->netns_ids); @@ -542,7 +539,8 @@ static int rtnl_net_get_size(void) } static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, - int cmd, struct net *net, struct net *peer) + int cmd, struct net *net, struct net *peer, + int nsid) { struct nlmsghdr *nlh; struct rtgenmsg *rth; @@ -557,9 +555,13 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, rth = nlmsg_data(nlh); rth->rtgen_family = AF_UNSPEC; - id = __peernet2id(net, peer, false); - if (id < 0) - id = NETNSA_NSID_NOT_ASSIGNED; + if (nsid >= 0) { + id = nsid; + } else { + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + } if (nla_put_s32(skb, NETNSA_NSID, id)) goto nla_put_failure; @@ -576,8 +578,8 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; struct sk_buff *msg; - int err = -ENOBUFS; struct net *peer; + int err; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy); @@ -600,7 +602,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) } err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - RTM_GETNSID, net, peer); + RTM_GETNSID, net, peer, -1); if (err < 0) goto err_out; @@ -614,6 +616,75 @@ out: return err; } +struct rtnl_net_dump_cb { + struct net *net; + struct sk_buff *skb; + struct netlink_callback *cb; + int idx; + int s_idx; +}; + +static int rtnl_net_dumpid_one(int id, void *peer, void *data) +{ + struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; + int ret; + + if (net_cb->idx < net_cb->s_idx) + goto cont; + + ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid, + net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWNSID, net_cb->net, peer, id); + if (ret < 0) + return ret; + +cont: + net_cb->idx++; + return 0; +} + +static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct rtnl_net_dump_cb net_cb = { + .net = net, + .skb = skb, + .cb = cb, + .idx = 0, + .s_idx = cb->args[0], + }; + + ASSERT_RTNL(); + + idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); + + cb->args[0] = net_cb.idx; + return skb->len; +} + +static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, + int id) +{ + struct sk_buff *msg; + int err = -ENOMEM; + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) + goto out; + + err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id); + if (err < 0) + goto err_out; + + rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); + return; + +err_out: + nlmsg_free(msg); +out: + rtnl_set_sk_err(net, RTNLGRP_NSID, err); +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -648,7 +719,8 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, + NULL); return 0; } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 04db318e6218..87b22c0bc08c 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -58,14 +58,14 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, return -ENOMEM; get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); - rwlock_init(&queue->syn_wait_lock); + spin_lock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; lopt->max_qlen_log = ilog2(nr_table_entries); - write_lock_bh(&queue->syn_wait_lock); + spin_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; - write_unlock_bh(&queue->syn_wait_lock); + spin_unlock_bh(&queue->syn_wait_lock); return 0; } @@ -81,10 +81,10 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk( { struct listen_sock *lopt; - write_lock_bh(&queue->syn_wait_lock); + spin_lock_bh(&queue->syn_wait_lock); lopt = queue->listen_opt; queue->listen_opt = NULL; - write_unlock_bh(&queue->syn_wait_lock); + spin_unlock_bh(&queue->syn_wait_lock); return lopt; } @@ -94,21 +94,26 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - if (lopt->qlen != 0) { + if (listen_sock_qlen(lopt) != 0) { unsigned int i; for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; + spin_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; - lopt->qlen--; - reqsk_free(req); + atomic_inc(&lopt->qlen_dec); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); + reqsk_put(req); } + spin_unlock_bh(&queue->syn_wait_lock); } } - WARN_ON(lopt->qlen != 0); + if (WARN_ON(listen_sock_qlen(lopt) != 0)) + pr_err("qlen %u\n", listen_sock_qlen(lopt)); kvfree(lopt); } @@ -153,24 +158,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) * case might also exist in tcp_v4_hnd_req() that will trigger this locking * order. * - * When a TFO req is created, it needs to sock_hold its listener to prevent - * the latter data structure from going away. - * - * This function also sets "treq->listener" to NULL and unreference listener - * socket. treq->listener is used by the listener so it is protected by the + * This function also sets "treq->tfo_listener" to false. + * treq->tfo_listener is used by the listener so it is protected by the * fastopenq->lock in this function. */ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset) { - struct sock *lsk = tcp_rsk(req)->listener; - struct fastopen_queue *fastopenq = - inet_csk(lsk)->icsk_accept_queue.fastopenq; + struct sock *lsk = req->rsk_listener; + struct fastopen_queue *fastopenq; + + fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; if (req->sk) /* the child socket hasn't been accepted yet */ goto out; @@ -179,8 +182,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * special RST handling below. */ spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); - reqsk_free(req); + reqsk_put(req); return; } /* Wait for 60secs before removing a req that has triggered RST. @@ -190,7 +192,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * * For more details see CoNext'11 "TCP Fast Open" paper. */ - req->expires = jiffies + 60*HZ; + req->rsk_timer.expires = jiffies + 60*HZ; if (fastopenq->rskq_rst_head == NULL) fastopenq->rskq_rst_head = req; else @@ -201,5 +203,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, fastopenq->qlen++; out: spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7ebed55b5f7d..358d52a38533 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -818,7 +818,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_vlan)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + - nla_total_size(sizeof(struct ifla_vf_link_state))); + nla_total_size(sizeof(struct ifla_vf_link_state)) + + nla_total_size(sizeof(struct ifla_vf_rss_query_en))); return size; } else return 0; @@ -982,6 +983,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) +{ + char name[IFNAMSIZ]; + int err; + + err = dev_get_phys_port_name(dev, name, sizeof(name)); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; @@ -1037,8 +1056,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink)) || + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || (upper_dev && nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || @@ -1072,6 +1091,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; + if (rtnl_phys_port_name_fill(skb, dev)) + goto nla_put_failure; + if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; @@ -1111,14 +1133,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_rss_query_en vf_rss_query_en; /* * Not all SR-IOV capable drivers support the - * spoofcheck query. Preset to -1 so the user - * space tool can detect that the driver didn't - * report anything. + * spoofcheck and "RSS query enable" query. Preset to + * -1 so the user space tool can detect that the driver + * didn't report anything. */ ivi.spoofchk = -1; + ivi.rss_query_en = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero @@ -1131,7 +1155,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = - vf_linkstate.vf = ivi.vf; + vf_linkstate.vf = + vf_rss_query_en.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; @@ -1141,6 +1166,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; + vf_rss_query_en.setting = ivi.rss_query_en; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -1155,7 +1181,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), &vf_spoofchk) || nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), - &vf_linkstate)) + &vf_linkstate) || + nla_put(skb, IFLA_VF_RSS_QUERY_EN, + sizeof(vf_rss_query_en), + &vf_rss_query_en)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1269,6 +1298,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, + [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1479,6 +1509,17 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivl->link_state); break; } + case IFLA_VF_RSS_QUERY_EN: { + struct ifla_vf_rss_query_en *ivrssq_en; + + ivrssq_en = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, + ivrssq_en->vf, + ivrssq_en->setting); + break; + } default: err = -EINVAL; break; @@ -1815,6 +1856,42 @@ errout: return err; } +static int rtnl_group_dellink(const struct net *net, int group) +{ + struct net_device *dev, *aux; + LIST_HEAD(list_kill); + bool found = false; + + if (!group) + return -EPERM; + + for_each_netdev(net, dev) { + if (dev->group == group) { + const struct rtnl_link_ops *ops; + + found = true; + ops = dev->rtnl_link_ops; + if (!ops || !ops->dellink) + return -EOPNOTSUPP; + } + } + + if (!found) + return -ENODEV; + + for_each_netdev_safe(net, dev, aux) { + if (dev->group == group) { + const struct rtnl_link_ops *ops; + + ops = dev->rtnl_link_ops; + ops->dellink(dev, &list_kill); + } + } + unregister_netdevice_many(&list_kill); + + return 0; +} + static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -1838,6 +1915,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) dev = __dev_get_by_name(net, ifname); + else if (tb[IFLA_GROUP]) + return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP])); else return -EINVAL; @@ -1873,7 +1952,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) EXPORT_SYMBOL(rtnl_configure_link); struct net_device *rtnl_create_link(struct net *net, - char *ifname, unsigned char name_assign_type, + const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; @@ -2345,7 +2424,7 @@ EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, - u8 *addr, u32 pid, u32 seq, + u8 *addr, u16 vid, u32 pid, u32 seq, int type, unsigned int flags, int nlflags) { @@ -2367,6 +2446,9 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; + if (vid) + if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2381,7 +2463,7 @@ static inline size_t rtnl_fdb_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); } -static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) +static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2391,7 +2473,8 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) if (!skb) goto errout; - err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); + err = nlmsg_populate_fdb_fill(skb, dev, addr, vid, + 0, 0, type, NTF_SELF, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2526,7 +2609,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) nlh->nlmsg_flags); if (!err) { - rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2627,7 +2710,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); if (!err) { - rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2652,7 +2735,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, if (*idx < cb->args[0]) goto skip; - err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, + err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, portid, seq, RTM_NEWNEIGH, NTF_SELF, NLM_F_MULTI); @@ -2695,7 +2778,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net_device *dev; struct nlattr *tb[IFLA_MAX+1]; - struct net_device *bdev = NULL; struct net_device *br_dev = NULL; const struct net_device_ops *ops = NULL; const struct net_device_ops *cops = NULL; @@ -2719,7 +2801,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENODEV; ops = br_dev->netdev_ops; - bdev = br_dev; } for_each_netdev(net, dev) { @@ -2732,7 +2813,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) cops = br_dev->netdev_ops; } - bdev = dev; } else { if (dev != br_dev && !(dev->priv_flags & IFF_BRIDGE_PORT)) @@ -2742,7 +2822,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) !(dev->priv_flags & IFF_EBRIDGE)) continue; - bdev = br_dev; cops = ops; } @@ -2804,8 +2883,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8e4ac97c8477..3b6e5830256e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2865,7 +2865,6 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) * @from: search offset * @to: search limit * @config: textsearch configuration - * @state: uninitialized textsearch state variable * * Finds a pattern in the skb data according to the specified * textsearch configuration. Use textsearch_next() to retrieve @@ -2873,17 +2872,17 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) * to the first occurrence or UINT_MAX if no match was found. */ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state) + unsigned int to, struct ts_config *config) { + struct ts_state state; unsigned int ret; config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; - skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); + skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); - ret = textsearch_find(config, state); + ret = textsearch_find(config, &state); return (ret <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); @@ -3207,10 +3206,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); - struct sk_buff *nskb, *lp, *p = *head; unsigned int len = skb_gro_len(skb); + struct sk_buff *lp, *p = *head; unsigned int delta_truesize; - unsigned int headroom; if (unlikely(p->len + len >= 65536)) return -E2BIG; @@ -3277,48 +3275,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } - /* switch back to head shinfo */ - pinfo = skb_shinfo(p); - - if (pinfo->frag_list) - goto merge; - if (skb_gro_len(p) != pinfo->gso_size) - return -E2BIG; - - headroom = skb_headroom(p); - nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); - if (unlikely(!nskb)) - return -ENOMEM; - - __copy_skb_header(nskb, p); - nskb->mac_len = p->mac_len; - - skb_reserve(nskb, headroom); - __skb_put(nskb, skb_gro_offset(p)); - - skb_set_mac_header(nskb, skb_mac_header(p) - p->data); - skb_set_network_header(nskb, skb_network_offset(p)); - skb_set_transport_header(nskb, skb_transport_offset(p)); - - __skb_pull(p, skb_gro_offset(p)); - memcpy(skb_mac_header(nskb), skb_mac_header(p), - p->data - skb_mac_header(p)); - - skb_shinfo(nskb)->frag_list = p; - skb_shinfo(nskb)->gso_size = pinfo->gso_size; - pinfo->gso_size = 0; - __skb_header_release(p); - NAPI_GRO_CB(nskb)->last = p; - - nskb->data_len += p->len; - nskb->truesize += p->truesize; - nskb->len += p->len; - - *head = nskb; - nskb->next = p->next; - p->next = NULL; - - p = nskb; merge: delta_truesize = skb->truesize; @@ -3796,7 +3752,6 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); - /** * skb_partial_csum_set - set up and verify partial csum values for packet * @skb: the skb to set diff --git a/net/core/sock.c b/net/core/sock.c index 71e3e5f1eaa0..e891bcf325ca 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb_dst_force(skb); spin_lock_irqsave(&list->lock, flags); - skb->dropcount = atomic_read(&sk->sk_drops); + sock_skb_set_dropcount(sk, skb); __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); @@ -947,8 +947,6 @@ set_rcvbuf: sk->sk_mark = val; break; - /* We implement the SO_SNDLOWAT etc to - not be settable (1003.1g 5.3) */ case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; @@ -1253,6 +1251,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ return -ENOPROTOOPT; } @@ -1473,9 +1474,8 @@ void sk_release_kernel(struct sock *sk) return; sock_hold(sk); - sock_release(sk->sk_socket); - release_net(sock_net(sk)); sock_net_set(sk, get_net(&init_net)); + sock_release(sk->sk_socket); sock_put(sk); } EXPORT_SYMBOL(sk_release_kernel); @@ -1557,6 +1557,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); + atomic64_set(&newsk->sk_cookie, 0); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -1684,19 +1685,6 @@ void sock_efree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_efree); -#ifdef CONFIG_INET -void sock_edemux(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} -EXPORT_SYMBOL(sock_edemux); -#endif - kuid_t sock_i_uid(struct sock *sk) { kuid_t uid; @@ -2186,15 +2174,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_no_getsockopt); -int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len) +int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg); -int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len, int flags) +int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, + int flags) { return -EOPNOTSUPP; } @@ -2566,14 +2553,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, EXPORT_SYMBOL(compat_sock_common_getsockopt); #endif -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; @@ -2750,6 +2737,42 @@ static inline void release_proto_idx(struct proto *prot) } #endif +static void req_prot_cleanup(struct request_sock_ops *rsk_prot) +{ + if (!rsk_prot) + return; + kfree(rsk_prot->slab_name); + rsk_prot->slab_name = NULL; + if (rsk_prot->slab) { + kmem_cache_destroy(rsk_prot->slab); + rsk_prot->slab = NULL; + } +} + +static int req_prot_init(const struct proto *prot) +{ + struct request_sock_ops *rsk_prot = prot->rsk_prot; + + if (!rsk_prot) + return 0; + + rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", + prot->name); + if (!rsk_prot->slab_name) + return -ENOMEM; + + rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, + rsk_prot->obj_size, 0, + 0, NULL); + + if (!rsk_prot->slab) { + pr_crit("%s: Can't create request sock SLAB cache!\n", + prot->name); + return -ENOMEM; + } + return 0; +} + int proto_register(struct proto *prot, int alloc_slab) { if (alloc_slab) { @@ -2763,21 +2786,8 @@ int proto_register(struct proto *prot, int alloc_slab) goto out; } - if (prot->rsk_prot != NULL) { - prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); - if (prot->rsk_prot->slab_name == NULL) - goto out_free_sock_slab; - - prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, - prot->rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); - - if (prot->rsk_prot->slab == NULL) { - pr_crit("%s: Can't create request sock SLAB cache!\n", - prot->name); - goto out_free_request_sock_slab_name; - } - } + if (req_prot_init(prot)) + goto out_free_request_sock_slab; if (prot->twsk_prot != NULL) { prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); @@ -2789,8 +2799,7 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, 0, - SLAB_HWCACHE_ALIGN | - prot->slab_flags, + prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; @@ -2806,14 +2815,8 @@ int proto_register(struct proto *prot, int alloc_slab) out_free_timewait_sock_slab_name: kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: - if (prot->rsk_prot && prot->rsk_prot->slab) { - kmem_cache_destroy(prot->rsk_prot->slab); - prot->rsk_prot->slab = NULL; - } -out_free_request_sock_slab_name: - if (prot->rsk_prot) - kfree(prot->rsk_prot->slab_name); -out_free_sock_slab: + req_prot_cleanup(prot->rsk_prot); + kmem_cache_destroy(prot->slab); prot->slab = NULL; out: @@ -2833,11 +2836,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; } - if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { - kmem_cache_destroy(prot->rsk_prot->slab); - kfree(prot->rsk_prot->slab_name); - prot->rsk_prot->slab = NULL; - } + req_prot_cleanup(prot->rsk_prot); if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { kmem_cache_destroy(prot->twsk_prot->twsk_slab); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index ad704c757bb4..74dddf84adcd 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); -int sock_diag_check_cookie(void *sk, __u32 *cookie) +static u64 sock_gen_cookie(struct sock *sk) { - if ((cookie[0] != INET_DIAG_NOCOOKIE || - cookie[1] != INET_DIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) - return -ESTALE; - else + while (1) { + u64 res = atomic64_read(&sk->sk_cookie); + + if (res) + return res; + res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + atomic64_cmpxchg(&sk->sk_cookie, 0, res); + } +} + +int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) +{ + u64 res; + + if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE) return 0; + + res = sock_gen_cookie(sk); + if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1]) + return -ESTALE; + + return 0; } EXPORT_SYMBOL_GPL(sock_diag_check_cookie); -void sock_diag_save_cookie(void *sk, __u32 *cookie) +void sock_diag_save_cookie(struct sock *sk, __u32 *cookie) { - cookie[0] = (u32)(unsigned long)sk; - cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + u64 res = sock_gen_cookie(sk); + + cookie[0] = (u32)res; + cookie[1] = (u32)(res >> 32); } EXPORT_SYMBOL_GPL(sock_diag_save_cookie); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8ce351ffceb1..95b6139d710c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,7 +24,6 @@ static int zero = 0; static int one = 1; -static int ushort_max = USHRT_MAX; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; @@ -403,7 +402,6 @@ static struct ctl_table netns_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .extra1 = &zero, - .extra2 = &ushort_max, .proc_handler = proc_dointvec_minmax }, { } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 93ea80196f0e..5b21f6f88e97 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -177,6 +177,8 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = { [DCB_ATTR_IEEE_PFC] = {.len = sizeof(struct ieee_pfc)}, [DCB_ATTR_IEEE_APP_TABLE] = {.type = NLA_NESTED}, [DCB_ATTR_IEEE_MAXRATE] = {.len = sizeof(struct ieee_maxrate)}, + [DCB_ATTR_IEEE_QCN] = {.len = sizeof(struct ieee_qcn)}, + [DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)}, }; static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { @@ -1030,7 +1032,7 @@ nla_put_failure: return err; } -/* Handle IEEE 802.1Qaz GET commands. */ +/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) { struct nlattr *ieee, *app; @@ -1067,6 +1069,32 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) } } + if (ops->ieee_getqcn) { + struct ieee_qcn qcn; + + memset(&qcn, 0, sizeof(qcn)); + err = ops->ieee_getqcn(netdev, &qcn); + if (!err) { + err = nla_put(skb, DCB_ATTR_IEEE_QCN, + sizeof(qcn), &qcn); + if (err) + return -EMSGSIZE; + } + } + + if (ops->ieee_getqcnstats) { + struct ieee_qcn_stats qcn_stats; + + memset(&qcn_stats, 0, sizeof(qcn_stats)); + err = ops->ieee_getqcnstats(netdev, &qcn_stats); + if (!err) { + err = nla_put(skb, DCB_ATTR_IEEE_QCN_STATS, + sizeof(qcn_stats), &qcn_stats); + if (err) + return -EMSGSIZE; + } + } + if (ops->ieee_getpfc) { struct ieee_pfc pfc; memset(&pfc, 0, sizeof(pfc)); @@ -1379,8 +1407,9 @@ int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, } EXPORT_SYMBOL(dcbnl_cee_notify); -/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not - * be completed the entire msg is aborted and error value is returned. +/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb SET commands. + * If any requested operation can not be completed + * the entire msg is aborted and error value is returned. * No attempt is made to reconcile the case where only part of the * cmd can be completed. */ @@ -1417,6 +1446,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, goto err; } + if (ieee[DCB_ATTR_IEEE_QCN] && ops->ieee_setqcn) { + struct ieee_qcn *qcn = + nla_data(ieee[DCB_ATTR_IEEE_QCN]); + + err = ops->ieee_setqcn(netdev, qcn); + if (err) + goto err; + } + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); err = ops->ieee_setpfc(netdev, pfc); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e4c144fa706f..bebc735f5afc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -280,8 +280,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev); + struct request_sock *req); int dccp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); @@ -310,16 +309,15 @@ int compat_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); #endif int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int nonblock, int flags, - int *addr_len); +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); unsigned int dccp_poll(struct file *file, struct socket *sock, poll_table *wait); int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +void dccp_req_err(struct sock *sk, u64 seq); struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb); int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 028fc43aacbd..5a45f8de5d99 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -49,13 +49,14 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); } -static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) +static int dccp_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e45b968613a4..2b4f21d34df6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -89,10 +89,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->inet_saddr == 0) inet->inet_saddr = fl4->saddr; - inet->inet_rcv_saddr = inet->inet_saddr; - + sk_rcv_saddr_set(sk, inet->inet_saddr); inet->inet_dport = usin->sin_port; - inet->inet_daddr = daddr; + sk_daddr_set(sk, daddr); inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) @@ -196,6 +195,32 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) dst->ops->redirect(dst, sk, skb); } +void dccp_req_err(struct sock *sk, u64 seq) + { + struct request_sock *req = inet_reqsk(sk); + struct net *net = sock_net(sk); + + /* + * ICMPs are not backlogged, hence we cannot get an established + * socket here. + */ + WARN_ON(req->sk); + + if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); + } else { + /* + * Still in RESPOND, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + inet_csk_reqsk_queue_drop(req->rsk_listener, req); + } +} +EXPORT_SYMBOL(dccp_req_err); + /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -228,10 +253,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = inet_lookup(net, &dccp_hashinfo, - iph->daddr, dh->dccph_dport, - iph->saddr, dh->dccph_sport, inet_iif(skb)); - if (sk == NULL) { + sk = __inet_lookup_established(net, &dccp_hashinfo, + iph->daddr, dh->dccph_dport, + iph->saddr, ntohs(dh->dccph_sport), + inet_iif(skb)); + if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -240,6 +266,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) inet_twsk_put(inet_twsk(sk)); return; } + seq = dccp_hdr_seq(dh); + if (sk->sk_state == DCCP_NEW_SYN_RECV) + return dccp_req_err(sk, seq); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -252,7 +281,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); @@ -289,35 +317,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req , **prev; - case DCCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - req = inet_csk_search_req(sk, &prev, dh->dccph_dport, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* - * ICMPs are not backlogged, hence we cannot get an established - * socket here. - */ - WARN_ON(req->sk); - - if (!between48(seq, dccp_rsk(req)->dreq_iss, - dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - /* - * Still in RESPOND, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(sk, req, prev); - goto out; - case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { @@ -408,8 +407,8 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->ir_rmt_addr; - newinet->inet_rcv_saddr = ireq->ir_loc_addr; + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; newinet->inet_opt = ireq->opt; ireq->opt = NULL; @@ -449,14 +448,14 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) const struct dccp_hdr *dh = dccp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; - struct request_sock **prev; /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, &prev, - dh->dccph_sport, + struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, iph->saddr, iph->daddr); - if (req != NULL) - return dccp_check_req(sk, skb, req, prev); - + if (req) { + nsk = dccp_check_req(sk, skb, req); + reqsk_put(req); + return nsk; + } nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, @@ -575,7 +574,7 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +void dccp_syn_ack_timeout(const struct request_sock *req) { } EXPORT_SYMBOL(dccp_syn_ack_timeout); @@ -624,7 +623,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp_request_sock_ops); + req = inet_reqsk_alloc(&dccp_request_sock_ops, sk); if (req == NULL) goto drop; @@ -639,8 +638,10 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq = inet_rsk(req); - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); + ireq->ireq_family = AF_INET; + ireq->ir_iif = sk->sk_bound_dev_if; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6bcaa33cd804..9d0551092c6c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -40,19 +40,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped; static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops; -static void dccp_v6_hash(struct sock *sk) -{ - if (sk->sk_state != DCCP_CLOSED) { - if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) { - inet_hash(sk); - return; - } - local_bh_disable(); - __inet6_hash(sk, NULL); - local_bh_enable(); - } -} - /* add pseudo-header to DCCP checksum stored in skb->csum */ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, const struct in6_addr *saddr, @@ -98,11 +85,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } - sk = inet6_lookup(net, &dccp_hashinfo, - &hdr->daddr, dh->dccph_dport, - &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); + sk = __inet6_lookup_established(net, &dccp_hashinfo, + &hdr->daddr, dh->dccph_dport, + &hdr->saddr, ntohs(dh->dccph_sport), + inet6_iif(skb)); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -112,6 +100,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet_twsk_put(inet_twsk(sk)); return; } + seq = dccp_hdr_seq(dh); + if (sk->sk_state == DCCP_NEW_SYN_RECV) + return dccp_req_err(sk, seq); bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -121,7 +112,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); @@ -162,32 +152,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Might be for an request_sock */ switch (sk->sk_state) { - struct request_sock *req, **prev; - case DCCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, - &hdr->daddr, &hdr->saddr, - inet6_iif(skb)); - if (req == NULL) - goto out; - - /* - * ICMPs are not backlogged, hence we cannot get an established - * socket here. - */ - WARN_ON(req->sk != NULL); - - if (!between48(seq, dccp_rsk(req)->dreq_iss, - dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - inet_csk_reqsk_queue_drop(sk, req, prev); - goto out; - case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ @@ -330,17 +294,16 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); const struct ipv6hdr *iph = ipv6_hdr(skb); + struct request_sock *req; struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. */ - struct request_sock *req = inet6_csk_search_req(sk, &prev, - dh->dccph_sport, - &iph->saddr, - &iph->daddr, - inet6_iif(skb)); - if (req != NULL) - return dccp_check_req(sk, skb, req, prev); + req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, + &iph->daddr, inet6_iif(skb)); + if (req) { + nsk = dccp_check_req(sk, skb, req); + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, &iph->saddr, dh->dccph_sport, &iph->daddr, ntohs(dh->dccph_dport), @@ -386,7 +349,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp6_request_sock_ops); + req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk); if (req == NULL) goto drop; @@ -403,6 +366,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ireq->ireq_family = AF_INET6; if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || @@ -469,11 +433,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); - - ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - - newsk->sk_v6_rcv_saddr = newnp->saddr; + newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -591,7 +551,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, dccp_done(newsk); goto out; } - __inet6_hash(newsk, NULL); + __inet_hash(newsk, NULL); return newsk; @@ -916,9 +876,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } - ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr); - + np->saddr = sk->sk_v6_rcv_saddr; return err; } @@ -1061,7 +1019,7 @@ static struct proto dccp_v6_prot = { .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v6_do_rcv, - .hash = dccp_v6_hash, + .hash = inet_hash, .unhash = inet_unhash, .accept = inet_csk_accept, .get_port = inet_csk_get_port, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b50dc436db1f..5f566663e47f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -27,28 +27,16 @@ struct inet_timewait_death_row dccp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, - .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock), .hashinfo = &dccp_hashinfo, - .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, - (unsigned long)&dccp_death_row), - .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, - inet_twdr_twkill_work), -/* Short-time timewait calendar */ - - .twcal_hand = -1, - .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, - (unsigned long)&dccp_death_row), }; EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_time_wait(struct sock *sk, int state, int timeo) { - struct inet_timewait_sock *tw = NULL; + struct inet_timewait_sock *tw; - if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) - tw = inet_twsk_alloc(sk, state); + tw = inet_twsk_alloc(sk, &dccp_death_row, state); if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -71,8 +59,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; - inet_twsk_schedule(tw, &dccp_death_row, timeo, - DCCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this @@ -152,8 +139,7 @@ EXPORT_SYMBOL_GPL(dccp_create_openreq_child); * as an request_sock. */ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev) + struct request_sock *req) { struct sock *child = NULL; struct dccp_request_sock *dreq = dccp_rsk(req); @@ -200,7 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_add(sk, req, child); out: @@ -212,7 +198,7 @@ drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); goto out; } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 595ddf0459db..d8346d0eadeb 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -72,8 +72,7 @@ static void printl(const char *fmt, ...) wake_up(&dccpw.wait); } -static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { const struct inet_sock *inet = inet_sk(sk); struct ccid3_hc_tx_sock *hc = NULL; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e171b780b499..52a94016526d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -741,8 +741,7 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) return 0; } -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { const struct dccp_sock *dp = dccp_sk(sk); const int flags = msg->msg_flags; @@ -806,8 +805,8 @@ out_discard: EXPORT_SYMBOL_GPL(dccp_sendmsg); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { const struct dccp_hdr *dh; long timeo; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1cd46a345cb0..3ef7acef3ce8 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -161,33 +161,11 @@ out: sock_put(sk); } -/* - * Timer for listening sockets - */ -static void dccp_response_timer(struct sock *sk) -{ - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, - DCCP_RTO_MAX); -} - static void dccp_keepalive_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - /* Only process if socket is not in use. */ - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - inet_csk_reset_keepalive_timer(sk, HZ / 20); - goto out; - } - - if (sk->sk_state == DCCP_LISTEN) { - dccp_response_timer(sk); - goto out; - } -out: - bh_unlock_sock(sk); + pr_err("dccp should not use a keepalive timer !\n"); sock_put(sk); } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 810228646de3..754484b3cd0e 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1669,8 +1669,8 @@ static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int } -static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); @@ -1905,8 +1905,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk, return skb; } -static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 7ca7c3143da3..4507b188fc51 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -49,41 +49,17 @@ #include <net/dn_route.h> static int dn_neigh_construct(struct neighbour *); -static void dn_long_error_report(struct neighbour *, struct sk_buff *); -static void dn_short_error_report(struct neighbour *, struct sk_buff *); -static int dn_long_output(struct neighbour *, struct sk_buff *); -static int dn_short_output(struct neighbour *, struct sk_buff *); -static int dn_phase3_output(struct neighbour *, struct sk_buff *); - - -/* - * For talking to broadcast devices: Ethernet & PPP - */ -static const struct neigh_ops dn_long_ops = { - .family = AF_DECnet, - .error_report = dn_long_error_report, - .output = dn_long_output, - .connected_output = dn_long_output, -}; +static void dn_neigh_error_report(struct neighbour *, struct sk_buff *); +static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb); /* - * For talking to pointopoint and multidrop devices: DDCMP and X.25 + * Operations for adding the link layer header. */ -static const struct neigh_ops dn_short_ops = { +static const struct neigh_ops dn_neigh_ops = { .family = AF_DECnet, - .error_report = dn_short_error_report, - .output = dn_short_output, - .connected_output = dn_short_output, -}; - -/* - * For talking to DECnet phase III nodes - */ -static const struct neigh_ops dn_phase3_ops = { - .family = AF_DECnet, - .error_report = dn_short_error_report, /* Can use short version here */ - .output = dn_phase3_output, - .connected_output = dn_phase3_output, + .error_report = dn_neigh_error_report, + .output = dn_neigh_output, + .connected_output = dn_neigh_output, }; static u32 dn_neigh_hash(const void *pkey, @@ -93,11 +69,18 @@ static u32 dn_neigh_hash(const void *pkey, return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]); } +static bool dn_key_eq(const struct neighbour *neigh, const void *pkey) +{ + return neigh_key_eq16(neigh, pkey); +} + struct neigh_table dn_neigh_table = { .family = PF_DECnet, .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), .key_len = sizeof(__le16), + .protocol = cpu_to_be16(ETH_P_DNA_RT), .hash = dn_neigh_hash, + .key_eq = dn_key_eq, .constructor = dn_neigh_construct, .id = "dn_neigh_cache", .parms ={ @@ -146,16 +129,9 @@ static int dn_neigh_construct(struct neighbour *neigh) __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); - - if (dn_db->use_long) - neigh->ops = &dn_long_ops; - else - neigh->ops = &dn_short_ops; rcu_read_unlock(); - if (dn->flags & DN_NDFLAG_P3) - neigh->ops = &dn_phase3_ops; - + neigh->ops = &dn_neigh_ops; neigh->nud_state = NUD_NOARP; neigh->output = neigh->ops->connected_output; @@ -187,24 +163,16 @@ static int dn_neigh_construct(struct neighbour *neigh) return 0; } -static void dn_long_error_report(struct neighbour *neigh, struct sk_buff *skb) -{ - printk(KERN_DEBUG "dn_long_error_report: called\n"); - kfree_skb(skb); -} - - -static void dn_short_error_report(struct neighbour *neigh, struct sk_buff *skb) +static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb) { - printk(KERN_DEBUG "dn_short_error_report: called\n"); + printk(KERN_DEBUG "dn_neigh_error_report: called\n"); kfree_skb(skb); } -static int dn_neigh_output_packet(struct sk_buff *skb) +static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = rt->n; struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; unsigned int seq; @@ -226,7 +194,20 @@ static int dn_neigh_output_packet(struct sk_buff *skb) return err; } -static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct dn_route *rt = (struct dn_route *)dst; + struct neighbour *neigh = rt->n; + + return neigh->output(neigh, skb); +} + +/* + * For talking to broadcast devices: Ethernet & PPP + */ +static int dn_long_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; @@ -265,11 +246,15 @@ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } -static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) +/* + * For talking to pointopoint and multidrop devices: DDCMP and X.25 + */ +static int dn_short_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; @@ -301,15 +286,17 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } /* - * Phase 3 output is the same is short output, execpt that + * For talking to DECnet phase III nodes + * Phase 3 output is the same as short output, execpt that * it clears the area bits before transmission. */ -static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; @@ -340,8 +327,34 @@ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); +} + +int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *neigh = rt->n; + struct dn_neigh *dn = (struct dn_neigh *)neigh; + struct dn_dev *dn_db; + bool use_long; + + rcu_read_lock(); + dn_db = rcu_dereference(neigh->dev->dn_ptr); + if (dn_db == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + use_long = dn_db->use_long; + rcu_read_unlock(); + + if (dn->flags & DN_NDFLAG_P3) + return dn_phase3_output(neigh, sk, skb); + if (use_long) + return dn_long_output(neigh, sk, skb); + else + return dn_short_output(neigh, sk, skb); } /* @@ -362,7 +375,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb) /* * Ethernet router hello message received */ -int dn_neigh_router_hello(struct sk_buff *skb) +int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) { struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; @@ -424,7 +437,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) /* * Endnode hello message received */ -int dn_neigh_endnode_hello(struct sk_buff *skb) +int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb) { struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; struct neighbour *neigh; diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index fe5f01485d33..a321eac9fd0c 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -714,7 +714,7 @@ out: return ret; } -static int dn_nsp_rx_packet(struct sk_buff *skb) +static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk = NULL; @@ -814,7 +814,8 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb, + skb->dev, NULL, dn_nsp_rx_packet); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 3b81092771f8..03227ffd19ce 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -136,7 +136,6 @@ int decnet_dst_gc_interval = 2; static struct dst_ops dn_dst_ops = { .family = PF_DECnet, - .protocol = cpu_to_be16(ETH_P_DNA_RT), .gc_thresh = 128, .gc = dn_dst_gc, .check = dn_dst_check, @@ -513,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb) * * Returns: result of input function if route is found, error code otherwise */ -static int dn_route_rx_packet(struct sk_buff *skb) +static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb) { struct dn_skb_cb *cb; int err; @@ -574,7 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb) ptr++; cb->hops = *ptr++; /* Visit Count */ - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, + skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -601,7 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb) ptr += 2; cb->hops = *ptr & 0x3f; - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, + skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -609,7 +610,7 @@ drop_it: return NET_RX_DROP; } -static int dn_route_discard(struct sk_buff *skb) +static int dn_route_discard(struct sock *sk, struct sk_buff *skb) { /* * I know we drop the packet here, but thats considered success in @@ -619,7 +620,7 @@ static int dn_route_discard(struct sk_buff *skb) return NET_RX_SUCCESS; } -static int dn_route_ptp_hello(struct sk_buff *skb) +static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb) { dn_dev_hello(skb); dn_neigh_pointopoint_hello(skb); @@ -705,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type switch (flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_HELO: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_route_ptp_hello); case DN_RT_PKT_L1RT: case DN_RT_PKT_L2RT: return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_route_discard); case DN_RT_PKT_ERTH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_neigh_router_hello); case DN_RT_PKT_EEDH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_neigh_endnode_hello); } } else { @@ -743,15 +744,6 @@ out: return NET_RX_DROP; } -static int dn_to_neigh_output(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *n = rt->n; - - return n->output(n, skb); -} - static int dn_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -778,7 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; cb->hops = 0; - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb, + NULL, dev, dn_to_neigh_output); error: @@ -826,7 +819,8 @@ static int dn_forward(struct sk_buff *skb) if (rt->rt_flags & RTCF_DOREDIRECT) cb->rt_flags |= DN_RT_F_IE; - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb, + dev, skb->dev, dn_to_neigh_output); drop: diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index e4d9560a910b..af34fc9bdf69 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -89,9 +89,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { dnrmg_send_peer(skb); return NF_ACCEPT; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 5f8ac404535b..ff7736f7ff42 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -5,9 +5,12 @@ config HAVE_NET_DSA # Drivers must select NET_DSA and the appropriate tagging format config NET_DSA - tristate - depends on HAVE_NET_DSA + tristate "Distributed Switch Architecture" + depends on HAVE_NET_DSA && NET_SWITCHDEV select PHYLIB + ---help--- + Say Y if you want to enable support for the hardware switches supported + by the Distributed Switch Architecture. if NET_DSA diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 4dea2e0681d1..5eaadabe23a1 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> +#include <linux/of_net.h> #include <linux/sysfs.h> #include "dsa_priv.h" @@ -175,43 +176,14 @@ __ATTRIBUTE_GROUPS(dsa_hwmon); #endif /* CONFIG_NET_DSA_HWMON */ /* basic switch operations **************************************************/ -static struct dsa_switch * -dsa_switch_setup(struct dsa_switch_tree *dst, int index, - struct device *parent, struct device *host_dev) +static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_chip_data *pd = dst->pd->chip + index; - struct dsa_switch_driver *drv; - struct dsa_switch *ds; - int ret; - char *name; - int i; + struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_tree *dst = ds->dst; + struct dsa_chip_data *pd = ds->pd; bool valid_name_found = false; - - /* - * Probe for switch model. - */ - drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); - if (drv == NULL) { - netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", - index); - return ERR_PTR(-EINVAL); - } - netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", - index, name); - - - /* - * Allocate and initialise switch state. - */ - ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); - if (ds == NULL) - return ERR_PTR(-ENOMEM); - - ds->dst = dst; - ds->index = index; - ds->pd = dst->pd->chip + index; - ds->drv = drv; - ds->master_dev = host_dev; + int index = ds->index; + int i, ret; /* * Validate supplied switch configuration. @@ -256,7 +228,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, * switch. */ if (dst->cpu_switch == index) { - switch (drv->tag_protocol) { + switch (ds->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case DSA_TAG_PROTO_DSA: dst->rcv = dsa_netdev_ops.rcv; @@ -284,7 +256,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, goto out; } - dst->tag_protocol = drv->tag_protocol; + dst->tag_protocol = ds->tag_protocol; } /* @@ -314,19 +286,15 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, * Create network devices for physical switch ports. */ for (i = 0; i < DSA_MAX_PORTS; i++) { - struct net_device *slave_dev; - if (!(ds->phys_port_mask & (1 << i))) continue; - slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); - if (slave_dev == NULL) { + ret = dsa_slave_create(ds, parent, i, pd->port_names[i]); + if (ret < 0) { netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n", index, i, pd->port_names[i]); - continue; + ret = 0; } - - ds->ports[i] = slave_dev; } #ifdef CONFIG_NET_DSA_HWMON @@ -354,13 +322,57 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, } #endif /* CONFIG_NET_DSA_HWMON */ - return ds; + return ret; out_free: mdiobus_free(ds->slave_mii_bus); out: kfree(ds); - return ERR_PTR(ret); + return ret; +} + +static struct dsa_switch * +dsa_switch_setup(struct dsa_switch_tree *dst, int index, + struct device *parent, struct device *host_dev) +{ + struct dsa_chip_data *pd = dst->pd->chip + index; + struct dsa_switch_driver *drv; + struct dsa_switch *ds; + int ret; + char *name; + + /* + * Probe for switch model. + */ + drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); + if (drv == NULL) { + netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", + index); + return ERR_PTR(-EINVAL); + } + netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", + index, name); + + + /* + * Allocate and initialise switch state. + */ + ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + if (ds == NULL) + return NULL; + + ds->dst = dst; + ds->index = index; + ds->pd = pd; + ds->drv = drv; + ds->tag_protocol = drv->tag_protocol; + ds->master_dev = host_dev; + + ret = dsa_switch_setup_one(ds, parent); + if (ret) + return NULL; + + return ds; } static void dsa_switch_destroy(struct dsa_switch *ds) @@ -378,7 +390,7 @@ static int dsa_switch_suspend(struct dsa_switch *ds) /* Suspend slave network devices */ for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!(ds->phys_port_mask & (1 << i))) + if (!dsa_is_port_initialized(ds, i)) continue; ret = dsa_slave_suspend(ds->ports[i]); @@ -404,7 +416,7 @@ static int dsa_switch_resume(struct dsa_switch *ds) /* Resume slave network devices */ for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!(ds->phys_port_mask & (1 << i))) + if (!dsa_is_port_initialized(ds, i)) continue; ret = dsa_slave_resume(ds->ports[i]); @@ -558,12 +570,12 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd) kfree(pd->chip); } -static int dsa_of_probe(struct platform_device *pdev) +static int dsa_of_probe(struct device *dev) { - struct device_node *np = pdev->dev.of_node; + struct device_node *np = dev->of_node; struct device_node *child, *mdio, *ethernet, *port, *link; struct mii_bus *mdio_bus; - struct platform_device *ethernet_dev; + struct net_device *ethernet_dev; struct dsa_platform_data *pd; struct dsa_chip_data *cd; const char *port_name; @@ -578,22 +590,22 @@ static int dsa_of_probe(struct platform_device *pdev) mdio_bus = of_mdio_find_bus(mdio); if (!mdio_bus) - return -EINVAL; + return -EPROBE_DEFER; ethernet = of_parse_phandle(np, "dsa,ethernet", 0); if (!ethernet) return -EINVAL; - ethernet_dev = of_find_device_by_node(ethernet); + ethernet_dev = of_find_net_device_by_node(ethernet); if (!ethernet_dev) - return -ENODEV; + return -EPROBE_DEFER; pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return -ENOMEM; - pdev->dev.platform_data = pd; - pd->netdev = ðernet_dev->dev; + dev->platform_data = pd; + pd->of_netdev = ethernet_dev; pd->nr_chips = of_get_available_child_count(np); if (pd->nr_chips > DSA_MAX_SWITCHES) pd->nr_chips = DSA_MAX_SWITCHES; @@ -665,72 +677,35 @@ out_free_chip: dsa_of_free_platform_data(pd); out_free: kfree(pd); - pdev->dev.platform_data = NULL; + dev->platform_data = NULL; return ret; } -static void dsa_of_remove(struct platform_device *pdev) +static void dsa_of_remove(struct device *dev) { - struct dsa_platform_data *pd = pdev->dev.platform_data; + struct dsa_platform_data *pd = dev->platform_data; - if (!pdev->dev.of_node) + if (!dev->of_node) return; dsa_of_free_platform_data(pd); kfree(pd); } #else -static inline int dsa_of_probe(struct platform_device *pdev) +static inline int dsa_of_probe(struct device *dev) { return 0; } -static inline void dsa_of_remove(struct platform_device *pdev) +static inline void dsa_of_remove(struct device *dev) { } #endif -static int dsa_probe(struct platform_device *pdev) +static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, + struct device *parent, struct dsa_platform_data *pd) { - struct dsa_platform_data *pd = pdev->dev.platform_data; - struct net_device *dev; - struct dsa_switch_tree *dst; - int i, ret; - - pr_notice_once("Distributed Switch Architecture driver version %s\n", - dsa_driver_version); - - if (pdev->dev.of_node) { - ret = dsa_of_probe(pdev); - if (ret) - return ret; - - pd = pdev->dev.platform_data; - } - - if (pd == NULL || pd->netdev == NULL) - return -EINVAL; - - dev = dev_to_net_device(pd->netdev); - if (dev == NULL) { - ret = -EINVAL; - goto out; - } - - if (dev->dsa_ptr != NULL) { - dev_put(dev); - ret = -EEXIST; - goto out; - } - - dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (dst == NULL) { - dev_put(dev); - ret = -ENOMEM; - goto out; - } - - platform_set_drvdata(pdev, dst); + int i; dst->pd = pd; dst->master_netdev = dev; @@ -740,7 +715,7 @@ static int dsa_probe(struct platform_device *pdev) for (i = 0; i < pd->nr_chips; i++) { struct dsa_switch *ds; - ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev); + ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev); if (IS_ERR(ds)) { netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", i, PTR_ERR(ds)); @@ -768,18 +743,67 @@ static int dsa_probe(struct platform_device *pdev) dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); add_timer(&dst->link_poll_timer); } +} + +static int dsa_probe(struct platform_device *pdev) +{ + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct dsa_switch_tree *dst; + int ret; + + pr_notice_once("Distributed Switch Architecture driver version %s\n", + dsa_driver_version); + + if (pdev->dev.of_node) { + ret = dsa_of_probe(&pdev->dev); + if (ret) + return ret; + + pd = pdev->dev.platform_data; + } + + if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) + return -EINVAL; + + if (pd->of_netdev) { + dev = pd->of_netdev; + dev_hold(dev); + } else { + dev = dev_to_net_device(pd->netdev); + } + if (dev == NULL) { + ret = -EPROBE_DEFER; + goto out; + } + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + ret = -EEXIST; + goto out; + } + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (dst == NULL) { + dev_put(dev); + ret = -ENOMEM; + goto out; + } + + platform_set_drvdata(pdev, dst); + + dsa_setup_dst(dst, dev, &pdev->dev, pd); return 0; out: - dsa_of_remove(pdev); + dsa_of_remove(&pdev->dev); return ret; } -static int dsa_remove(struct platform_device *pdev) +static void dsa_remove_dst(struct dsa_switch_tree *dst) { - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); int i; if (dst->link_poll_needed) @@ -793,8 +817,14 @@ static int dsa_remove(struct platform_device *pdev) if (ds != NULL) dsa_switch_destroy(ds); } +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - dsa_of_remove(pdev); + dsa_remove_dst(dst); + dsa_of_remove(&pdev->dev); return 0; } @@ -821,6 +851,10 @@ static struct packet_type dsa_pack_type __read_mostly = { .func = dsa_switch_rcv, }; +static struct notifier_block dsa_netdevice_nb __read_mostly = { + .notifier_call = dsa_slave_netdevice_event, +}; + #ifdef CONFIG_PM_SLEEP static int dsa_suspend(struct device *d) { @@ -879,6 +913,8 @@ static int __init dsa_init_module(void) { int rc; + register_netdevice_notifier(&dsa_netdevice_nb); + rc = platform_driver_register(&dsa_driver); if (rc) return rc; @@ -891,6 +927,7 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { + unregister_netdevice_notifier(&dsa_netdevice_nb); dev_remove_pack(&dsa_pack_type); platform_driver_unregister(&dsa_driver); } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index dc9756d3154c..d5f1f9b862ea 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -45,6 +45,8 @@ struct dsa_slave_priv { int old_link; int old_pause; int old_duplex; + + struct net_device *bridge_dev; }; /* dsa.c */ @@ -53,11 +55,12 @@ extern char dsa_driver_version[]; /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); -struct net_device *dsa_slave_create(struct dsa_switch *ds, - struct device *parent, - int port, char *name); +int dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); +int dsa_slave_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr); /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f23deadf42a0..827cda560a55 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -10,10 +10,14 @@ #include <linux/list.h> #include <linux/etherdevice.h> +#include <linux/netdevice.h> #include <linux/phy.h> #include <linux/phy_fixed.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <net/rtnetlink.h> +#include <net/switchdev.h> +#include <linux/if_bridge.h> #include "dsa_priv.h" /* slave mii_bus handling ***************************************************/ @@ -51,13 +55,16 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) /* slave device handling ****************************************************/ -static int dsa_slave_init(struct net_device *dev) +static int dsa_slave_get_iflink(const struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - dev->iflink = p->parent->dst->master_netdev->ifindex; + return p->parent->dst->master_netdev->ifindex; +} - return 0; +static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) +{ + return !!p->bridge_dev; } static int dsa_slave_open(struct net_device *dev) @@ -65,6 +72,8 @@ static int dsa_slave_open(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = p->parent->dst->master_netdev; struct dsa_switch *ds = p->parent; + u8 stp_state = dsa_port_is_bridged(p) ? + BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; if (!(master->flags & IFF_UP)) @@ -93,6 +102,9 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } + if (ds->drv->port_stp_update) + ds->drv->port_stp_update(ds, p->port, stp_state); + if (p->phy) phy_start(p->phy); @@ -133,6 +145,9 @@ static int dsa_slave_close(struct net_device *dev) if (ds->drv->port_disable) ds->drv->port_disable(ds, p->port, p->phy); + if (ds->drv->port_stp_update) + ds->drv->port_stp_update(ds, p->port, BR_STATE_DISABLED); + return 0; } @@ -184,6 +199,105 @@ out: return 0; } +static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlm_flags) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_add) + ret = ds->drv->fdb_add(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_del) + ret = ds->drv->fdb_del(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb, + const unsigned char *addr, u16 vid, + bool is_static, + u32 portid, u32 seq, int type, + unsigned int flags) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_EXT_LEARNED; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dev->ifindex; + ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + if (vid && nla_put_u16(skb, NDA_VLAN, vid)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Dump information about entries, in response to GETNEIGH */ +static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + unsigned char addr[ETH_ALEN] = { 0 }; + int ret; + + if (!ds->drv->fdb_getnext) + return -EOPNOTSUPP; + + for (; ; idx++) { + bool is_static; + + ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static); + if (ret < 0) + break; + + if (idx < cb->args[0]) + continue; + + ret = dsa_slave_fill_info(dev, skb, addr, 0, + is_static, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI); + if (ret < 0) + break; + } + + return idx; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -194,6 +308,92 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } +/* Return a bitmask of all ports being currently bridged within a given bridge + * device. Note that on leave, the mask will still return the bitmask of ports + * currently bridged, prior to port removal, and this is exactly what we want. + */ +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds, + struct net_device *bridge) +{ + struct dsa_slave_priv *p; + unsigned int port; + u32 mask = 0; + + for (port = 0; port < DSA_MAX_PORTS; port++) { + if (!dsa_is_port_initialized(ds, port)) + continue; + + p = netdev_priv(ds->ports[port]); + + if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT && + p->bridge_dev == bridge) + mask |= 1 << port; + } + + return mask; +} + +static int dsa_slave_stp_update(struct net_device *dev, u8 state) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->port_stp_update) + ret = ds->drv->port_stp_update(ds, p->port, state); + + return ret; +} + +static int dsa_slave_bridge_port_join(struct net_device *dev, + struct net_device *br) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + p->bridge_dev = br; + + if (ds->drv->port_join_bridge) + ret = ds->drv->port_join_bridge(ds, p->port, + dsa_slave_br_port_mask(ds, br)); + + return ret; +} + +static int dsa_slave_bridge_port_leave(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + + if (ds->drv->port_leave_bridge) + ret = ds->drv->port_leave_bridge(ds, p->port, + dsa_slave_br_port_mask(ds, p->bridge_dev)); + + p->bridge_dev = NULL; + + /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, + * so allow it to be in BR_STATE_FORWARDING to be kept functional + */ + dsa_slave_stp_update(dev, BR_STATE_FORWARDING); + + return ret; +} + +static int dsa_slave_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + psid->id_len = sizeof(ds->index); + memcpy(&psid->id, &ds->index, psid->id_len); + + return 0; +} + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -462,14 +662,22 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { }; static const struct net_device_ops dsa_slave_netdev_ops = { - .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, .ndo_start_xmit = dsa_slave_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, + .ndo_fdb_add = dsa_slave_fdb_add, + .ndo_fdb_del = dsa_slave_fdb_del, + .ndo_fdb_dump = dsa_slave_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, + .ndo_get_iflink = dsa_slave_get_iflink, +}; + +static const struct swdev_ops dsa_slave_swdev_ops = { + .swdev_parent_id_get = dsa_slave_parent_id_get, + .swdev_port_stp_update = dsa_slave_stp_update, }; static void dsa_slave_adjust_link(struct net_device *dev) @@ -513,6 +721,24 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, } /* slave device setup *******************************************************/ +static int dsa_slave_phy_connect(struct dsa_slave_priv *p, + struct net_device *slave_dev, + int addr) +{ + struct dsa_switch *ds = p->parent; + + p->phy = ds->slave_mii_bus->phy_map[addr]; + if (!p->phy) + return -ENODEV; + + /* Use already configured phy mode */ + p->phy_interface = p->phy->interface; + phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); + + return 0; +} + static int dsa_slave_phy_setup(struct dsa_slave_priv *p, struct net_device *slave_dev) { @@ -546,10 +772,25 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, if (ds->drv->get_phy_flags) phy_flags = ds->drv->get_phy_flags(ds, p->port); - if (phy_dn) - p->phy = of_phy_connect(slave_dev, phy_dn, - dsa_slave_adjust_link, phy_flags, - p->phy_interface); + if (phy_dn) { + ret = of_mdio_parse_addr(&slave_dev->dev, phy_dn); + /* If this PHY address is part of phys_mii_mask, which means + * that we need to divert reads and writes to/from it, then we + * want to bind this device using the slave MII bus created by + * DSA to make that happen. + */ + if (!phy_is_fixed && ret >= 0 && + (ds->phys_mii_mask & (1 << ret))) { + ret = dsa_slave_phy_connect(p, slave_dev, ret); + if (ret) + return ret; + } else { + p->phy = of_phy_connect(slave_dev, phy_dn, + dsa_slave_adjust_link, + phy_flags, + p->phy_interface); + } + } if (p->phy && phy_is_fixed) fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); @@ -558,14 +799,9 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, * MDIO bus instead */ if (!p->phy) { - p->phy = ds->slave_mii_bus->phy_map[p->port]; - if (!p->phy) - return -ENODEV; - - /* Use already configured phy mode */ - p->phy_interface = p->phy->interface; - phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); + ret = dsa_slave_phy_connect(p, slave_dev, p->port); + if (ret) + return ret; } else { netdev_info(slave_dev, "attached PHY at address %d [%s]\n", p->phy->addr, p->phy->drv->name); @@ -605,9 +841,8 @@ int dsa_slave_resume(struct net_device *slave_dev) return 0; } -struct net_device * -dsa_slave_create(struct dsa_switch *ds, struct device *parent, - int port, char *name) +int dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name) { struct net_device *master = ds->dst->master_netdev; struct net_device *slave_dev; @@ -617,13 +852,14 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name, NET_NAME_UNKNOWN, ether_setup); if (slave_dev == NULL) - return slave_dev; + return -ENOMEM; slave_dev->features = master->vlan_features; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; slave_dev->netdev_ops = &dsa_slave_netdev_ops; + slave_dev->swdev_ops = &dsa_slave_swdev_ops; SET_NETDEV_DEV(slave_dev, parent); slave_dev->dev.of_node = ds->pd->port_dn[port]; @@ -667,19 +903,64 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, ret = dsa_slave_phy_setup(p, slave_dev); if (ret) { free_netdev(slave_dev); - return NULL; + return ret; } + ds->ports[port] = slave_dev; ret = register_netdev(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); phy_disconnect(p->phy); + ds->ports[port] = NULL; free_netdev(slave_dev); - return NULL; + return ret; } netif_carrier_off(slave_dev); - return slave_dev; + return 0; +} + +static bool dsa_slave_dev_check(struct net_device *dev) +{ + return dev->netdev_ops == &dsa_slave_netdev_ops; +} + +static int dsa_slave_master_changed(struct net_device *dev) +{ + struct net_device *master = netdev_master_upper_dev_get(dev); + struct dsa_slave_priv *p = netdev_priv(dev); + int err = 0; + + if (master && master->rtnl_link_ops && + !strcmp(master->rtnl_link_ops->kind, "bridge")) + err = dsa_slave_bridge_port_join(dev, master); + else if (dsa_port_is_bridged(p)) + err = dsa_slave_bridge_port_leave(dev); + + return err; +} + +int dsa_slave_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev; + int err = 0; + + switch (event) { + case NETDEV_CHANGEUPPER: + dev = netdev_notifier_info_to_dev(ptr); + if (!dsa_slave_dev_check(dev)) + goto out; + + err = dsa_slave_master_changed(dev); + if (err) + netdev_warn(dev, "failed to reflect master change\n"); + + break; + } + +out: + return NOTIFY_DONE; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 238f38d21641..f3bad41d725f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -104,7 +104,7 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - memset(eth->h_dest, 0, ETH_ALEN); + eth_zero_addr(eth->h_dest); return ETH_HLEN; } @@ -113,39 +113,6 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, EXPORT_SYMBOL(eth_header); /** - * eth_rebuild_header- rebuild the Ethernet MAC header. - * @skb: socket buffer to update - * - * This is called after an ARP or IPV6 ndisc it's resolution on this - * sk_buff. We now let protocol (ARP) fill in the other fields. - * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. - */ -int eth_rebuild_header(struct sk_buff *skb) -{ - struct ethhdr *eth = (struct ethhdr *)skb->data; - struct net_device *dev = skb->dev; - - switch (eth->h_proto) { -#ifdef CONFIG_INET - case htons(ETH_P_IP): - return arp_find(eth->h_dest, skb); -#endif - default: - netdev_dbg(dev, - "%s: unable to resolve type %X addresses.\n", - dev->name, ntohs(eth->h_proto)); - - memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); - break; - } - - return 0; -} -EXPORT_SYMBOL(eth_rebuild_header); - -/** * eth_get_headlen - determine the the length of header for an ethernet frame * @data: pointer to start of frame * @len: total length of frame @@ -369,7 +336,6 @@ EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, .parse = eth_header_parse, - .rebuild = eth_rebuild_header, .cache = eth_header_cache, .cache_update = eth_header_cache_update, }; @@ -391,7 +357,7 @@ void ether_setup(struct net_device *dev) dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; - memset(dev->broadcast, 0xFF, ETH_ALEN); + eth_broadcast_addr(dev->broadcast); } EXPORT_SYMBOL(ether_setup); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 055fbb71ba6f..0ae5822ef944 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -113,7 +113,7 @@ static void lowpan_setup(struct net_device *dev) { dev->addr_len = IEEE802154_ADDR_LEN; memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_IEEE802154; + dev->type = ARPHRD_6LOWPAN; /* Frame Control + Sequence Number + Address fields + Security Header */ dev->hard_header_len = 2 + 1 + 20 + 14; dev->needed_tailroom = 2; /* FCS */ @@ -126,6 +126,7 @@ static void lowpan_setup(struct net_device *dev) dev->header_ops = &lowpan_header_ops; dev->ml_priv = &lowpan_mlme; dev->destructor = free_netdev; + dev->features |= NETIF_F_NETNS_LOCAL; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -148,10 +149,11 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, pr_debug("adding new link\n"); - if (!tb[IFLA_LINK]) + if (!tb[IFLA_LINK] || + !net_eq(dev_net(dev), &init_net)) return -EINVAL; /* find and hold real wpan device */ - real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + real_dev = dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; if (real_dev->type != ARPHRD_IEEE802154) { diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 18bc7e738507..2ee00e8a0308 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -25,6 +25,9 @@ #include "sysfs.h" #include "core.h" +/* name for sysfs, %d is appended */ +#define PHY_NAME "phy" + /* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg802154_rdev_list); int cfg802154_rdev_list_generation; @@ -122,7 +125,7 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) INIT_LIST_HEAD(&rdev->wpan_dev_list); device_initialize(&rdev->wpan_phy.dev); - dev_set_name(&rdev->wpan_phy.dev, "wpan-phy%d", rdev->wpan_phy_idx); + dev_set_name(&rdev->wpan_phy.dev, PHY_NAME "%d", rdev->wpan_phy_idx); rdev->wpan_phy.dev.class = &wpan_phy_class; rdev->wpan_phy.dev.platform_data = rdev; @@ -225,6 +228,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, switch (state) { /* TODO NETDEV_DEVTYPE */ case NETDEV_REGISTER: + dev->features |= NETIF_F_NETNS_LOCAL; wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 9105265920fe..2b4955d7aae5 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -76,7 +76,6 @@ nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } -EXPORT_SYMBOL(ieee802154_nl_start_confirm); static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 2878d8ca6d3b..b60c65f70346 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -98,12 +98,12 @@ static int ieee802154_sock_release(struct socket *sock) return 0; } -static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; - return sk->sk_prot->sendmsg(iocb, sk, msg, len); + return sk->sk_prot->sendmsg(sk, msg, len); } static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, @@ -255,8 +255,7 @@ static int raw_disconnect(struct sock *sk, int flags) return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -327,8 +326,8 @@ out: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -615,8 +614,7 @@ static int dgram_disconnect(struct sock *sk, int flags) return 0; } -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -715,9 +713,8 @@ out: return err; } -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index dff55c2d87f3..133b4280660c 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -48,49 +48,6 @@ static ssize_t name_show(struct device *dev, } static DEVICE_ATTR_RO(name); -#define MASTER_SHOW_COMPLEX(name, format_string, args...) \ -static ssize_t name ## _show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); \ - int ret; \ - \ - mutex_lock(&phy->pib_lock); \ - ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \ - mutex_unlock(&phy->pib_lock); \ - return ret; \ -} \ -static DEVICE_ATTR_RO(name) - -#define MASTER_SHOW(field, format_string) \ - MASTER_SHOW_COMPLEX(field, format_string, phy->field) - -MASTER_SHOW(current_channel, "%d"); -MASTER_SHOW(current_page, "%d"); -MASTER_SHOW(transmit_power, "%d +- 1 dB"); -MASTER_SHOW_COMPLEX(cca_mode, "%d", phy->cca.mode); - -static ssize_t channels_supported_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); - int ret; - int i, len = 0; - - mutex_lock(&phy->pib_lock); - for (i = 0; i < 32; i++) { - ret = snprintf(buf + len, PAGE_SIZE - len, - "%#09x\n", phy->channels_supported[i]); - if (ret < 0) - break; - len += ret; - } - mutex_unlock(&phy->pib_lock); - return len; -} -static DEVICE_ATTR_RO(channels_supported); - static void wpan_phy_release(struct device *dev) { struct cfg802154_registered_device *rdev = dev_to_rdev(dev); @@ -101,12 +58,6 @@ static void wpan_phy_release(struct device *dev) static struct attribute *pmib_attrs[] = { &dev_attr_index.attr, &dev_attr_name.attr, - /* below will be removed soon */ - &dev_attr_current_channel.attr, - &dev_attr_current_page.attr, - &dev_attr_channels_supported.attr, - &dev_attr_transmit_power.attr, - &dev_attr_cca_mode.attr, NULL, }; ATTRIBUTE_GROUPS(pmib); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d2e49baaff63..8b47a4d79d04 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog) * shutdown() (rather than close()). */ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && - inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) { + !inet_csk(sk)->icsk_accept_queue.fastopenq) { if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) err = fastopen_init_queue(sk, backlog); else if ((sysctl_tcp_fastopen & @@ -314,11 +314,11 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - WARN_ON(answer_prot->slab == NULL); + WARN_ON(!answer_prot->slab); err = -ENOBUFS; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); - if (sk == NULL) + if (!sk) goto out; err = 0; @@ -716,8 +716,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(inet_getname); -int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size) +int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; @@ -728,7 +727,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, msg, size); + return sk->sk_prot->sendmsg(sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); @@ -750,8 +749,8 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(inet_sendpage); -int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; @@ -759,7 +758,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sock_rps_record_flow(sk); - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; @@ -1270,7 +1269,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (udpfrag) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); - if (skb->next != NULL) + if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; } else { @@ -1675,7 +1674,7 @@ static int __init inet_init(void) struct list_head *r; int rc = -EINVAL; - BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); + sock_skb_cb_check_size(sizeof(struct inet_skb_parm)); rc = proto_register(&tcp_prot, 1); if (rc) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 205e1472aa78..933a92820d26 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -122,6 +122,7 @@ * Interface to generic neighbour cache. */ static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); +static bool arp_key_eq(const struct neighbour *n, const void *pkey); static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -149,18 +150,12 @@ static const struct neigh_ops arp_direct_ops = { .connected_output = neigh_direct_output, }; -static const struct neigh_ops arp_broken_ops = { - .family = AF_INET, - .solicit = arp_solicit, - .error_report = arp_error_report, - .output = neigh_compat_output, - .connected_output = neigh_compat_output, -}; - struct neigh_table arp_tbl = { .family = AF_INET, .key_len = 4, + .protocol = cpu_to_be16(ETH_P_IP), .hash = arp_hash, + .key_eq = arp_key_eq, .constructor = arp_constructor, .proxy_redo = parp_redo, .id = "arp_cache", @@ -216,7 +211,12 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { - return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd); + return arp_hashfn(pkey, dev, hash_rnd); +} + +static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) +{ + return neigh_key_eq32(neigh, pkey); } static int arp_constructor(struct neighbour *neigh) @@ -228,7 +228,7 @@ static int arp_constructor(struct neighbour *neigh) rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev == NULL) { + if (!in_dev) { rcu_read_unlock(); return -EINVAL; } @@ -260,35 +260,6 @@ static int arp_constructor(struct neighbour *neigh) in old paradigm. */ -#if 1 - /* So... these "amateur" devices are hopeless. - The only thing, that I can say now: - It is very sad that we need to keep ugly obsolete - code to make them happy. - - They should be moved to more reasonable state, now - they use rebuild_header INSTEAD OF hard_start_xmit!!! - Besides that, they are sort of out of date - (a lot of redundant clones/copies, useless in 2.1), - I wonder why people believe that they work. - */ - switch (dev->type) { - default: - break; - case ARPHRD_ROSE: -#if IS_ENABLED(CONFIG_AX25) - case ARPHRD_AX25: -#if IS_ENABLED(CONFIG_NETROM) - case ARPHRD_NETROM: -#endif - neigh->ops = &arp_broken_ops; - neigh->output = neigh->ops->output; - return 0; -#else - break; -#endif - } -#endif if (neigh->type == RTN_MULTICAST) { neigh->nud_state = NUD_NOARP; arp_mc_map(addr, neigh->ha, dev, 1); @@ -433,71 +404,6 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) return flag; } -/* OBSOLETE FUNCTIONS */ - -/* - * Find an arp mapping in the cache. If not found, post a request. - * - * It is very UGLY routine: it DOES NOT use skb->dst->neighbour, - * even if it exists. It is supposed that skb->dev was mangled - * by a virtual device (eql, shaper). Nobody but broken devices - * is allowed to use this function, it is scheduled to be removed. --ANK - */ - -static int arp_set_predefined(int addr_hint, unsigned char *haddr, - __be32 paddr, struct net_device *dev) -{ - switch (addr_hint) { - case RTN_LOCAL: - pr_debug("arp called for own IP address\n"); - memcpy(haddr, dev->dev_addr, dev->addr_len); - return 1; - case RTN_MULTICAST: - arp_mc_map(paddr, haddr, dev, 1); - return 1; - case RTN_BROADCAST: - memcpy(haddr, dev->broadcast, dev->addr_len); - return 1; - } - return 0; -} - - -int arp_find(unsigned char *haddr, struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - __be32 paddr; - struct neighbour *n; - - if (!skb_dst(skb)) { - pr_debug("arp_find is called with dst==NULL\n"); - kfree_skb(skb); - return 1; - } - - paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr); - if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, - paddr, dev)) - return 0; - - n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); - - if (n) { - n->used = jiffies; - if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) { - neigh_ha_snapshot(haddr, n, dev); - neigh_release(n); - return 0; - } - neigh_release(n); - } else - kfree_skb(skb); - return 1; -} -EXPORT_SYMBOL(arp_find); - -/* END OF OBSOLETE FUNCTIONS */ - /* * Check if we can use proxy ARP for this path */ @@ -569,7 +475,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev, */ /* - * Create an arp packet. If (dest_hw == NULL), we create a broadcast + * Create an arp packet. If dest_hw is not set, we create a broadcast * message. */ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, @@ -589,7 +495,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, */ skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC); - if (skb == NULL) + if (!skb) return NULL; skb_reserve(skb, hlen); @@ -597,9 +503,9 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; skb->protocol = htons(ETH_P_ARP); - if (src_hw == NULL) + if (!src_hw) src_hw = dev->dev_addr; - if (dest_hw == NULL) + if (!dest_hw) dest_hw = dev->broadcast; /* @@ -663,7 +569,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, break; #endif default: - if (target_hw != NULL) + if (target_hw) memcpy(arp_ptr, target_hw, dev->addr_len); else memset(arp_ptr, 0, dev->addr_len); @@ -685,7 +591,8 @@ EXPORT_SYMBOL(arp_create); void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. */ - NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb, + NULL, skb->dev, dev_queue_xmit_sk); } EXPORT_SYMBOL(arp_xmit); @@ -708,7 +615,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, skb = arp_create(type, ptype, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw); - if (skb == NULL) + if (!skb) return; arp_xmit(skb); @@ -719,7 +626,7 @@ EXPORT_SYMBOL(arp_send); * Process an arp request. */ -static int arp_process(struct sk_buff *skb) +static int arp_process(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -738,7 +645,7 @@ static int arp_process(struct sk_buff *skb) * is ARP'able. */ - if (in_dev == NULL) + if (!in_dev) goto out; arp = arp_hdr(skb); @@ -902,7 +809,7 @@ static int arp_process(struct sk_buff *skb) is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && inet_addr_type(net, sip) == RTN_UNICAST; - if (n == NULL && + if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && inet_addr_type(net, sip) == RTN_UNICAST) || is_garp)) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); @@ -940,7 +847,7 @@ out: static void parp_redo(struct sk_buff *skb) { - arp_process(skb); + arp_process(NULL, skb); } @@ -973,7 +880,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb, + dev, NULL, arp_process); consumeskb: consume_skb(skb); @@ -994,7 +902,7 @@ out_of_mem: static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) { - if (dev == NULL) { + if (!dev) { IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } @@ -1020,7 +928,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, return -ENODEV; } if (mask) { - if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL) + if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1)) return -ENOBUFS; return 0; } @@ -1041,7 +949,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; - if (dev == NULL) { + if (!dev) { struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) @@ -1161,7 +1069,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, return arp_req_delete_public(net, r, dev); ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; - if (dev == NULL) { + if (!dev) { struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1210,7 +1118,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (r.arp_dev[0]) { err = -ENODEV; dev = __dev_get_by_name(net, r.arp_dev); - if (dev == NULL) + if (!dev) goto out; /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e361ea6f3fc8..bdb2a07ec363 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -255,7 +255,7 @@ static int __init cipso_v4_cache_init(void) cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, sizeof(struct cipso_v4_map_cache_bkt), GFP_KERNEL); - if (cipso_v4_cache == NULL) + if (!cipso_v4_cache) return -ENOMEM; for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { @@ -339,7 +339,7 @@ static int cipso_v4_cache_check(const unsigned char *key, secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CIPSOV4; - if (prev_entry == NULL) { + if (!prev_entry) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } @@ -393,10 +393,10 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, cipso_ptr_len = cipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) + if (!entry) return -ENOMEM; entry->key = kmemdup(cipso_ptr, cipso_ptr_len, GFP_ATOMIC); - if (entry->key == NULL) { + if (!entry->key) { ret_val = -ENOMEM; goto cache_add_failure; } @@ -502,7 +502,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def, atomic_set(&doi_def->refcount, 1); spin_lock(&cipso_v4_doi_list_lock); - if (cipso_v4_doi_search(doi_def->doi) != NULL) { + if (cipso_v4_doi_search(doi_def->doi)) { spin_unlock(&cipso_v4_doi_list_lock); ret_val = -EEXIST; goto doi_add_return; @@ -513,7 +513,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def, doi_add_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CIPSOV4_ADD, audit_info); - if (audit_buf != NULL) { + if (audit_buf) { const char *type_str; switch (doi_type) { case CIPSO_V4_MAP_TRANS: @@ -547,7 +547,7 @@ doi_add_return: */ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) { - if (doi_def == NULL) + if (!doi_def) return; switch (doi_def->type) { @@ -598,7 +598,7 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) spin_lock(&cipso_v4_doi_list_lock); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) { + if (!doi_def) { spin_unlock(&cipso_v4_doi_list_lock); ret_val = -ENOENT; goto doi_remove_return; @@ -617,7 +617,7 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) doi_remove_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CIPSOV4_DEL, audit_info); - if (audit_buf != NULL) { + if (audit_buf) { audit_log_format(audit_buf, " cipso_doi=%u res=%u", doi, ret_val == 0 ? 1 : 0); @@ -644,7 +644,7 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) + if (!doi_def) goto doi_getdef_return; if (!atomic_inc_not_zero(&doi_def->refcount)) doi_def = NULL; @@ -664,7 +664,7 @@ doi_getdef_return: */ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) { - if (doi_def == NULL) + if (!doi_def) return; if (!atomic_dec_and_test(&doi_def->refcount)) @@ -1642,7 +1642,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) rcu_read_lock(); doi_def = cipso_v4_doi_search(get_unaligned_be32(&opt[2])); - if (doi_def == NULL) { + if (!doi_def) { err_offset = 2; goto validate_return_locked; } @@ -1736,7 +1736,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * not the loopback device drop the packet. Further, * there is no legitimate reason for setting this from * userspace so reject it if skb is NULL. */ - if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { + if (!skb || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } @@ -1897,7 +1897,7 @@ int cipso_v4_sock_setattr(struct sock *sk, * defined yet but it is not a problem as the only users of these * "lite" PF_INET sockets are functions which do an accept() call * afterwards so we will label the socket as part of the accept(). */ - if (sk == NULL) + if (!sk) return 0; /* We allocate the maximum CIPSO option size here so we are probably @@ -1905,7 +1905,7 @@ int cipso_v4_sock_setattr(struct sock *sk, * on and after all we are only talking about 40 bytes. */ buf_len = CIPSO_V4_OPT_LEN_MAX; buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { + if (!buf) { ret_val = -ENOMEM; goto socket_setattr_failure; } @@ -1921,7 +1921,7 @@ int cipso_v4_sock_setattr(struct sock *sk, * set the IPOPT_CIPSO option. */ opt_len = (buf_len + 3) & ~3; opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); - if (opt == NULL) { + if (!opt) { ret_val = -ENOMEM; goto socket_setattr_failure; } @@ -1981,7 +1981,7 @@ int cipso_v4_req_setattr(struct request_sock *req, * on and after all we are only talking about 40 bytes. */ buf_len = CIPSO_V4_OPT_LEN_MAX; buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { + if (!buf) { ret_val = -ENOMEM; goto req_setattr_failure; } @@ -1997,7 +1997,7 @@ int cipso_v4_req_setattr(struct request_sock *req, * set the IPOPT_CIPSO option. */ opt_len = (buf_len + 3) & ~3; opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); - if (opt == NULL) { + if (!opt) { ret_val = -ENOMEM; goto req_setattr_failure; } @@ -2102,7 +2102,7 @@ void cipso_v4_sock_delattr(struct sock *sk) sk_inet = inet_sk(sk); opt = rcu_dereference_protected(sk_inet->inet_opt, 1); - if (opt == NULL || opt->opt.cipso == 0) + if (!opt || opt->opt.cipso == 0) return; hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); @@ -2128,7 +2128,7 @@ void cipso_v4_req_delattr(struct request_sock *req) req_inet = inet_rsk(req); opt = req_inet->opt; - if (opt == NULL || opt->opt.cipso == 0) + if (!opt || opt->opt.cipso == 0) return; cipso_v4_delopt(&req_inet->opt); @@ -2157,7 +2157,7 @@ int cipso_v4_getattr(const unsigned char *cipso, doi = get_unaligned_be32(&cipso[2]); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) + if (!doi_def) goto getattr_return; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3a8985c94581..419d23c53ec7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -107,7 +107,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; -static u32 inet_addr_hash(struct net *net, __be32 addr) +static u32 inet_addr_hash(const struct net *net, __be32 addr) { u32 val = (__force u32) addr ^ net_hash_mix(net); @@ -548,6 +548,26 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } +static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) +{ + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = ifa->ifa_address, + .imr_ifindex = ifa->ifa_dev->dev->ifindex, + }; + int ret; + + ASSERT_RTNL(); + + lock_sock(sk); + if (join) + ret = ip_mc_join_group(sk, &mreq); + else + ret = ip_mc_leave_group(sk, &mreq); + release_sock(sk); + + return ret; +} + static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -565,7 +585,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); in_dev = inetdev_by_index(net, ifm->ifa_index); - if (in_dev == NULL) { + if (!in_dev) { err = -ENODEV; goto errout; } @@ -573,7 +593,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && - ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL])) + ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) continue; if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) @@ -581,9 +601,11 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (tb[IFA_ADDRESS] && (ifm->ifa_prefixlen != ifa->ifa_prefixlen || - !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) + !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa))) continue; + if (ipv4_is_multicast(ifa->ifa_address)) + ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -733,21 +755,21 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ifm = nlmsg_data(nlh); err = -EINVAL; - if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) + if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL]) goto errout; dev = __dev_get_by_index(net, ifm->ifa_index); err = -ENODEV; - if (dev == NULL) + if (!dev) goto errout; in_dev = __in_dev_get_rtnl(dev); err = -ENOBUFS; - if (in_dev == NULL) + if (!in_dev) goto errout; ifa = inet_alloc_ifa(); - if (ifa == NULL) + if (!ifa) /* * A potential indev allocation can be left alive, it stays * assigned to its device and is destroy with it. @@ -758,7 +780,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, neigh_parms_data_state_setall(in_dev->arp_parms); in_dev_hold(in_dev); - if (tb[IFA_ADDRESS] == NULL) + if (!tb[IFA_ADDRESS]) tb[IFA_ADDRESS] = tb[IFA_LOCAL]; INIT_HLIST_NODE(&ifa->hash); @@ -769,11 +791,11 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = in_dev; - ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]); - ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]); + ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); + ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); if (tb[IFA_BROADCAST]) - ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); + ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); if (tb[IFA_LABEL]) nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); @@ -838,6 +860,15 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) * userspace already relies on not having to provide this. */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); + if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { + int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, + true, ifa); + + if (ret < 0) { + inet_free_ifa(ifa); + return ret; + } + } return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } else { inet_free_ifa(ifa); @@ -1259,7 +1290,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 addr = 0; struct net_device *dev; - if (in_dev != NULL) + if (in_dev) return confirm_addr_indev(in_dev, dst, local, scope); rcu_read_lock(); @@ -1309,7 +1340,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) if (named++ == 0) goto skip; dot = strchr(old, ':'); - if (dot == NULL) { + if (!dot) { sprintf(old, ":%d", named); dot = old; } @@ -1478,7 +1509,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; ifm = nlmsg_data(nlh); @@ -1510,11 +1541,11 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } if ((ifa->ifa_address && - nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || + nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && - nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) || + nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || (ifa->ifa_broadcast && - nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || + nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || @@ -1597,7 +1628,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, net = dev_net(ifa->ifa_dev->dev); skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); - if (skb == NULL) + if (!skb) goto errout; err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); @@ -1634,7 +1665,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return -ENODATA; nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); - if (nla == NULL) + if (!nla) return -EMSGSIZE; for (i = 0; i < IPV4_DEVCONF_MAX; i++) @@ -1723,7 +1754,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; ncm = nlmsg_data(nlh); @@ -1765,7 +1796,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, int err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, @@ -1822,10 +1853,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, break; default: dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) + if (!dev) goto errout; in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) + if (!in_dev) goto errout; devconf = &in_dev->cnf; break; @@ -1833,7 +1864,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, @@ -2184,7 +2215,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) { struct devinet_sysctl_table *t = cnf->sysctl; - if (t == NULL) + if (!t) return; cnf->sysctl = NULL; @@ -2245,16 +2276,16 @@ static __net_init int devinet_init_net(struct net *net) if (!net_eq(net, &init_net)) { all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); - if (all == NULL) + if (!all) goto err_alloc_all; dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) + if (!dflt) goto err_alloc_dflt; #ifdef CONFIG_SYSCTL tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); - if (tbl == NULL) + if (!tbl) goto err_alloc_ctl; tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; @@ -2274,7 +2305,7 @@ static __net_init int devinet_init_net(struct net *net) err = -ENOMEM; forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); - if (forw_hdr == NULL) + if (!forw_hdr) goto err_reg_ctl; net->ipv4.forw_hdr = forw_hdr; #endif diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 60173d4d3a0e..421a80b09b62 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -553,7 +553,7 @@ static int esp_init_authenc(struct xfrm_state *x) int err; err = -EINVAL; - if (x->ealg == NULL) + if (!x->ealg) goto error; err = -ENAMETOOLONG; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 23b9b3e86f4c..872494e6e6eb 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -52,12 +52,12 @@ static int __net_init fib4_rules_init(struct net *net) { struct fib_table *local_table, *main_table; - local_table = fib_trie_table(RT_TABLE_LOCAL); - if (local_table == NULL) + main_table = fib_trie_table(RT_TABLE_MAIN, NULL); + if (!main_table) return -ENOMEM; - main_table = fib_trie_table(RT_TABLE_MAIN); - if (main_table == NULL) + local_table = fib_trie_table(RT_TABLE_LOCAL, main_table); + if (!local_table) goto fail; hlist_add_head_rcu(&local_table->tb_hlist, @@ -67,14 +67,14 @@ static int __net_init fib4_rules_init(struct net *net) return 0; fail: - fib_free_table(local_table); + fib_free_table(main_table); return -ENOMEM; } #else struct fib_table *fib_new_table(struct net *net, u32 id) { - struct fib_table *tb; + struct fib_table *tb, *alias = NULL; unsigned int h; if (id == 0) @@ -83,23 +83,23 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - tb = fib_trie_table(id); + if (id == RT_TABLE_LOCAL) + alias = fib_new_table(net, RT_TABLE_MAIN); + + tb = fib_trie_table(id, alias); if (!tb) return NULL; switch (id) { case RT_TABLE_LOCAL: - net->ipv4.fib_local = tb; + rcu_assign_pointer(net->ipv4.fib_local, tb); break; - case RT_TABLE_MAIN: - net->ipv4.fib_main = tb; + rcu_assign_pointer(net->ipv4.fib_main, tb); break; - case RT_TABLE_DEFAULT: - net->ipv4.fib_default = tb; + rcu_assign_pointer(net->ipv4.fib_default, tb); break; - default: break; } @@ -129,16 +129,62 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ +static void fib_replace_table(struct net *net, struct fib_table *old, + struct fib_table *new) +{ +#ifdef CONFIG_IP_MULTIPLE_TABLES + switch (new->tb_id) { + case RT_TABLE_LOCAL: + rcu_assign_pointer(net->ipv4.fib_local, new); + break; + case RT_TABLE_MAIN: + rcu_assign_pointer(net->ipv4.fib_main, new); + break; + case RT_TABLE_DEFAULT: + rcu_assign_pointer(net->ipv4.fib_default, new); + break; + default: + break; + } + +#endif + /* replace the old table in the hlist */ + hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist); +} + +int fib_unmerge(struct net *net) +{ + struct fib_table *old, *new; + + /* attempt to fetch local table if it has been allocated */ + old = fib_get_table(net, RT_TABLE_LOCAL); + if (!old) + return 0; + + new = fib_trie_unmerge(old); + if (!new) + return -ENOMEM; + + /* replace merged table with clean table */ + if (new != old) { + fib_replace_table(net, old, new); + fib_free_table(old); + } + + return 0; +} + static void fib_flush(struct net *net) { int flushed = 0; - struct fib_table *tb; - struct hlist_head *head; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) + struct hlist_head *head = &net->ipv4.fib_table_hash[h]; + struct hlist_node *tmp; + struct fib_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) flushed += fib_table_flush(tb); } @@ -146,6 +192,19 @@ static void fib_flush(struct net *net) rt_cache_flush(net); } +void fib_flush_external(struct net *net) +{ + struct fib_table *tb; + struct hlist_head *head; + unsigned int h; + + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + head = &net->ipv4.fib_table_hash[h]; + hlist_for_each_entry(tb, head, tb_hlist) + fib_table_flush_external(tb); + } +} + /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. @@ -427,7 +486,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) if (strcmp(ifa->ifa_label, devname) == 0) break; - if (ifa == NULL) + if (!ifa) return -ENODEV; cfg->fc_prefsrc = ifa->ifa_local; } @@ -455,7 +514,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, int len = 0; mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); - if (mx == NULL) + if (!mx) return -ENOMEM; if (rt->rt_flags & RTF_MTU) @@ -617,7 +676,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; tb = fib_get_table(net, cfg.fc_table); - if (tb == NULL) { + if (!tb) { err = -ESRCH; goto errout; } @@ -639,7 +698,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; tb = fib_new_table(net, cfg.fc_table); - if (tb == NULL) { + if (!tb) { err = -ENOBUFS; goto errout; } @@ -665,10 +724,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_e = cb->args[1]; + rcu_read_lock(); + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -682,6 +743,8 @@ next: } } out: + rcu_read_unlock(); + cb->args[1] = e; cb->args[0] = h; @@ -716,7 +779,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad else tb = fib_new_table(net, RT_TABLE_LOCAL); - if (tb == NULL) + if (!tb) return; cfg.fc_table = tb->tb_id; @@ -743,7 +806,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, prefix, mask); - if (prim == NULL) { + if (!prim) { pr_warn("%s: bug: prim == NULL\n", __func__); return; } @@ -797,7 +860,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); - if (prim == NULL) { + if (!prim) { pr_warn("%s: bug: prim == NULL\n", __func__); return; } @@ -967,7 +1030,7 @@ static void nl_fib_input(struct sk_buff *skb) return; skb = netlink_skb_clone(skb, GFP_KERNEL); - if (skb == NULL) + if (!skb) return; nlh = nlmsg_hdr(skb); @@ -988,7 +1051,7 @@ static int __net_init nl_fib_lookup_init(struct net *net) }; sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg); - if (sk == NULL) + if (!sk) return -EAFNOSUPPORT; net->ipv4.fibnl = sk; return 0; @@ -1026,7 +1089,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); atomic_inc(&net->ipv4.dev_addr_genid); - if (ifa->ifa_dev->ifa_list == NULL) { + if (!ifa->ifa_dev->ifa_list) { /* Last address was deleted from this interface. * Disable IP. */ @@ -1094,7 +1157,7 @@ static int __net_init ip_fib_net_init(struct net *net) size = max_t(size_t, size, L1_CACHE_BYTES); net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); - if (net->ipv4.fib_table_hash == NULL) + if (!net->ipv4.fib_table_hash) return -ENOMEM; err = fib4_rules_init(net); @@ -1113,20 +1176,25 @@ static void ip_fib_net_exit(struct net *net) rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES - fib4_rules_exit(net); + RCU_INIT_POINTER(net->ipv4.fib_local, NULL); + RCU_INIT_POINTER(net->ipv4.fib_main, NULL); + RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif for (i = 0; i < FIB_TABLE_HASHSZ; i++) { - struct fib_table *tb; - struct hlist_head *head; + struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; + struct fib_table *tb; - head = &net->ipv4.fib_table_hash[i]; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); fib_table_flush(tb); fib_free_table(tb); } } + +#ifdef CONFIG_IP_MULTIPLE_TABLES + fib4_rules_exit(net); +#endif rtnl_unlock(); kfree(net->ipv4.fib_table_hash); } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 825981b1049a..c6211ed60b03 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -6,11 +6,13 @@ #include <net/ip_fib.h> struct fib_alias { - struct list_head fa_list; + struct hlist_node fa_list; struct fib_info *fa_info; u8 fa_tos; u8 fa_type; u8 fa_state; + u8 fa_slen; + u32 tb_id; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index d3db718be51d..56151982f74e 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -153,7 +153,7 @@ static struct fib_table *fib_empty_table(struct net *net) u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_get_table(net, id) == NULL) + if (!fib_get_table(net, id)) return fib_new_table(net, id); return NULL; } @@ -174,12 +174,17 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (frh->tos & ~IPTOS_TOS_MASK) goto errout; + /* split local/main if they are not already split */ + err = fib_unmerge(net); + if (err) + goto errout; + if (rule->table == RT_TABLE_UNSPEC) { if (rule->action == FR_ACT_TO_TBL) { struct fib_table *table; table = fib_empty_table(net); - if (table == NULL) { + if (!table) { err = -ENOBUFS; goto errout; } @@ -189,10 +194,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } if (frh->src_len) - rule4->src = nla_get_be32(tb[FRA_SRC]); + rule4->src = nla_get_in_addr(tb[FRA_SRC]); if (frh->dst_len) - rule4->dst = nla_get_be32(tb[FRA_DST]); + rule4->dst = nla_get_in_addr(tb[FRA_DST]); #ifdef CONFIG_IP_ROUTE_CLASSID if (tb[FRA_FLOW]) { @@ -209,21 +214,31 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; + fib_flush_external(rule->fr_net); + err = 0; errout: return err; } -static void fib4_rule_delete(struct fib_rule *rule) +static int fib4_rule_delete(struct fib_rule *rule) { struct net *net = rule->fr_net; -#ifdef CONFIG_IP_ROUTE_CLASSID - struct fib4_rule *rule4 = (struct fib4_rule *) rule; + int err; - if (rule4->tclassid) + /* split local/main if they are not already split */ + err = fib_unmerge(net); + if (err) + goto errout; + +#ifdef CONFIG_IP_ROUTE_CLASSID + if (((struct fib4_rule *)rule)->tclassid) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; + fib_flush_external(rule->fr_net); +errout: + return err; } static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, @@ -245,10 +260,10 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, return 0; #endif - if (frh->src_len && (rule4->src != nla_get_be32(tb[FRA_SRC]))) + if (frh->src_len && (rule4->src != nla_get_in_addr(tb[FRA_SRC]))) return 0; - if (frh->dst_len && (rule4->dst != nla_get_be32(tb[FRA_DST]))) + if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST]))) return 0; return 1; @@ -264,9 +279,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = rule4->tos; if ((rule4->dst_len && - nla_put_be32(skb, FRA_DST, rule4->dst)) || + nla_put_in_addr(skb, FRA_DST, rule4->dst)) || (rule4->src_len && - nla_put_be32(skb, FRA_SRC, rule4->src))) + nla_put_in_addr(skb, FRA_SRC, rule4->src))) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (rule4->tclassid && diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1e2090ea663e..8d695b6659c7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -213,7 +213,6 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - release_net(fi->fib_net); if (fi->fib_metrics != (u32 *) dst_default_metrics) kfree(fi->fib_metrics); kfree(fi); @@ -391,7 +390,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int err = -ENOBUFS; skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); - if (skb == NULL) + if (!skb) goto errout; err = fib_dump_info(skb, info->portid, seq, event, tb_id, @@ -469,7 +468,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; + nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0; #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; @@ -504,7 +503,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (cfg->fc_mp == NULL) + if (!cfg->fc_mp) return 0; rtnh = cfg->fc_mp; @@ -524,7 +523,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - if (nla && nla_get_be32(nla) != nh->nh_gw) + if (nla && nla_get_in_addr(nla) != nh->nh_gw) return 1; #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); @@ -647,7 +646,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, rcu_read_lock(); err = -ENODEV; in_dev = inetdev_by_index(net, nh->nh_oif); - if (in_dev == NULL) + if (!in_dev) goto out; err = -ENETDOWN; if (!(in_dev->dev->flags & IFF_UP)) @@ -804,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); - if (fi == NULL) + if (!fi) goto failure; fib_info_cnt++; if (cfg->fc_mx) { @@ -814,7 +813,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } else fi->fib_metrics = (u32 *) dst_default_metrics; - fi->fib_net = hold_net(net); + fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; fi->fib_flags = cfg->fc_flags; @@ -922,7 +921,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) nh->nh_scope = RT_SCOPE_NOWHERE; nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; - if (nh->nh_dev == NULL) + if (!nh->nh_dev) goto failure; } else { change_nexthops(fi) { @@ -996,7 +995,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, struct rtmsg *rtm; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -1016,7 +1015,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_protocol = fi->fib_protocol; if (rtm->rtm_dst_len && - nla_put_be32(skb, RTA_DST, dst)) + nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) @@ -1025,11 +1024,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (fi->fib_prefsrc && - nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc)) + nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw && - nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) + nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; if (fi->fib_nh->nh_oif && nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) @@ -1046,12 +1045,12 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, struct nlattr *mp; mp = nla_nest_start(skb, RTA_MULTIPATH); - if (mp == NULL) + if (!mp) goto nla_put_failure; for_nexthops(fi) { rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (rtnh == NULL) + if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; @@ -1059,7 +1058,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtnh->rtnh_ifindex = nh->nh_oif; if (nh->nh_gw && - nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw)) + nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (nh->nh_tclassid && @@ -1094,7 +1093,7 @@ int fib_sync_down_addr(struct net *net, __be32 local) struct hlist_head *head = &fib_info_laddrhash[hash]; struct fib_info *fi; - if (fib_info_laddrhash == NULL || local == 0) + if (!fib_info_laddrhash || local == 0) return 0; hlist_for_each_entry(fi, head, fib_lhash) { @@ -1163,12 +1162,12 @@ int fib_sync_down_dev(struct net_device *dev, int force) void fib_select_default(struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; - struct list_head *fa_head = res->fa_head; + struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; int order = -1, last_idx = -1; struct fib_alias *fa; - list_for_each_entry_rcu(fa, fa_head, fa_list) { + hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; if (next_fi->fib_scope != res->scope || @@ -1183,7 +1182,7 @@ void fib_select_default(struct fib_result *res) fib_alias_accessed(fa); - if (fi == NULL) { + if (!fi) { if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, @@ -1196,7 +1195,7 @@ void fib_select_default(struct fib_result *res) order++; } - if (order <= 0 || fi == NULL) { + if (order <= 0 || !fi) { tb->tb_default = -1; goto out; } @@ -1252,7 +1251,7 @@ int fib_sync_up(struct net_device *dev) alive++; continue; } - if (nexthop_nh->nh_dev == NULL || + if (!nexthop_nh->nh_dev || !(nexthop_nh->nh_dev->flags & IFF_UP)) continue; if (nexthop_nh->nh_dev != dev || diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3daf0224ff2e..e13fcc602da2 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -79,6 +79,7 @@ #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> +#include <net/switchdev.h> #include "fib_lookup.h" #define MAX_STAT_DEPTH 32 @@ -88,38 +89,35 @@ typedef unsigned int t_key; -#define IS_TNODE(n) ((n)->bits) -#define IS_LEAF(n) (!(n)->bits) +#define IS_TRIE(n) ((n)->pos >= KEYLENGTH) +#define IS_TNODE(n) ((n)->bits) +#define IS_LEAF(n) (!(n)->bits) -#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos) - -struct tnode { +struct key_vector { t_key key; - unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char pos; /* 2log(KEYLENGTH) bits needed */ + unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char slen; - struct tnode __rcu *parent; - struct rcu_head rcu; union { - /* The fields in this struct are valid if bits > 0 (TNODE) */ - struct { - t_key empty_children; /* KEYLENGTH bits needed */ - t_key full_children; /* KEYLENGTH bits needed */ - struct tnode __rcu *child[0]; - }; - /* This list pointer if valid if bits == 0 (LEAF) */ - struct hlist_head list; + /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ + struct hlist_head leaf; + /* This array is valid if (pos | bits) > 0 (TNODE) */ + struct key_vector __rcu *tnode[0]; }; }; -struct leaf_info { - struct hlist_node hlist; - int plen; - u32 mask_plen; /* ntohl(inet_make_mask(plen)) */ - struct list_head falh; +struct tnode { struct rcu_head rcu; + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ + struct key_vector __rcu *parent; + struct key_vector kv[1]; +#define tn_bits kv[0].bits }; +#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) +#define LEAF_SIZE TNODE_SIZE(1) + #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats { unsigned int gets; @@ -142,13 +140,13 @@ struct trie_stat { }; struct trie { - struct tnode __rcu *trie; + struct key_vector kv[1]; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats; #endif }; -static void resize(struct trie *t, struct tnode *tn); +static struct key_vector *resize(struct trie *t, struct key_vector *tn); static size_t tnode_free_size; /* @@ -161,41 +159,46 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; +static inline struct tnode *tn_info(struct key_vector *kv) +{ + return container_of(kv, struct tnode, kv[0]); +} + /* caller must hold RTNL */ -#define node_parent(n) rtnl_dereference((n)->parent) +#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) +#define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) /* caller must hold RCU read lock or RTNL */ -#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) +#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) +#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) /* wrapper for rcu_assign_pointer */ -static inline void node_set_parent(struct tnode *n, struct tnode *tp) +static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) { if (n) - rcu_assign_pointer(n->parent, tp); + rcu_assign_pointer(tn_info(n)->parent, tp); } -#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) +#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) /* This provides us with the number of children in this node, in the case of a * leaf this will return 0 meaning none of the children are accessible. */ -static inline unsigned long tnode_child_length(const struct tnode *tn) +static inline unsigned long child_length(const struct key_vector *tn) { return (1ul << tn->bits) & ~(1ul); } -/* caller must hold RTNL */ -static inline struct tnode *tnode_get_child(const struct tnode *tn, - unsigned long i) -{ - return rtnl_dereference(tn->child[i]); -} +#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) -/* caller must hold RCU read lock or RTNL */ -static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, - unsigned long i) +static inline unsigned long get_index(t_key key, struct key_vector *kv) { - return rcu_dereference_rtnl(tn->child[i]); + unsigned long index = key ^ kv->key; + + if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) + return 0; + + return index >> kv->pos; } /* To understand this stuff, an understanding of keys and all their bits is @@ -274,106 +277,104 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) } #define TNODE_KMALLOC_MAX \ - ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *)) + ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) +#define TNODE_VMALLOC_MAX \ + ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) static void __node_free_rcu(struct rcu_head *head) { struct tnode *n = container_of(head, struct tnode, rcu); - if (IS_LEAF(n)) + if (!n->tn_bits) kmem_cache_free(trie_leaf_kmem, n); - else if (n->bits <= TNODE_KMALLOC_MAX) + else if (n->tn_bits <= TNODE_KMALLOC_MAX) kfree(n); else vfree(n); } -#define node_free(n) call_rcu(&n->rcu, __node_free_rcu) +#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) -static inline void free_leaf_info(struct leaf_info *leaf) +static struct tnode *tnode_alloc(int bits) { - kfree_rcu(leaf, rcu); -} + size_t size; + + /* verify bits is within bounds */ + if (bits > TNODE_VMALLOC_MAX) + return NULL; + + /* determine size and verify it is non-zero and didn't overflow */ + size = TNODE_SIZE(1ul << bits); -static struct tnode *tnode_alloc(size_t size) -{ if (size <= PAGE_SIZE) return kzalloc(size, GFP_KERNEL); else return vzalloc(size); } -static inline void empty_child_inc(struct tnode *n) +static inline void empty_child_inc(struct key_vector *n) { - ++n->empty_children ? : ++n->full_children; + ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; } -static inline void empty_child_dec(struct tnode *n) +static inline void empty_child_dec(struct key_vector *n) { - n->empty_children-- ? : n->full_children--; + tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; } -static struct tnode *leaf_new(t_key key) +static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) { - struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); - if (l) { - l->parent = NULL; - /* set key and pos to reflect full key value - * any trailing zeros in the key should be ignored - * as the nodes are searched - */ - l->key = key; - l->slen = 0; - l->pos = 0; - /* set bits to 0 indicating we are not a tnode */ - l->bits = 0; + struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); + struct key_vector *l = kv->kv; - INIT_HLIST_HEAD(&l->list); - } - return l; -} + if (!kv) + return NULL; -static struct leaf_info *leaf_info_new(int plen) -{ - struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); - if (li) { - li->plen = plen; - li->mask_plen = ntohl(inet_make_mask(plen)); - INIT_LIST_HEAD(&li->falh); - } - return li; + /* initialize key vector */ + l->key = key; + l->pos = 0; + l->bits = 0; + l->slen = fa->fa_slen; + + /* link leaf to fib alias */ + INIT_HLIST_HEAD(&l->leaf); + hlist_add_head(&fa->fa_list, &l->leaf); + + return l; } -static struct tnode *tnode_new(t_key key, int pos, int bits) +static struct key_vector *tnode_new(t_key key, int pos, int bits) { - size_t sz = offsetof(struct tnode, child[1ul << bits]); - struct tnode *tn = tnode_alloc(sz); + struct tnode *tnode = tnode_alloc(bits); unsigned int shift = pos + bits; + struct key_vector *tn = tnode->kv; /* verify bits and pos their msb bits clear and values are valid */ BUG_ON(!bits || (shift > KEYLENGTH)); - if (tn) { - tn->parent = NULL; - tn->slen = pos; - tn->pos = pos; - tn->bits = bits; - tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; - if (bits == KEYLENGTH) - tn->full_children = 1; - else - tn->empty_children = 1ul << bits; - } + pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), + sizeof(struct key_vector *) << bits); + + if (!tnode) + return NULL; + + if (bits == KEYLENGTH) + tnode->full_children = 1; + else + tnode->empty_children = 1ul << bits; + + tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; + tn->pos = pos; + tn->bits = bits; + tn->slen = pos; - pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct tnode *) << bits); return tn; } /* Check whether a tnode 'n' is "full", i.e. it is an internal node * and no bits are skipped. See discussion in dyntree paper p. 6 */ -static inline int tnode_full(const struct tnode *tn, const struct tnode *n) +static inline int tnode_full(struct key_vector *tn, struct key_vector *n) { return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } @@ -381,17 +382,18 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n) /* Add a child at position i overwriting the old value. * Update the value of full_children and empty_children. */ -static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) +static void put_child(struct key_vector *tn, unsigned long i, + struct key_vector *n) { - struct tnode *chi = tnode_get_child(tn, i); + struct key_vector *chi = get_child(tn, i); int isfull, wasfull; - BUG_ON(i >= tnode_child_length(tn)); + BUG_ON(i >= child_length(tn)); /* update emptyChildren, overflow into fullChildren */ - if (n == NULL && chi != NULL) + if (!n && chi) empty_child_inc(tn); - if (n != NULL && chi == NULL) + if (n && !chi) empty_child_dec(tn); /* update fullChildren */ @@ -399,23 +401,23 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) isfull = tnode_full(tn, n); if (wasfull && !isfull) - tn->full_children--; + tn_info(tn)->full_children--; else if (!wasfull && isfull) - tn->full_children++; + tn_info(tn)->full_children++; if (n && (tn->slen < n->slen)) tn->slen = n->slen; - rcu_assign_pointer(tn->child[i], n); + rcu_assign_pointer(tn->tnode[i], n); } -static void update_children(struct tnode *tn) +static void update_children(struct key_vector *tn) { unsigned long i; /* update all of the child parent pointers */ - for (i = tnode_child_length(tn); i;) { - struct tnode *inode = tnode_get_child(tn, --i); + for (i = child_length(tn); i;) { + struct key_vector *inode = get_child(tn, --i); if (!inode) continue; @@ -431,36 +433,37 @@ static void update_children(struct tnode *tn) } } -static inline void put_child_root(struct tnode *tp, struct trie *t, - t_key key, struct tnode *n) +static inline void put_child_root(struct key_vector *tp, t_key key, + struct key_vector *n) { - if (tp) - put_child(tp, get_index(key, tp), n); + if (IS_TRIE(tp)) + rcu_assign_pointer(tp->tnode[0], n); else - rcu_assign_pointer(t->trie, n); + put_child(tp, get_index(key, tp), n); } -static inline void tnode_free_init(struct tnode *tn) +static inline void tnode_free_init(struct key_vector *tn) { - tn->rcu.next = NULL; + tn_info(tn)->rcu.next = NULL; } -static inline void tnode_free_append(struct tnode *tn, struct tnode *n) +static inline void tnode_free_append(struct key_vector *tn, + struct key_vector *n) { - n->rcu.next = tn->rcu.next; - tn->rcu.next = &n->rcu; + tn_info(n)->rcu.next = tn_info(tn)->rcu.next; + tn_info(tn)->rcu.next = &tn_info(n)->rcu; } -static void tnode_free(struct tnode *tn) +static void tnode_free(struct key_vector *tn) { - struct callback_head *head = &tn->rcu; + struct callback_head *head = &tn_info(tn)->rcu; while (head) { head = head->next; - tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); + tnode_free_size += TNODE_SIZE(1ul << tn->bits); node_free(tn); - tn = container_of(head, struct tnode, rcu); + tn = container_of(head, struct tnode, rcu)->kv; } if (tnode_free_size >= PAGE_SIZE * sync_pages) { @@ -469,14 +472,16 @@ static void tnode_free(struct tnode *tn) } } -static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +static struct key_vector *replace(struct trie *t, + struct key_vector *oldtnode, + struct key_vector *tn) { - struct tnode *tp = node_parent(oldtnode); + struct key_vector *tp = node_parent(oldtnode); unsigned long i; /* setup the parent pointer out of and back into this node */ NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); + put_child_root(tp, tn->key, tn); /* update all of the child parent pointers */ update_children(tn); @@ -485,18 +490,21 @@ static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) tnode_free(oldtnode); /* resize children now that oldtnode is freed */ - for (i = tnode_child_length(tn); i;) { - struct tnode *inode = tnode_get_child(tn, --i); + for (i = child_length(tn); i;) { + struct key_vector *inode = get_child(tn, --i); /* resize child node */ if (tnode_full(tn, inode)) - resize(t, inode); + tn = resize(t, inode); } + + return tp; } -static int inflate(struct trie *t, struct tnode *oldtnode) +static struct key_vector *inflate(struct trie *t, + struct key_vector *oldtnode) { - struct tnode *tn; + struct key_vector *tn; unsigned long i; t_key m; @@ -504,7 +512,7 @@ static int inflate(struct trie *t, struct tnode *oldtnode) tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); if (!tn) - return -ENOMEM; + goto notnode; /* prepare oldtnode to be freed */ tnode_free_init(oldtnode); @@ -514,13 +522,13 @@ static int inflate(struct trie *t, struct tnode *oldtnode) * point to existing tnodes and the links between our allocated * nodes. */ - for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { - struct tnode *inode = tnode_get_child(oldtnode, --i); - struct tnode *node0, *node1; + for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { + struct key_vector *inode = get_child(oldtnode, --i); + struct key_vector *node0, *node1; unsigned long j, k; /* An empty child */ - if (inode == NULL) + if (!inode) continue; /* A leaf or an internal node with skipped bits */ @@ -534,8 +542,8 @@ static int inflate(struct trie *t, struct tnode *oldtnode) /* An internal node with two children */ if (inode->bits == 1) { - put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); - put_child(tn, 2 * i, tnode_get_child(inode, 0)); + put_child(tn, 2 * i + 1, get_child(inode, 1)); + put_child(tn, 2 * i, get_child(inode, 0)); continue; } @@ -564,11 +572,11 @@ static int inflate(struct trie *t, struct tnode *oldtnode) tnode_free_append(tn, node0); /* populate child pointers in new nodes */ - for (k = tnode_child_length(inode), j = k / 2; j;) { - put_child(node1, --j, tnode_get_child(inode, --k)); - put_child(node0, j, tnode_get_child(inode, j)); - put_child(node1, --j, tnode_get_child(inode, --k)); - put_child(node0, j, tnode_get_child(inode, j)); + for (k = child_length(inode), j = k / 2; j;) { + put_child(node1, --j, get_child(inode, --k)); + put_child(node0, j, get_child(inode, j)); + put_child(node1, --j, get_child(inode, --k)); + put_child(node0, j, get_child(inode, j)); } /* link new nodes to parent */ @@ -581,25 +589,25 @@ static int inflate(struct trie *t, struct tnode *oldtnode) } /* setup the parent pointers into and out of this node */ - replace(t, oldtnode, tn); - - return 0; + return replace(t, oldtnode, tn); nomem: /* all pointers should be clean so we are done */ tnode_free(tn); - return -ENOMEM; +notnode: + return NULL; } -static int halve(struct trie *t, struct tnode *oldtnode) +static struct key_vector *halve(struct trie *t, + struct key_vector *oldtnode) { - struct tnode *tn; + struct key_vector *tn; unsigned long i; pr_debug("In halve\n"); tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); if (!tn) - return -ENOMEM; + goto notnode; /* prepare oldtnode to be freed */ tnode_free_init(oldtnode); @@ -609,10 +617,10 @@ static int halve(struct trie *t, struct tnode *oldtnode) * point to existing tnodes and the links between our allocated * nodes. */ - for (i = tnode_child_length(oldtnode); i;) { - struct tnode *node1 = tnode_get_child(oldtnode, --i); - struct tnode *node0 = tnode_get_child(oldtnode, --i); - struct tnode *inode; + for (i = child_length(oldtnode); i;) { + struct key_vector *node1 = get_child(oldtnode, --i); + struct key_vector *node0 = get_child(oldtnode, --i); + struct key_vector *inode; /* At least one of the children is empty */ if (!node1 || !node0) { @@ -622,10 +630,8 @@ static int halve(struct trie *t, struct tnode *oldtnode) /* Two nonempty children */ inode = tnode_new(node0->key, oldtnode->pos, 1); - if (!inode) { - tnode_free(tn); - return -ENOMEM; - } + if (!inode) + goto nomem; tnode_free_append(tn, inode); /* initialize pointers out of node */ @@ -638,30 +644,36 @@ static int halve(struct trie *t, struct tnode *oldtnode) } /* setup the parent pointers into and out of this node */ - replace(t, oldtnode, tn); - - return 0; + return replace(t, oldtnode, tn); +nomem: + /* all pointers should be clean so we are done */ + tnode_free(tn); +notnode: + return NULL; } -static void collapse(struct trie *t, struct tnode *oldtnode) +static struct key_vector *collapse(struct trie *t, + struct key_vector *oldtnode) { - struct tnode *n, *tp; + struct key_vector *n, *tp; unsigned long i; /* scan the tnode looking for that one child that might still exist */ - for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) - n = tnode_get_child(oldtnode, --i); + for (n = NULL, i = child_length(oldtnode); !n && i;) + n = get_child(oldtnode, --i); /* compress one level */ tp = node_parent(oldtnode); - put_child_root(tp, t, oldtnode->key, n); + put_child_root(tp, oldtnode->key, n); node_set_parent(n, tp); /* drop dead node */ node_free(oldtnode); + + return tp; } -static unsigned char update_suffix(struct tnode *tn) +static unsigned char update_suffix(struct key_vector *tn) { unsigned char slen = tn->pos; unsigned long stride, i; @@ -671,8 +683,8 @@ static unsigned char update_suffix(struct tnode *tn) * why we start with a stride of 2 since a stride of 1 would * represent the nodes with suffix length equal to tn->pos */ - for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) { - struct tnode *n = tnode_get_child(tn, i); + for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { + struct key_vector *n = get_child(tn, i); if (!n || (n->slen <= slen)) continue; @@ -704,12 +716,12 @@ static unsigned char update_suffix(struct tnode *tn) * * 'high' in this instance is the variable 'inflate_threshold'. It * is expressed as a percentage, so we multiply it with - * tnode_child_length() and instead of multiplying by 2 (since the + * child_length() and instead of multiplying by 2 (since the * child array will be doubled by inflate()) and multiplying * the left-hand side by 100 (to handle the percentage thing) we * multiply the left-hand side by 50. * - * The left-hand side may look a bit weird: tnode_child_length(tn) + * The left-hand side may look a bit weird: child_length(tn) * - tn->empty_children is of course the number of non-null children * in the current node. tn->full_children is the number of "full" * children, that is non-null tnodes with a skip value of 0. @@ -719,10 +731,10 @@ static unsigned char update_suffix(struct tnode *tn) * A clearer way to write this would be: * * to_be_doubled = tn->full_children; - * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - + * not_to_be_doubled = child_length(tn) - tn->empty_children - * tn->full_children; * - * new_child_length = tnode_child_length(tn) * 2; + * new_child_length = child_length(tn) * 2; * * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / * new_child_length; @@ -739,57 +751,57 @@ static unsigned char update_suffix(struct tnode *tn) * inflate_threshold * new_child_length * * expand not_to_be_doubled and to_be_doubled, and shorten: - * 100 * (tnode_child_length(tn) - tn->empty_children + + * 100 * (child_length(tn) - tn->empty_children + * tn->full_children) >= inflate_threshold * new_child_length * * expand new_child_length: - * 100 * (tnode_child_length(tn) - tn->empty_children + + * 100 * (child_length(tn) - tn->empty_children + * tn->full_children) >= - * inflate_threshold * tnode_child_length(tn) * 2 + * inflate_threshold * child_length(tn) * 2 * * shorten again: - * 50 * (tn->full_children + tnode_child_length(tn) - + * 50 * (tn->full_children + child_length(tn) - * tn->empty_children) >= inflate_threshold * - * tnode_child_length(tn) + * child_length(tn) * */ -static bool should_inflate(const struct tnode *tp, const struct tnode *tn) +static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) { - unsigned long used = tnode_child_length(tn); + unsigned long used = child_length(tn); unsigned long threshold = used; /* Keep root node larger */ - threshold *= tp ? inflate_threshold : inflate_threshold_root; - used -= tn->empty_children; - used += tn->full_children; + threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; + used -= tn_info(tn)->empty_children; + used += tn_info(tn)->full_children; /* if bits == KEYLENGTH then pos = 0, and will fail below */ return (used > 1) && tn->pos && ((50 * used) >= threshold); } -static bool should_halve(const struct tnode *tp, const struct tnode *tn) +static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) { - unsigned long used = tnode_child_length(tn); + unsigned long used = child_length(tn); unsigned long threshold = used; /* Keep root node larger */ - threshold *= tp ? halve_threshold : halve_threshold_root; - used -= tn->empty_children; + threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; + used -= tn_info(tn)->empty_children; /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); } -static bool should_collapse(const struct tnode *tn) +static inline bool should_collapse(struct key_vector *tn) { - unsigned long used = tnode_child_length(tn); + unsigned long used = child_length(tn); - used -= tn->empty_children; + used -= tn_info(tn)->empty_children; /* account for bits == KEYLENGTH case */ - if ((tn->bits == KEYLENGTH) && tn->full_children) + if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) used -= KEY_MAX; /* One child or none, time to drop us from the trie */ @@ -797,10 +809,13 @@ static bool should_collapse(const struct tnode *tn) } #define MAX_WORK 10 -static void resize(struct trie *t, struct tnode *tn) +static struct key_vector *resize(struct trie *t, struct key_vector *tn) { - struct tnode *tp = node_parent(tn); - struct tnode __rcu **cptr; +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie_use_stats __percpu *stats = t->stats; +#endif + struct key_vector *tp = node_parent(tn); + unsigned long cindex = get_index(tn->key, tp); int max_work = MAX_WORK; pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", @@ -810,183 +825,128 @@ static void resize(struct trie *t, struct tnode *tn) * doing it ourselves. This way we can let RCU fully do its * thing without us interfering */ - cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; - BUG_ON(tn != rtnl_dereference(*cptr)); + BUG_ON(tn != get_child(tp, cindex)); /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. */ while (should_inflate(tp, tn) && max_work) { - if (inflate(t, tn)) { + tp = inflate(t, tn); + if (!tp) { #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->resize_node_skipped); + this_cpu_inc(stats->resize_node_skipped); #endif break; } max_work--; - tn = rtnl_dereference(*cptr); + tn = get_child(tp, cindex); } + /* update parent in case inflate failed */ + tp = node_parent(tn); + /* Return if at least one inflate is run */ if (max_work != MAX_WORK) - return; + return tp; /* Halve as long as the number of empty children in this * node is above threshold. */ while (should_halve(tp, tn) && max_work) { - if (halve(t, tn)) { + tp = halve(t, tn); + if (!tp) { #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->resize_node_skipped); + this_cpu_inc(stats->resize_node_skipped); #endif break; } max_work--; - tn = rtnl_dereference(*cptr); + tn = get_child(tp, cindex); } /* Only one child remains */ - if (should_collapse(tn)) { - collapse(t, tn); - return; - } + if (should_collapse(tn)) + return collapse(t, tn); + + /* update parent in case halve failed */ + tp = node_parent(tn); /* Return if at least one deflate was run */ if (max_work != MAX_WORK) - return; + return tp; /* push the suffix length to the parent node */ if (tn->slen > tn->pos) { unsigned char slen = update_suffix(tn); - if (tp && (slen > tp->slen)) + if (slen > tp->slen) tp->slen = slen; } -} - -/* readside must use rcu_read_lock currently dump routines - via get_fa_head and dump */ - -static struct leaf_info *find_leaf_info(struct tnode *l, int plen) -{ - struct hlist_head *head = &l->list; - struct leaf_info *li; - - hlist_for_each_entry_rcu(li, head, hlist) - if (li->plen == plen) - return li; - - return NULL; -} - -static inline struct list_head *get_fa_head(struct tnode *l, int plen) -{ - struct leaf_info *li = find_leaf_info(l, plen); - - if (!li) - return NULL; - return &li->falh; + return tp; } -static void leaf_pull_suffix(struct tnode *l) +static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) { - struct tnode *tp = node_parent(l); - - while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) { + while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { if (update_suffix(tp) > l->slen) break; tp = node_parent(tp); } } -static void leaf_push_suffix(struct tnode *l) +static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) { - struct tnode *tn = node_parent(l); - /* if this is a new leaf then tn will be NULL and we can sort * out parent suffix lengths as a part of trie_rebalance */ - while (tn && (tn->slen < l->slen)) { + while (tn->slen < l->slen) { tn->slen = l->slen; tn = node_parent(tn); } } -static void remove_leaf_info(struct tnode *l, struct leaf_info *old) -{ - /* record the location of the previous list_info entry */ - struct hlist_node **pprev = old->hlist.pprev; - struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); - - /* remove the leaf info from the list */ - hlist_del_rcu(&old->hlist); - - /* only access li if it is pointing at the last valid hlist_node */ - if (hlist_empty(&l->list) || (*pprev)) - return; - - /* update the trie with the latest suffix length */ - l->slen = KEYLENGTH - li->plen; - leaf_pull_suffix(l); -} - -static void insert_leaf_info(struct tnode *l, struct leaf_info *new) +/* rcu_read_lock needs to be hold by caller from readside */ +static struct key_vector *fib_find_node(struct trie *t, + struct key_vector **tp, u32 key) { - struct hlist_head *head = &l->list; - struct leaf_info *li = NULL, *last = NULL; + struct key_vector *pn, *n = t->kv; + unsigned long index = 0; - if (hlist_empty(head)) { - hlist_add_head_rcu(&new->hlist, head); - } else { - hlist_for_each_entry(li, head, hlist) { - if (new->plen > li->plen) - break; - - last = li; - } - if (last) - hlist_add_behind_rcu(&new->hlist, &last->hlist); - else - hlist_add_before_rcu(&new->hlist, &li->hlist); - } - - /* if we added to the tail node then we need to update slen */ - if (l->slen < (KEYLENGTH - new->plen)) { - l->slen = KEYLENGTH - new->plen; - leaf_push_suffix(l); - } -} + do { + pn = n; + n = get_child_rcu(n, index); -/* rcu_read_lock needs to be hold by caller from readside */ -static struct tnode *fib_find_node(struct trie *t, u32 key) -{ - struct tnode *n = rcu_dereference_rtnl(t->trie); + if (!n) + break; - while (n) { - unsigned long index = get_index(key, n); + index = get_cindex(key, n); /* This bit of code is a bit tricky but it combines multiple * checks into a single check. The prefix consists of the * prefix plus zeros for the bits in the cindex. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if (index & (~0ul << bits)) + * if (index >= (1ul << bits)) * we have a mismatch in skip bits and failed * else * we know the value is cindex + * + * This check is safe even if bits == KEYLENGTH due to the + * fact that we can only allocate a node with 32 bits if a + * long is greater than 32 bits. */ - if (index & (~0ul << n->bits)) - return NULL; - - /* we have found a leaf. Prefixes have already been compared */ - if (IS_LEAF(n)) + if (index >= (1ul << n->bits)) { + n = NULL; break; + } - n = tnode_get_child_rcu(n, index); - } + /* keep searching until we find a perfect match leaf or NULL */ + } while (IS_TNODE(n)); + + *tp = pn; return n; } @@ -994,14 +954,23 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) /* Return the first fib alias matching TOS with * priority less than or equal to PRIO. */ -static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, + u8 tos, u32 prio, u32 tb_id) { struct fib_alias *fa; if (!fah) return NULL; - list_for_each_entry(fa, fah, fa_list) { + hlist_for_each_entry(fa, fah, fa_list) { + if (fa->fa_slen < slen) + continue; + if (fa->fa_slen != slen) + break; + if (fa->tb_id > tb_id) + continue; + if (fa->tb_id != tb_id) + break; if (fa->fa_tos > tos) continue; if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) @@ -1011,77 +980,23 @@ static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) return NULL; } -static void trie_rebalance(struct trie *t, struct tnode *tn) +static void trie_rebalance(struct trie *t, struct key_vector *tn) { - struct tnode *tp; - - while ((tp = node_parent(tn)) != NULL) { - resize(t, tn); - tn = tp; - } - - /* Handle last (top) tnode */ - if (IS_TNODE(tn)) - resize(t, tn); + while (!IS_TRIE(tn)) + tn = resize(t, tn); } -/* only used from updater-side */ - -static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) +static int fib_insert_node(struct trie *t, struct key_vector *tp, + struct fib_alias *new, t_key key) { - struct list_head *fa_head = NULL; - struct tnode *l, *n, *tp = NULL; - struct leaf_info *li; - - li = leaf_info_new(plen); - if (!li) - return NULL; - fa_head = &li->falh; + struct key_vector *n, *l; - n = rtnl_dereference(t->trie); - - /* If we point to NULL, stop. Either the tree is empty and we should - * just put a new leaf in if, or we have reached an empty child slot, - * and we should just put our new leaf in that. - * - * If we hit a node with a key that does't match then we should stop - * and create a new tnode to replace that node and insert ourselves - * and the other node into the new tnode. - */ - while (n) { - unsigned long index = get_index(key, n); - - /* This bit of code is a bit tricky but it combines multiple - * checks into a single check. The prefix consists of the - * prefix plus zeros for the "bits" in the prefix. The index - * is the difference between the key and this value. From - * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is child index - * else - * we have a mismatch in skip bits and failed - */ - if (index >> n->bits) - break; - - /* we have found a leaf. Prefixes have already been compared */ - if (IS_LEAF(n)) { - /* Case 1: n is a leaf, and prefixes match*/ - insert_leaf_info(n, li); - return fa_head; - } - - tp = n; - n = tnode_get_child_rcu(n, index); - } - - l = leaf_new(key); - if (!l) { - free_leaf_info(li); - return NULL; - } + l = leaf_new(key, new); + if (!l) + goto noleaf; - insert_leaf_info(l, li); + /* retrieve child from parent node */ + n = get_child(tp, get_index(key, tp)); /* Case 2: n is a LEAF or a TNODE and the key doesn't match. * @@ -1090,21 +1005,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) * leaves us in position for handling as case 3 */ if (n) { - struct tnode *tn; + struct key_vector *tn; tn = tnode_new(key, __fls(key ^ n->key), 1); - if (!tn) { - free_leaf_info(li); - node_free(l); - return NULL; - } + if (!tn) + goto notnode; /* initialize routes out of node */ NODE_INIT_PARENT(tn, tp); put_child(tn, get_index(key, tn) ^ 1, n); /* start adding routes into the node */ - put_child_root(tp, t, key, tn); + put_child_root(tp, key, tn); node_set_parent(n, tn); /* parent now has a NULL spot where the leaf can go */ @@ -1112,69 +1024,93 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) } /* Case 3: n is NULL, and will just insert a new leaf */ - if (tp) { - NODE_INIT_PARENT(l, tp); - put_child(tp, get_index(key, tp), l); - trie_rebalance(t, tp); + NODE_INIT_PARENT(l, tp); + put_child_root(tp, key, l); + trie_rebalance(t, tp); + + return 0; +notnode: + node_free(l); +noleaf: + return -ENOMEM; +} + +static int fib_insert_alias(struct trie *t, struct key_vector *tp, + struct key_vector *l, struct fib_alias *new, + struct fib_alias *fa, t_key key) +{ + if (!l) + return fib_insert_node(t, tp, new, key); + + if (fa) { + hlist_add_before_rcu(&new->fa_list, &fa->fa_list); } else { - rcu_assign_pointer(t->trie, l); + struct fib_alias *last; + + hlist_for_each_entry(last, &l->leaf, fa_list) { + if (new->fa_slen < last->fa_slen) + break; + if ((new->fa_slen == last->fa_slen) && + (new->tb_id > last->tb_id)) + break; + fa = last; + } + + if (fa) + hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); + else + hlist_add_head_rcu(&new->fa_list, &l->leaf); } - return fa_head; + /* if we added to the tail node then we need to update slen */ + if (l->slen < new->fa_slen) { + l->slen = new->fa_slen; + leaf_push_suffix(tp, l); + } + + return 0; } -/* - * Caller must hold RTNL. - */ +/* Caller must hold RTNL. */ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) { - struct trie *t = (struct trie *) tb->tb_data; + struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; - struct list_head *fa_head = NULL; + struct key_vector *l, *tp; struct fib_info *fi; - int plen = cfg->fc_dst_len; + u8 plen = cfg->fc_dst_len; + u8 slen = KEYLENGTH - plen; u8 tos = cfg->fc_tos; - u32 key, mask; + u32 key; int err; - struct tnode *l; - if (plen > 32) + if (plen > KEYLENGTH) return -EINVAL; key = ntohl(cfg->fc_dst); pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); - mask = ntohl(inet_make_mask(plen)); - - if (key & ~mask) + if ((plen < KEYLENGTH) && (key << plen)) return -EINVAL; - key = key & mask; - fi = fib_create_info(cfg); if (IS_ERR(fi)) { err = PTR_ERR(fi); goto err; } - l = fib_find_node(t, key); - fa = NULL; - - if (l) { - fa_head = get_fa_head(l, plen); - fa = fib_find_alias(fa_head, tos, fi->fib_priority); - } + l = fib_find_node(t, &tp, key); + fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, + tb->tb_id) : NULL; /* Now fa, if non-NULL, points to the first fib alias * with the same keys [prefix,tos,priority], if such key already * exists or to the node before which we will insert new one. * * If fa is NULL, we will need to allocate a new one and - * insert to the head of f. - * - * If f is NULL, no fib node matched the destination key - * and we need to allocate a new one of those as well. + * insert to the tail of the section matching the suffix length + * of the new alias. */ if (fa && fa->fa_tos == tos && @@ -1192,9 +1128,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) */ fa_match = NULL; fa_first = fa; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, fa_head, fa_list) { - if (fa->fa_tos != tos) + hlist_for_each_entry_from(fa, fa_list) { + if ((fa->fa_slen != slen) || + (fa->tb_id != tb->tb_id) || + (fa->fa_tos != tos)) break; if (fa->fa_info->fib_priority != fi->fib_priority) break; @@ -1217,7 +1154,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) } err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); - if (new_fa == NULL) + if (!new_fa) goto out; fi_drop = fa->fa_info; @@ -1226,8 +1163,21 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_type = cfg->fc_type; state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; + new_fa->fa_slen = fa->fa_slen; + + err = netdev_switch_fib_ipv4_add(key, plen, fi, + new_fa->fa_tos, + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); + if (err) { + netdev_switch_fib_ipv4_abort(fi); + kmem_cache_free(fn_alias_kmem, new_fa); + goto out; + } + + hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); - list_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); fib_release_info(fi_drop); @@ -1254,37 +1204,42 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); - if (new_fa == NULL) + if (!new_fa) goto out; new_fa->fa_info = fi; new_fa->fa_tos = tos; new_fa->fa_type = cfg->fc_type; new_fa->fa_state = 0; - /* - * Insert new entry to the list. - */ - - if (!fa_head) { - fa_head = fib_insert_node(t, key, plen); - if (unlikely(!fa_head)) { - err = -ENOMEM; - goto out_free_new_fa; - } + new_fa->fa_slen = slen; + new_fa->tb_id = tb->tb_id; + + /* (Optionally) offload fib entry to switch hardware. */ + err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); + if (err) { + netdev_switch_fib_ipv4_abort(fi); + goto out_free_new_fa; } + /* Insert new entry to the list. */ + err = fib_insert_alias(t, tp, l, new_fa, fa, key); + if (err) + goto out_sw_fib_del; + if (!plen) tb->tb_num_default++; - list_add_tail_rcu(&new_fa->fa_list, - (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, 0); succeeded: return 0; +out_sw_fib_del: + netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1293,7 +1248,7 @@ err: return err; } -static inline t_key prefix_mismatch(t_key key, struct tnode *n) +static inline t_key prefix_mismatch(t_key key, struct key_vector *n) { t_key prefix = n->key; @@ -1304,16 +1259,20 @@ static inline t_key prefix_mismatch(t_key key, struct tnode *n) int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { - struct trie *t = (struct trie *)tb->tb_data; + struct trie *t = (struct trie *) tb->tb_data; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats = t->stats; #endif const t_key key = ntohl(flp->daddr); - struct tnode *n, *pn; - struct leaf_info *li; + struct key_vector *n, *pn; + struct fib_alias *fa; + unsigned long index; t_key cindex; - n = rcu_dereference(t->trie); + pn = t->kv; + cindex = 0; + + n = get_child_rcu(pn, cindex); if (!n) return -EAGAIN; @@ -1321,24 +1280,25 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, this_cpu_inc(stats->gets); #endif - pn = n; - cindex = 0; - /* Step 1: Travel to the longest prefix match in the trie */ for (;;) { - unsigned long index = get_index(key, n); + index = get_cindex(key, n); /* This bit of code is a bit tricky but it combines multiple * checks into a single check. The prefix consists of the * prefix plus zeros for the "bits" in the prefix. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if (index & (~0ul << bits)) + * if (index >= (1ul << bits)) * we have a mismatch in skip bits and failed * else * we know the value is cindex + * + * This check is safe even if bits == KEYLENGTH due to the + * fact that we can only allocate a node with 32 bits if a + * long is greater than 32 bits. */ - if (index & (~0ul << n->bits)) + if (index >= (1ul << n->bits)) break; /* we have found a leaf. Prefixes have already been compared */ @@ -1353,7 +1313,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, cindex = index; } - n = tnode_get_child_rcu(n, index); + n = get_child_rcu(n, index); if (unlikely(!n)) goto backtrace; } @@ -1361,7 +1321,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, /* Step 2: Sort out leaves and begin backtracing for longest prefix */ for (;;) { /* record the pointer where our next node pointer is stored */ - struct tnode __rcu **cptr = n->child; + struct key_vector __rcu **cptr = n->tnode; /* This test verifies that none of the bits that differ * between the key and the prefix exist in the region of @@ -1393,13 +1353,17 @@ backtrace: while (!cindex) { t_key pkey = pn->key; - pn = node_parent_rcu(pn); - if (unlikely(!pn)) + /* If we don't have a parent then there is + * nothing for us to do as we do not have any + * further nodes to parse. + */ + if (IS_TRIE(pn)) return -EAGAIN; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->backtrack); #endif /* Get Child's index */ + pn = node_parent_rcu(pn); cindex = get_index(pkey, pn); } @@ -1407,138 +1371,134 @@ backtrace: cindex &= cindex - 1; /* grab pointer for next child node */ - cptr = &pn->child[cindex]; + cptr = &pn->tnode[cindex]; } } found: + /* this line carries forward the xor from earlier in the function */ + index = key ^ n->key; + /* Step 3: Process the leaf, if that fails fall back to backtracing */ - hlist_for_each_entry_rcu(li, &n->list, hlist) { - struct fib_alias *fa; + hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + int nhsel, err; - if ((key ^ n->key) & li->mask_plen) + if ((index >= (1ul << fa->fa_slen)) && + ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH))) continue; - - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - struct fib_info *fi = fa->fa_info; - int nhsel, err; - - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) - continue; - if (fi->fib_dead) - continue; - if (fa->fa_info->fib_scope < flp->flowi4_scope) - continue; - fib_alias_accessed(fa); - err = fib_props[fa->fa_type].error; - if (unlikely(err < 0)) { + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fi->fib_dead) + continue; + if (fa->fa_info->fib_scope < flp->flowi4_scope) + continue; + fib_alias_accessed(fa); + err = fib_props[fa->fa_type].error; + if (unlikely(err < 0)) { #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->semantic_match_passed); + this_cpu_inc(stats->semantic_match_passed); #endif - return err; - } - if (fi->fib_flags & RTNH_F_DEAD) + return err; + } + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (nh->nh_flags & RTNH_F_DEAD) continue; - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) - continue; - - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); - - res->prefixlen = li->plen; - res->nh_sel = nhsel; - res->type = fa->fa_type; - res->scope = fi->fib_scope; - res->fi = fi; - res->table = tb; - res->fa_head = &li->falh; + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) + continue; + + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + + res->prefixlen = KEYLENGTH - fa->fa_slen; + res->nh_sel = nhsel; + res->type = fa->fa_type; + res->scope = fi->fib_scope; + res->fi = fi; + res->table = tb; + res->fa_head = &n->leaf; #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->semantic_match_passed); + this_cpu_inc(stats->semantic_match_passed); #endif - return err; - } + return err; } - + } #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->semantic_match_miss); + this_cpu_inc(stats->semantic_match_miss); #endif - } goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); -/* - * Remove the leaf and return parent. - */ -static void trie_leaf_remove(struct trie *t, struct tnode *l) +static void fib_remove_alias(struct trie *t, struct key_vector *tp, + struct key_vector *l, struct fib_alias *old) { - struct tnode *tp = node_parent(l); + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->fa_list.pprev; + struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); - pr_debug("entering trie_leaf_remove(%p)\n", l); + /* remove the fib_alias from the list */ + hlist_del_rcu(&old->fa_list); - if (tp) { - put_child(tp, get_index(l->key, tp), NULL); + /* if we emptied the list this leaf will be freed and we can sort + * out parent suffix lengths as a part of trie_rebalance + */ + if (hlist_empty(&l->leaf)) { + put_child_root(tp, l->key, NULL); + node_free(l); trie_rebalance(t, tp); - } else { - RCU_INIT_POINTER(t->trie, NULL); + return; } - node_free(l); + /* only access fa if it is pointing at the last valid hlist_node */ + if (*pprev) + return; + + /* update the trie with the latest suffix length */ + l->slen = fa->fa_slen; + leaf_pull_suffix(tp, l); } -/* - * Caller must hold RTNL. - */ +/* Caller must hold RTNL. */ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; - u32 key, mask; - int plen = cfg->fc_dst_len; - u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; - struct list_head *fa_head; - struct tnode *l; - struct leaf_info *li; + struct key_vector *l, *tp; + u8 plen = cfg->fc_dst_len; + u8 slen = KEYLENGTH - plen; + u8 tos = cfg->fc_tos; + u32 key; - if (plen > 32) + if (plen > KEYLENGTH) return -EINVAL; key = ntohl(cfg->fc_dst); - mask = ntohl(inet_make_mask(plen)); - if (key & ~mask) + if ((plen < KEYLENGTH) && (key << plen)) return -EINVAL; - key = key & mask; - l = fib_find_node(t, key); - + l = fib_find_node(t, &tp, key); if (!l) return -ESRCH; - li = find_leaf_info(l, plen); - - if (!li) - return -ESRCH; - - fa_head = &li->falh; - fa = fib_find_alias(fa_head, tos, 0); - + fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); if (!fa) return -ESRCH; pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); fa_to_delete = NULL; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, fa_head, fa_list) { + hlist_for_each_entry_from(fa, fa_list) { struct fib_info *fi = fa->fa_info; - if (fa->fa_tos != tos) + if ((fa->fa_slen != slen) || + (fa->tb_id != tb->tb_id) || + (fa->fa_tos != tos)) break; if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && @@ -1557,240 +1517,397 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!fa_to_delete) return -ESRCH; - fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, - &cfg->fc_nlinfo, 0); + netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, + cfg->fc_type, tb->tb_id); - list_del_rcu(&fa->fa_list); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, + &cfg->fc_nlinfo, 0); if (!plen) tb->tb_num_default--; - if (list_empty(fa_head)) { - remove_leaf_info(l, li); - free_leaf_info(li); - } + fib_remove_alias(t, tp, l, fa_to_delete); - if (hlist_empty(&l->list)) - trie_leaf_remove(t, l); - - if (fa->fa_state & FA_S_ACCESSED) + if (fa_to_delete->fa_state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); - fib_release_info(fa->fa_info); - alias_free_mem_rcu(fa); + fib_release_info(fa_to_delete->fa_info); + alias_free_mem_rcu(fa_to_delete); return 0; } -static int trie_flush_list(struct list_head *head) +/* Scan for the next leaf starting at the provided key value */ +static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) { - struct fib_alias *fa, *fa_node; - int found = 0; + struct key_vector *pn, *n = *tn; + unsigned long cindex; - list_for_each_entry_safe(fa, fa_node, head, fa_list) { - struct fib_info *fi = fa->fa_info; + /* this loop is meant to try and find the key in the trie */ + do { + /* record parent and next child index */ + pn = n; + cindex = key ? get_index(key, pn) : 0; - if (fi && (fi->fib_flags & RTNH_F_DEAD)) { - list_del_rcu(&fa->fa_list); - fib_release_info(fa->fa_info); - alias_free_mem_rcu(fa); - found++; + if (cindex >> pn->bits) + break; + + /* descend into the next child */ + n = get_child_rcu(pn, cindex++); + if (!n) + break; + + /* guarantee forward progress on the keys */ + if (IS_LEAF(n) && (n->key >= key)) + goto found; + } while (IS_TNODE(n)); + + /* this loop will search for the next leaf with a greater key */ + while (!IS_TRIE(pn)) { + /* if we exhausted the parent node we will need to climb */ + if (cindex >= (1ul << pn->bits)) { + t_key pkey = pn->key; + + pn = node_parent_rcu(pn); + cindex = get_index(pkey, pn) + 1; + continue; } + + /* grab the next available node */ + n = get_child_rcu(pn, cindex++); + if (!n) + continue; + + /* no need to compare keys since we bumped the index */ + if (IS_LEAF(n)) + goto found; + + /* Rescan start scanning in new node */ + pn = n; + cindex = 0; } - return found; + + *tn = pn; + return NULL; /* Root of trie */ +found: + /* if we are at the limit for keys just return NULL for the tnode */ + *tn = pn; + return n; } -static int trie_flush_leaf(struct tnode *l) +static void fib_trie_free(struct fib_table *tb) { - int found = 0; - struct hlist_head *lih = &l->list; + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; struct hlist_node *tmp; - struct leaf_info *li = NULL; - unsigned char plen = KEYLENGTH; + struct fib_alias *fa; + + /* walk trie in reverse order and free everything */ + for (;;) { + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; + + if (IS_TRIE(pn)) + break; + + n = pn; + pn = node_parent(pn); - hlist_for_each_entry_safe(li, tmp, lih, hlist) { - found += trie_flush_list(&li->falh); + /* drop emptied tnode */ + put_child_root(pn, n->key, NULL); + node_free(n); + + cindex = get_index(pkey, pn); - if (list_empty(&li->falh)) { - hlist_del_rcu(&li->hlist); - free_leaf_info(li); continue; } - plen = li->plen; - } + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; - l->slen = KEYLENGTH - plen; + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; - return found; + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + } + + put_child_root(pn, n->key, NULL); + node_free(n); + } + +#ifdef CONFIG_IP_FIB_TRIE_STATS + free_percpu(t->stats); +#endif + kfree(tb); } -/* - * Scan for the next right leaf starting at node p->child[idx] - * Since we have back pointer, no recursion necessary. - */ -static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) +struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) { - do { - unsigned long idx = c ? idx = get_index(c->key, p) + 1 : 0; + struct trie *ot = (struct trie *)oldtb->tb_data; + struct key_vector *l, *tp = ot->kv; + struct fib_table *local_tb; + struct fib_alias *fa; + struct trie *lt; + t_key key = 0; - while (idx < tnode_child_length(p)) { - c = tnode_get_child_rcu(p, idx++); - if (!c) + if (oldtb->tb_data == oldtb->__data) + return oldtb; + + local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); + if (!local_tb) + return NULL; + + lt = (struct trie *)local_tb->tb_data; + + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { + struct key_vector *local_l = NULL, *local_tp; + + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + struct fib_alias *new_fa; + + if (local_tb->tb_id != fa->tb_id) continue; - if (IS_LEAF(c)) - return c; + /* clone fa for new local table */ + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); + if (!new_fa) + goto out; + + memcpy(new_fa, fa, sizeof(*fa)); - /* Rescan start scanning in new node */ - p = c; - idx = 0; + /* insert clone into table */ + if (!local_l) + local_l = fib_find_node(lt, &local_tp, l->key); + + if (fib_insert_alias(lt, local_tp, local_l, new_fa, + NULL, l->key)) + goto out; } - /* Node empty, walk back up to parent */ - c = p; - } while ((p = node_parent_rcu(c)) != NULL); + /* stop loop if key wrapped back to 0 */ + key = l->key + 1; + if (key < l->key) + break; + } - return NULL; /* Root of trie */ + return local_tb; +out: + fib_trie_free(local_tb); + + return NULL; } -static struct tnode *trie_firstleaf(struct trie *t) +/* Caller must hold RTNL */ +void fib_table_flush_external(struct fib_table *tb) { - struct tnode *n = rcu_dereference_rtnl(t->trie); + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; - if (!n) - return NULL; + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; - if (IS_LEAF(n)) /* trie is just a leaf */ - return n; + if (!(cindex--)) { + t_key pkey = pn->key; - return leaf_walk_rcu(n, NULL); -} + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; -static struct tnode *trie_nextleaf(struct tnode *l) -{ - struct tnode *p = node_parent_rcu(l); + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); - if (!p) - return NULL; /* trie with just one leaf */ + continue; + } - return leaf_walk_rcu(p, l); -} + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; -static struct tnode *trie_leafindex(struct trie *t, int index) -{ - struct tnode *l = trie_firstleaf(t); + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; - while (l && index-- > 0) - l = trie_nextleaf(l); + continue; + } - return l; -} + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + + /* if alias was cloned to local then we just + * need to remove the local copy from main + */ + if (tb->tb_id != fa->tb_id) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + continue; + } + /* record local slen */ + slen = fa->fa_slen; -/* - * Caller must hold RTNL. - */ + if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) + continue; + + netdev_switch_fib_ipv4_del(n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, + fa->fa_type, tb->tb_id); + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } else { + leaf_pull_suffix(pn, n); + } + } +} + +/* Caller must hold RTNL. */ int fib_table_flush(struct fib_table *tb) { - struct trie *t = (struct trie *) tb->tb_data; - struct tnode *l, *ll = NULL; + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; int found = 0; - for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { - found += trie_flush_leaf(l); + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; - if (ll) { - if (hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); - else - leaf_pull_suffix(ll); + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; + + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); + + continue; } - ll = l; - } + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; - if (ll) { - if (hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); - else - leaf_pull_suffix(ll); + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; + + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + + if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { + slen = fa->fa_slen; + continue; + } + + netdev_switch_fib_ipv4_del(n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, + fa->fa_type, tb->tb_id); + hlist_del_rcu(&fa->fa_list); + fib_release_info(fa->fa_info); + alias_free_mem_rcu(fa); + found++; + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } else { + leaf_pull_suffix(pn, n); + } } pr_debug("trie_flush found=%d\n", found); return found; } -void fib_free_table(struct fib_table *tb) +static void __trie_free_rcu(struct rcu_head *head) { + struct fib_table *tb = container_of(head, struct fib_table, rcu); #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie *t = (struct trie *)tb->tb_data; - free_percpu(t->stats); + if (tb->tb_data == tb->__data) + free_percpu(t->stats); #endif /* CONFIG_IP_FIB_TRIE_STATS */ kfree(tb); } -static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, - struct fib_table *tb, - struct sk_buff *skb, struct netlink_callback *cb) +void fib_free_table(struct fib_table *tb) { - int i, s_i; + call_rcu(&tb->rcu, __trie_free_rcu); +} + +static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, + struct sk_buff *skb, struct netlink_callback *cb) +{ + __be32 xkey = htonl(l->key); struct fib_alias *fa; - __be32 xkey = htonl(key); + int i, s_i; - s_i = cb->args[5]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ - - list_for_each_entry_rcu(fa, fah, fa_list) { + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { if (i < s_i) { i++; continue; } + if (tb->tb_id != fa->tb_id) { + i++; + continue; + } + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, fa->fa_type, xkey, - plen, + KEYLENGTH - fa->fa_slen, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[5] = i; - return -1; - } - i++; - } - cb->args[5] = i; - return skb->len; -} - -static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, - struct sk_buff *skb, struct netlink_callback *cb) -{ - struct leaf_info *li; - int i, s_i; - - s_i = cb->args[4]; - i = 0; - - /* rcu_read_lock is hold by caller */ - hlist_for_each_entry_rcu(li, &l->list, hlist) { - if (i < s_i) { - i++; - continue; - } - - if (i > s_i) - cb->args[5] = 0; - - if (list_empty(&li->falh)) - continue; - - if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { cb->args[4] = i; return -1; } @@ -1801,44 +1918,38 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, return skb->len; } +/* rcu_read_lock needs to be hold by caller from readside */ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { - struct tnode *l; - struct trie *t = (struct trie *) tb->tb_data; - t_key key = cb->args[2]; - int count = cb->args[3]; - - rcu_read_lock(); + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *l, *tp = t->kv; /* Dump starting at last key. * Note: 0.0.0.0/0 (ie default) is first key. */ - if (count == 0) - l = trie_firstleaf(t); - else { - /* Normally, continue from last key, but if that is missing - * fallback to using slow rescan - */ - l = fib_find_node(t, key); - if (!l) - l = trie_leafindex(t, count); - } + int count = cb->args[2]; + t_key key = cb->args[3]; - while (l) { - cb->args[2] = l->key; + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { - cb->args[3] = count; - rcu_read_unlock(); + cb->args[3] = key; + cb->args[2] = count; return -1; } ++count; - l = trie_nextleaf(l); + key = l->key + 1; + memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); + + /* stop loop if key wrapped back to 0 */ + if (key < l->key) + break; } - cb->args[3] = count; - rcu_read_unlock(); + + cb->args[3] = key; + cb->args[2] = count; return skb->len; } @@ -1850,28 +1961,34 @@ void __init fib_trie_init(void) 0, SLAB_PANIC, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", - max(sizeof(struct tnode), - sizeof(struct leaf_info)), + LEAF_SIZE, 0, SLAB_PANIC, NULL); } - -struct fib_table *fib_trie_table(u32 id) +struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) { struct fib_table *tb; struct trie *t; + size_t sz = sizeof(*tb); + + if (!alias) + sz += sizeof(struct trie); - tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), - GFP_KERNEL); - if (tb == NULL) + tb = kzalloc(sz, GFP_KERNEL); + if (!tb) return NULL; tb->tb_id = id; tb->tb_default = -1; tb->tb_num_default = 0; + tb->tb_data = (alias ? alias->__data : tb->__data); + + if (alias) + return tb; t = (struct trie *) tb->tb_data; - RCU_INIT_POINTER(t->trie, NULL); + t->kv[0].pos = KEYLENGTH; + t->kv[0].slen = KEYLENGTH; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats = alloc_percpu(struct trie_use_stats); if (!t->stats) { @@ -1888,65 +2005,63 @@ struct fib_table *fib_trie_table(u32 id) struct fib_trie_iter { struct seq_net_private p; struct fib_table *tb; - struct tnode *tnode; + struct key_vector *tnode; unsigned int index; unsigned int depth; }; -static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) +static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) { unsigned long cindex = iter->index; - struct tnode *tn = iter->tnode; - struct tnode *p; - - /* A single entry routing table */ - if (!tn) - return NULL; + struct key_vector *pn = iter->tnode; + t_key pkey; pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); -rescan: - while (cindex < tnode_child_length(tn)) { - struct tnode *n = tnode_get_child_rcu(tn, cindex); - if (n) { + while (!IS_TRIE(pn)) { + while (cindex < child_length(pn)) { + struct key_vector *n = get_child_rcu(pn, cindex++); + + if (!n) + continue; + if (IS_LEAF(n)) { - iter->tnode = tn; - iter->index = cindex + 1; + iter->tnode = pn; + iter->index = cindex; } else { /* push down one level */ iter->tnode = n; iter->index = 0; ++iter->depth; } + return n; } - ++cindex; - } - - /* Current node exhausted, pop back up */ - p = node_parent_rcu(tn); - if (p) { - cindex = get_index(tn->key, p) + 1; - tn = p; + /* Current node exhausted, pop back up */ + pkey = pn->key; + pn = node_parent_rcu(pn); + cindex = get_index(pkey, pn) + 1; --iter->depth; - goto rescan; } - /* got root? */ + /* record root node so further searches know we are done */ + iter->tnode = pn; + iter->index = 0; + return NULL; } -static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, - struct trie *t) +static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, + struct trie *t) { - struct tnode *n; + struct key_vector *n, *pn = t->kv; if (!t) return NULL; - n = rcu_dereference(t->trie); + n = rcu_dereference(pn->tnode[0]); if (!n) return NULL; @@ -1955,7 +2070,7 @@ static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, iter->index = 0; iter->depth = 1; } else { - iter->tnode = NULL; + iter->tnode = pn; iter->index = 0; iter->depth = 0; } @@ -1965,7 +2080,7 @@ static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct tnode *n; + struct key_vector *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -1973,20 +2088,20 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { - struct leaf_info *li; + struct fib_alias *fa; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, &n->list, hlist) + hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) ++s->prefixes; } else { s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; - s->nullpointers += n->empty_children; + s->nullpointers += tn_info(n)->empty_children; } } rcu_read_unlock(); @@ -2009,13 +2124,13 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - bytes = sizeof(struct tnode) * stat->leaves; + bytes = LEAF_SIZE * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); - bytes += sizeof(struct leaf_info) * stat->prefixes; + bytes += sizeof(struct fib_alias) * stat->prefixes; seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); - bytes += sizeof(struct tnode) * stat->tnodes; + bytes += TNODE_SIZE(0) * stat->tnodes; max = MAX_STAT_DEPTH; while (max > 0 && stat->nodesizes[max-1] == 0) @@ -2030,7 +2145,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct tnode *) * pointers; + bytes += sizeof(struct key_vector *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } @@ -2084,7 +2199,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Basic info: size of leaf:" " %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct tnode), sizeof(struct tnode)); + LEAF_SIZE, TNODE_SIZE(0)); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; @@ -2123,7 +2238,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2135,7 +2250,7 @@ static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - struct tnode *n; + struct key_vector *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2164,7 +2279,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct tnode *n; + struct key_vector *n; ++*pos; /* next node in same table */ @@ -2250,9 +2365,9 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct tnode *n = v; + struct key_vector *n = v; - if (!node_parent_rcu(n)) + if (IS_TRIE(node_parent_rcu(n))) fib_table_print(seq, iter->tb); if (IS_TNODE(n)) { @@ -2261,30 +2376,28 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", &prf, KEYLENGTH - n->pos - n->bits, n->bits, - n->full_children, n->empty_children); + tn_info(n)->full_children, + tn_info(n)->empty_children); } else { - struct leaf_info *li; __be32 val = htonl(n->key); + struct fib_alias *fa; seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, &n->list, hlist) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - char buf1[32], buf2[32]; - - seq_indent(seq, iter->depth+1); - seq_printf(seq, " /%d %s %s", li->plen, - rtn_scope(buf1, sizeof(buf1), - fa->fa_info->fib_scope), - rtn_type(buf2, sizeof(buf2), - fa->fa_type)); - if (fa->fa_tos) - seq_printf(seq, " tos=%d", fa->fa_tos); - seq_putc(seq, '\n'); - } + hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { + char buf1[32], buf2[32]; + + seq_indent(seq, iter->depth + 1); + seq_printf(seq, " /%zu %s %s", + KEYLENGTH - fa->fa_slen, + rtn_scope(buf1, sizeof(buf1), + fa->fa_info->fib_scope), + rtn_type(buf2, sizeof(buf2), + fa->fa_type)); + if (fa->fa_tos) + seq_printf(seq, " tos=%d", fa->fa_tos); + seq_putc(seq, '\n'); } } @@ -2314,31 +2427,47 @@ static const struct file_operations fib_trie_fops = { struct fib_route_iter { struct seq_net_private p; - struct trie *main_trie; + struct fib_table *main_tb; + struct key_vector *tnode; loff_t pos; t_key key; }; -static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, + loff_t pos) { - struct tnode *l = NULL; - struct trie *t = iter->main_trie; + struct fib_table *tb = iter->main_tb; + struct key_vector *l, **tp = &iter->tnode; + struct trie *t; + t_key key; - /* use cache location of last found key */ - if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key))) + /* use cache location of next-to-find key */ + if (iter->pos > 0 && pos >= iter->pos) { pos -= iter->pos; - else { + key = iter->key; + } else { + t = (struct trie *)tb->tb_data; + iter->tnode = t->kv; iter->pos = 0; - l = trie_firstleaf(t); + key = 0; } - while (l && pos-- > 0) { + while ((l = leaf_walk_rcu(tp, key)) != NULL) { + key = l->key + 1; iter->pos++; - l = trie_nextleaf(l); + + if (pos-- <= 0) + break; + + l = NULL; + + /* handle unlikely case of a key wrap */ + if (!key) + break; } if (l) - iter->key = pos; /* remember it */ + iter->key = key; /* remember it */ else iter->pos = 0; /* forget it */ @@ -2350,37 +2479,46 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct fib_table *tb; + struct trie *t; rcu_read_lock(); + tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); if (!tb) return NULL; - iter->main_trie = (struct trie *) tb->tb_data; - if (*pos == 0) - return SEQ_START_TOKEN; - else - return fib_route_get_idx(iter, *pos - 1); + iter->main_tb = tb; + + if (*pos != 0) + return fib_route_get_idx(iter, *pos); + + t = (struct trie *)tb->tb_data; + iter->tnode = t->kv; + iter->pos = 0; + iter->key = 0; + + return SEQ_START_TOKEN; } static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; - struct tnode *l = v; + struct key_vector *l = NULL; + t_key key = iter->key; ++*pos; - if (v == SEQ_START_TOKEN) { - iter->pos = 0; - l = trie_firstleaf(iter->main_trie); - } else { + + /* only allow key of 0 for start of sequence */ + if ((v == SEQ_START_TOKEN) || key) + l = leaf_walk_rcu(&iter->tnode, key); + + if (l) { + iter->key = l->key + 1; iter->pos++; - l = trie_nextleaf(l); + } else { + iter->pos = 0; } - if (l) - iter->key = l->key; - else - iter->pos = 0; return l; } @@ -2412,8 +2550,11 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info */ static int fib_route_seq_show(struct seq_file *seq, void *v) { - struct tnode *l = v; - struct leaf_info *li; + struct fib_route_iter *iter = seq->private; + struct fib_table *tb = iter->main_tb; + struct fib_alias *fa; + struct key_vector *l = v; + __be32 prefix; if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " @@ -2422,45 +2563,43 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; } - hlist_for_each_entry_rcu(li, &l->list, hlist) { - struct fib_alias *fa; - __be32 mask, prefix; + prefix = htonl(l->key); - mask = inet_make_mask(li->plen); - prefix = htonl(l->key); + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + const struct fib_info *fi = fa->fa_info; + __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); + unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - const struct fib_info *fi = fa->fa_info; - unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); + if ((fa->fa_type == RTN_BROADCAST) || + (fa->fa_type == RTN_MULTICAST)) + continue; - if (fa->fa_type == RTN_BROADCAST - || fa->fa_type == RTN_MULTICAST) - continue; + if (fa->tb_id != tb->tb_id) + continue; - seq_setwidth(seq, 127); - - if (fi) - seq_printf(seq, - "%s\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u", - fi->fib_dev ? fi->fib_dev->name : "*", - prefix, - fi->fib_nh->nh_gw, flags, 0, 0, - fi->fib_priority, - mask, - (fi->fib_advmss ? - fi->fib_advmss + 40 : 0), - fi->fib_window, - fi->fib_rtt >> 3); - else - seq_printf(seq, - "*\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u", - prefix, 0, flags, 0, 0, 0, - mask, 0, 0, 0); - - seq_pad(seq, '\n'); - } + seq_setwidth(seq, 127); + + if (fi) + seq_printf(seq, + "%s\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", + prefix, + fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_priority, + mask, + (fi->fib_advmss ? + fi->fib_advmss + 40 : 0), + fi->fib_window, + fi->fib_rtt >> 3); + else + seq_printf(seq, + "*\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u", + prefix, 0, flags, 0, 0, 0, + mask, 0, 0, 0); + + seq_pad(seq, '\n'); } return 0; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index ff069f6597ac..af150b43b214 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -16,14 +16,12 @@ #include <uapi/linux/fou.h> #include <uapi/linux/genetlink.h> -static DEFINE_SPINLOCK(fou_lock); -static LIST_HEAD(fou_list); - struct fou { struct socket *sock; u8 protocol; u8 flags; - u16 port; + __be16 port; + u16 type; struct udp_offload udp_offloads; struct list_head list; }; @@ -37,6 +35,13 @@ struct fou_cfg { struct udp_port_cfg udp_config; }; +static unsigned int fou_net_id; + +struct fou_net { + struct list_head fou_list; + struct mutex fou_lock; +}; + static inline struct fou *fou_from_sock(struct sock *sk) { return sk->sk_user_data; @@ -387,20 +392,21 @@ out_unlock: return err; } -static int fou_add_to_port_list(struct fou *fou) +static int fou_add_to_port_list(struct net *net, struct fou *fou) { + struct fou_net *fn = net_generic(net, fou_net_id); struct fou *fout; - spin_lock(&fou_lock); - list_for_each_entry(fout, &fou_list, list) { + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { if (fou->port == fout->port) { - spin_unlock(&fou_lock); + mutex_unlock(&fn->fou_lock); return -EALREADY; } } - list_add(&fou->list, &fou_list); - spin_unlock(&fou_lock); + list_add(&fou->list, &fn->fou_list); + mutex_unlock(&fn->fou_lock); return 0; } @@ -410,14 +416,10 @@ static void fou_release(struct fou *fou) struct socket *sock = fou->sock; struct sock *sk = sock->sk; - udp_del_offload(&fou->udp_offloads); - + if (sk->sk_family == AF_INET) + udp_del_offload(&fou->udp_offloads); list_del(&fou->list); - - /* Remove hooks into tunnel socket */ - sk->sk_user_data = NULL; - - sock_release(sock); + udp_tunnel_sock_release(sock); kfree(fou); } @@ -447,10 +449,10 @@ static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) static int fou_create(struct net *net, struct fou_cfg *cfg, struct socket **sockp) { - struct fou *fou = NULL; - int err; struct socket *sock = NULL; + struct fou *fou = NULL; struct sock *sk; + int err; /* Open UDP socket */ err = udp_sock_create(net, &cfg->udp_config, &sock); @@ -486,6 +488,8 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, goto error; } + fou->type = cfg->type; + udp_sk(sk)->encap_type = 1; udp_encap_enable(); @@ -502,7 +506,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, goto error; } - err = fou_add_to_port_list(fou); + err = fou_add_to_port_list(net, fou); if (err) goto error; @@ -514,27 +518,27 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, error: kfree(fou); if (sock) - sock_release(sock); + udp_tunnel_sock_release(sock); return err; } static int fou_destroy(struct net *net, struct fou_cfg *cfg) { - struct fou *fou; - u16 port = cfg->udp_config.local_udp_port; + struct fou_net *fn = net_generic(net, fou_net_id); + __be16 port = cfg->udp_config.local_udp_port; int err = -EINVAL; + struct fou *fou; - spin_lock(&fou_lock); - list_for_each_entry(fou, &fou_list, list) { + mutex_lock(&fn->fou_lock); + list_for_each_entry(fou, &fn->fou_list, list) { if (fou->port == port) { - udp_del_offload(&fou->udp_offloads); fou_release(fou); err = 0; break; } } - spin_unlock(&fou_lock); + mutex_unlock(&fn->fou_lock); return err; } @@ -573,7 +577,7 @@ static int parse_nl_config(struct genl_info *info, } if (info->attrs[FOU_ATTR_PORT]) { - u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]); + __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); cfg->udp_config.local_udp_port = port; } @@ -592,6 +596,7 @@ static int parse_nl_config(struct genl_info *info, static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct fou_cfg cfg; int err; @@ -599,16 +604,120 @@ static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return fou_create(&init_net, &cfg, NULL); + return fou_create(net, &cfg, NULL); } static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); + struct fou_cfg cfg; + int err; + + err = parse_nl_config(info, &cfg); + if (err) + return err; + + return fou_destroy(net, &cfg); +} + +static int fou_fill_info(struct fou *fou, struct sk_buff *msg) +{ + if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || + nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || + nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || + nla_put_u8(msg, FOU_ATTR_TYPE, fou->type)) + return -1; + + if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) + if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) + return -1; + return 0; +} + +static int fou_dump_info(struct fou *fou, u32 portid, u32 seq, + u32 flags, struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (fou_fill_info(fou, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct fou_net *fn = net_generic(net, fou_net_id); + struct sk_buff *msg; struct fou_cfg cfg; + struct fou *fout; + __be16 port; + int ret; + + ret = parse_nl_config(info, &cfg); + if (ret) + return ret; + port = cfg.udp_config.local_udp_port; + if (port == 0) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = -ESRCH; + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { + if (port == fout->port) { + ret = fou_dump_info(fout, info->snd_portid, + info->snd_seq, 0, msg, + info->genlhdr->cmd); + break; + } + } + mutex_unlock(&fn->fou_lock); + if (ret < 0) + goto out_free; - parse_nl_config(info, &cfg); + return genlmsg_reply(msg, info); - return fou_destroy(&init_net, &cfg); +out_free: + nlmsg_free(msg); + return ret; +} + +static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct fou_net *fn = net_generic(net, fou_net_id); + struct fou *fout; + int idx = 0, ret; + + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { + if (idx++ < cb->args[0]) + continue; + ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, FOU_CMD_GET); + if (ret) + goto done; + } + mutex_unlock(&fn->fou_lock); + +done: + cb->args[0] = idx; + return skb->len; } static const struct genl_ops fou_nl_ops[] = { @@ -624,6 +733,12 @@ static const struct genl_ops fou_nl_ops[] = { .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = FOU_CMD_GET, + .doit = fou_nl_cmd_get_port, + .dumpit = fou_nl_dump, + .policy = fou_nl_policy, + }, }; size_t fou_encap_hlen(struct ip_tunnel_encap *e) @@ -771,12 +886,12 @@ EXPORT_SYMBOL(gue_build_header); #ifdef CONFIG_NET_FOU_IP_TUNNELS -static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = { +static const struct ip_tunnel_encap_ops fou_iptun_ops = { .encap_hlen = fou_encap_hlen, .build_header = fou_build_header, }; -static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = { +static const struct ip_tunnel_encap_ops gue_iptun_ops = { .encap_hlen = gue_encap_hlen, .build_header = gue_build_header, }; @@ -820,38 +935,63 @@ static void ip_tunnel_encap_del_fou_ops(void) #endif +static __net_init int fou_init_net(struct net *net) +{ + struct fou_net *fn = net_generic(net, fou_net_id); + + INIT_LIST_HEAD(&fn->fou_list); + mutex_init(&fn->fou_lock); + return 0; +} + +static __net_exit void fou_exit_net(struct net *net) +{ + struct fou_net *fn = net_generic(net, fou_net_id); + struct fou *fou, *next; + + /* Close all the FOU sockets */ + mutex_lock(&fn->fou_lock); + list_for_each_entry_safe(fou, next, &fn->fou_list, list) + fou_release(fou); + mutex_unlock(&fn->fou_lock); +} + +static struct pernet_operations fou_net_ops = { + .init = fou_init_net, + .exit = fou_exit_net, + .id = &fou_net_id, + .size = sizeof(struct fou_net), +}; + static int __init fou_init(void) { int ret; + ret = register_pernet_device(&fou_net_ops); + if (ret) + goto exit; + ret = genl_register_family_with_ops(&fou_nl_family, fou_nl_ops); - if (ret < 0) - goto exit; + goto unregister; ret = ip_tunnel_encap_add_fou_ops(); - if (ret < 0) - genl_unregister_family(&fou_nl_family); + if (ret == 0) + return 0; + genl_unregister_family(&fou_nl_family); +unregister: + unregister_pernet_device(&fou_net_ops); exit: return ret; } static void __exit fou_fini(void) { - struct fou *fou, *next; - ip_tunnel_encap_del_fou_ops(); - genl_unregister_family(&fou_nl_family); - - /* Close all the FOU sockets */ - - spin_lock(&fou_lock); - list_for_each_entry_safe(fou, next, &fou_list, list) - fou_release(fou); - spin_unlock(&fou_lock); + unregister_pernet_device(&fou_net_ops); } module_init(fou_init); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 5a4828ba05ad..8986e63f3bda 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, csum); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); @@ -131,12 +127,16 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, if (unlikely(!skb)) return -ENOMEM; + skb = udp_tunnel_handle_offloads(skb, csum); + if (IS_ERR(skb)) + return PTR_ERR(skb); + gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(rt, skb, src, dst, + return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, !csum); } @@ -196,7 +196,7 @@ static struct sk_buff **geneve_gro_receive(struct sk_buff **head, rcu_read_lock(); ptype = gro_find_receive_by_type(type); - if (ptype == NULL) { + if (!ptype) { flush = 1; goto out_unlock; } @@ -230,7 +230,7 @@ static int geneve_gro_complete(struct sk_buff *skb, int nhoff, rcu_read_lock(); ptype = gro_find_complete_by_type(type); - if (ptype != NULL) + if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); rcu_read_unlock(); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 51973ddc05a6..5aa46d4b44ef 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -149,7 +149,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, rcu_read_lock(); ptype = gro_find_receive_by_type(type); - if (ptype == NULL) + if (!ptype) goto out_unlock; grehlen = GRE_HEADER_SECTION; @@ -243,7 +243,7 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff) rcu_read_lock(); ptype = gro_find_complete_by_type(type); - if (ptype != NULL) + if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); rcu_read_unlock(); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5e564014a0b7..f5203fba6236 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -399,7 +399,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) return; sk = icmp_xmit_lock(net); - if (sk == NULL) + if (!sk) return; inet = inet_sk(sk); @@ -609,7 +609,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) skb_in->data, sizeof(_inner_type), &_inner_type); - if (itp == NULL) + if (!itp) goto out; /* @@ -627,7 +627,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) return; sk = icmp_xmit_lock(net); - if (sk == NULL) + if (!sk) goto out_free; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 666cf364df86..a3a697f5ffba 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -97,6 +97,7 @@ #include <net/route.h> #include <net/sock.h> #include <net/checksum.h> +#include <net/inet_common.h> #include <linux/netfilter_ipv4.h> #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> @@ -369,7 +370,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -691,7 +692,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); - if (skb == NULL) { + if (!skb) { ip_rt_put(rt); return -1; } @@ -713,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -980,7 +981,7 @@ int igmp_rcv(struct sk_buff *skb) int len = skb->len; bool dropped = true; - if (in_dev == NULL) + if (!in_dev) goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) @@ -1849,30 +1850,28 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) pmc->sfcount[MCAST_EXCLUDE] = 1; } - -/* - * Join a multicast group +/* Join a multicast group */ -int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) + +int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) { - int err; __be32 addr = imr->imr_multiaddr.s_addr; - struct ip_mc_socklist *iml = NULL, *i; + struct ip_mc_socklist *iml, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); int ifindex; int count = 0; + int err; + + ASSERT_RTNL(); if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); - in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { - iml = NULL; err = -ENODEV; goto done; } @@ -1889,7 +1888,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) if (count >= sysctl_igmp_max_memberships) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); - if (iml == NULL) + if (!iml) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); @@ -1900,7 +1899,6 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) ip_mc_inc_group(in_dev, addr); err = 0; done: - rtnl_unlock(); return err; } EXPORT_SYMBOL(ip_mc_join_group); @@ -1911,7 +1909,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; - if (psf == NULL) { + if (!psf) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); @@ -1925,10 +1923,6 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, return err; } -/* - * Ask a socket to leave a group. - */ - int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); @@ -1940,7 +1934,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) u32 ifindex; int ret = -EADDRNOTAVAIL; - rtnl_lock(); + ASSERT_RTNL(); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { ret = -ENODEV; @@ -1964,14 +1959,13 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; ip_mc_dec_group(in_dev, group); - rtnl_unlock(); + /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); return 0; } out: - rtnl_unlock(); return ret; } EXPORT_SYMBOL(ip_mc_leave_group); @@ -1993,7 +1987,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; @@ -2107,9 +2101,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); done: - rtnl_unlock(); if (leavegroup) - return ip_mc_leave_group(sk, &imr); + err = ip_mc_leave_group(sk, &imr); return err; } @@ -2131,7 +2124,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; @@ -2193,7 +2186,6 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) pmc->sfmode = msf->imsf_fmode; err = 0; done: - rtnl_unlock(); if (leavegroup) err = ip_mc_leave_group(sk, &imr); return err; @@ -2368,7 +2360,7 @@ void ip_mc_drop_socket(struct sock *sk) struct ip_mc_socklist *iml; struct net *net = sock_net(sk); - if (inet->mc_list == NULL) + if (!inet->mc_list) return; rtnl_lock(); @@ -2378,7 +2370,7 @@ void ip_mc_drop_socket(struct sock *sk) inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); - if (in_dev != NULL) + if (in_dev) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); @@ -2595,13 +2587,13 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) for_each_netdev_rcu(net, state->dev) { struct in_device *idev; idev = __in_dev_get_rcu(state->dev); - if (unlikely(idev == NULL)) + if (unlikely(!idev)) continue; im = rcu_dereference(idev->mc_list); - if (likely(im != NULL)) { + if (likely(im)) { spin_lock_bh(&im->lock); psf = im->sources; - if (likely(psf != NULL)) { + if (likely(psf)) { state->im = im; state->idev = idev; break; @@ -2671,7 +2663,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); - if (likely(state->im != NULL)) { + if (likely(state->im)) { spin_unlock_bh(&state->im->lock); state->im = NULL; } @@ -2724,6 +2716,7 @@ static const struct file_operations igmp_mcf_seq_fops = { static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; + int err; pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); if (!pde) @@ -2732,8 +2725,18 @@ static int __net_init igmp_net_init(struct net *net) &igmp_mcf_seq_fops); if (!pde) goto out_mcfilter; + err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET, + SOCK_DGRAM, 0, net); + if (err < 0) { + pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n", + err); + goto out_sock; + } + return 0; +out_sock: + remove_proc_entry("mcfilter", net->proc_net); out_mcfilter: remove_proc_entry("igmp", net->proc_net); out_igmp: @@ -2744,6 +2747,7 @@ static void __net_exit igmp_net_exit(struct net *net) { remove_proc_entry("mcfilter", net->proc_net); remove_proc_entry("igmp", net->proc_net); + inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk); } static struct pernet_operations igmp_net_ops = { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3e44b9b0b78e..5c3dd6267ed3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -23,6 +23,7 @@ #include <net/route.h> #include <net/tcp_states.h> #include <net/xfrm.h> +#include <net/tcp.h> #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -294,8 +295,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; - struct sock *newsk; struct request_sock *req; + struct sock *newsk; int error; lock_sock(sk); @@ -324,9 +325,11 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) newsk = req->sk; sk_acceptq_removed(sk); - if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { + if (sk->sk_protocol == IPPROTO_TCP && + tcp_rsk(req)->tfo_listener && + queue->fastopenq) { spin_lock_bh(&queue->fastopenq->lock); - if (tcp_rsk(req)->listener) { + if (tcp_rsk(req)->tfo_listener) { /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to * NULL to signify that the child socket is taken @@ -341,7 +344,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) out: release_sock(sk); if (req) - __reqsk_free(req); + reqsk_put(req); return newsk; out_err: newsk = NULL; @@ -400,18 +403,17 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, const struct request_sock *req) { - struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); - struct ip_options_rcu *opt = inet_rsk(req)->opt; - struct net *net = sock_net(sk); - int flags = inet_sk_flowi_flags(sk); + struct net *net = read_pnet(&ireq->ireq_net); + struct ip_options_rcu *opt = ireq->opt; + struct rtable *rt; - flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, - sk->sk_protocol, - flags, + sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); + ireq->ir_loc_addr, ireq->ir_rmt_port, + htons(ireq->ir_num)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -433,9 +435,9 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, const struct request_sock *req) { const struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = read_pnet(&ireq->ireq_net); struct inet_sock *newinet = inet_sk(newsk); struct ip_options_rcu *opt; - struct net *net = sock_net(sk); struct flowi4 *fl4; struct rtable *rt; @@ -443,11 +445,12 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rcu_read_lock(); opt = rcu_dereference(newinet->inet_opt); - flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark, + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); + ireq->ir_loc_addr, ireq->ir_rmt_port, + htons(ireq->ir_num)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -475,33 +478,37 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, #if IS_ENABLED(CONFIG_IPV6) #define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else -#define AF_INET_FAMILY(fam) 1 +#define AF_INET_FAMILY(fam) true #endif -struct request_sock *inet_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, - const __be16 rport, const __be32 raddr, +/* Note: this is temporary : + * req sock will no longer be in listener hash table +*/ +struct request_sock *inet_csk_search_req(struct sock *sk, + const __be16 rport, + const __be32 raddr, const __be32 laddr) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; + struct request_sock *req; + u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - prev = &req->dl_next) { + spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && ireq->ir_rmt_addr == raddr && ireq->ir_loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk); - *prevp = prev; break; } } + spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); return req; } @@ -557,23 +564,24 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); -void inet_csk_reqsk_queue_prune(struct sock *parent, - const unsigned long interval, - const unsigned long timeout, - const unsigned long max_rto) +static void reqsk_timer_handler(unsigned long data) { - struct inet_connection_sock *icsk = inet_csk(parent); + struct request_sock *req = (struct request_sock *)data; + struct sock *sk_listener = req->rsk_listener; + struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct listen_sock *lopt = queue->listen_opt; - int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; - int thresh = max_retries; - unsigned long now = jiffies; - struct request_sock **reqp, *req; - int i, budget; + int qlen, expire = 0, resend = 0; + int max_retries, thresh; + u8 defer_accept; - if (lopt == NULL || lopt->qlen == 0) + if (sk_listener->sk_state != TCP_LISTEN || !lopt) { + reqsk_put(req); return; + } + max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + thresh = max_retries; /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means @@ -591,67 +599,65 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, * embrions; and abort old ones without pity, if old * ones are about to clog our table. */ - if (lopt->qlen>>(lopt->max_qlen_log-1)) { - int young = (lopt->qlen_young<<1); + qlen = listen_sock_qlen(lopt); + if (qlen >> (lopt->max_qlen_log - 1)) { + int young = listen_sock_young(lopt) << 1; while (thresh > 2) { - if (lopt->qlen < young) + if (qlen < young) break; thresh--; young <<= 1; } } + defer_accept = READ_ONCE(queue->rskq_defer_accept); + if (defer_accept) + max_retries = defer_accept; + syn_ack_recalc(req, thresh, max_retries, defer_accept, + &expire, &resend); + req->rsk_ops->syn_ack_timeout(req); + if (!expire && + (!resend || + !inet_rtx_syn_ack(sk_listener, req) || + inet_rsk(req)->acked)) { + unsigned long timeo; + + if (req->num_timeout++ == 0) + atomic_inc(&lopt->young_dec); + timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); + mod_timer_pinned(&req->rsk_timer, jiffies + timeo); + return; + } + inet_csk_reqsk_queue_drop(sk_listener, req); + reqsk_put(req); +} - if (queue->rskq_defer_accept) - max_retries = queue->rskq_defer_accept; - - budget = 2 * (lopt->nr_table_entries / (timeout / interval)); - i = lopt->clock_hand; - - do { - reqp=&lopt->syn_table[i]; - while ((req = *reqp) != NULL) { - if (time_after_eq(now, req->expires)) { - int expire = 0, resend = 0; - - syn_ack_recalc(req, thresh, max_retries, - queue->rskq_defer_accept, - &expire, &resend); - req->rsk_ops->syn_ack_timeout(parent, req); - if (!expire && - (!resend || - !inet_rtx_syn_ack(parent, req) || - inet_rsk(req)->acked)) { - unsigned long timeo; - - if (req->num_timeout++ == 0) - lopt->qlen_young--; - timeo = min(timeout << req->num_timeout, - max_rto); - req->expires = now + timeo; - reqp = &req->dl_next; - continue; - } - - /* Drop this request */ - inet_csk_reqsk_queue_unlink(parent, req, reqp); - reqsk_queue_removed(queue, req); - reqsk_free(req); - continue; - } - reqp = &req->dl_next; - } +void reqsk_queue_hash_req(struct request_sock_queue *queue, + u32 hash, struct request_sock *req, + unsigned long timeout) +{ + struct listen_sock *lopt = queue->listen_opt; - i = (i + 1) & (lopt->nr_table_entries - 1); + req->num_retrans = 0; + req->num_timeout = 0; + req->sk = NULL; - } while (--budget > 0); + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 2); + setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); + req->rsk_hash = hash; - lopt->clock_hand = i; + spin_lock(&queue->syn_wait_lock); + req->dl_next = lopt->syn_table[hash]; + lopt->syn_table[hash] = req; + spin_unlock(&queue->syn_wait_lock); - if (lopt->qlen) - inet_csk_reset_keepalive_timer(parent, interval); + mod_timer_pinned(&req->rsk_timer, jiffies + timeout); } -EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); +EXPORT_SYMBOL(reqsk_queue_hash_req); /** * inet_csk_clone_lock - clone an inet socket, and lock its clone @@ -667,7 +673,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, { struct sock *newsk = sk_clone_lock(sk, priority); - if (newsk != NULL) { + if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); newsk->sk_state = TCP_SYN_RECV; @@ -679,6 +685,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newsk->sk_write_space = sk_stream_write_space; newsk->sk_mark = inet_rsk(req)->ir_mark; + atomic64_set(&newsk->sk_cookie, + atomic64_read(&inet_rsk(req)->ir_cookie)); newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; @@ -785,8 +793,6 @@ void inet_csk_listen_stop(struct sock *sk) struct request_sock *acc_req; struct request_sock *req; - inet_csk_delete_keepalive_timer(sk); - /* make all the listen_opt local to us */ acc_req = reqsk_queue_yank_acceptq(queue); @@ -816,9 +822,9 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); - if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(tcp_sk(child)->fastopen_rsk != req); - BUG_ON(sk != tcp_rsk(req)->listener); + BUG_ON(sk != req->rsk_listener); /* Paranoid, to prevent race condition if * an inbound pkt destined for child is @@ -827,7 +833,6 @@ void inet_csk_listen_stop(struct sock *sk) * tcp_v4_destroy_sock(). */ tcp_sk(child)->fastopen_rsk = NULL; - sock_put(sk); } inet_csk_destroy_sock(child); @@ -836,9 +841,9 @@ void inet_csk_listen_stop(struct sock *sk) sock_put(child); sk_acceptq_removed(sk); - __reqsk_free(req); + reqsk_put(req); } - if (queue->fastopenq != NULL) { + if (queue->fastopenq) { /* Free all the reqs queued in rskq_rst_head. */ spin_lock_bh(&queue->fastopenq->lock); acc_req = queue->fastopenq->rskq_rst_head; @@ -846,7 +851,7 @@ void inet_csk_listen_stop(struct sock *sk) spin_unlock_bh(&queue->fastopenq->lock); while ((req = acc_req) != NULL) { acc_req = req->dl_next; - __reqsk_free(req); + reqsk_put(req); } } WARN_ON(sk->sk_ack_backlog); @@ -870,7 +875,7 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_af_ops->compat_getsockopt != NULL) + if (icsk->icsk_af_ops->compat_getsockopt) return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname, optval, optlen); return icsk->icsk_af_ops->getsockopt(sk, level, optname, @@ -883,7 +888,7 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_af_ops->compat_setsockopt != NULL) + if (icsk->icsk_af_ops->compat_setsockopt) return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname, optval, optlen); return icsk->icsk_af_ops->setsockopt(sk, level, optname, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 592aff37366b..70e8b3c308ec 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -38,16 +38,12 @@ static const struct inet_diag_handler **inet_diag_table; struct inet_diag_entry { - __be32 *saddr; - __be32 *daddr; + const __be32 *saddr; + const __be32 *daddr; u16 sport; u16 dport; u16 family; u16 userlocks; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr saddr_storage; /* for IPv4-mapped-IPv6 addresses */ - struct in6_addr daddr_storage; /* for IPv4-mapped-IPv6 addresses */ -#endif }; static DEFINE_MUTEX(inet_diag_table_mutex); @@ -65,12 +61,35 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto) return inet_diag_table[proto]; } -static inline void inet_diag_unlock_handler( - const struct inet_diag_handler *handler) +static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) { mutex_unlock(&inet_diag_table_mutex); } +static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) +{ + r->idiag_family = sk->sk_family; + + r->id.idiag_sport = htons(sk->sk_num); + r->id.idiag_dport = sk->sk_dport; + r->id.idiag_if = sk->sk_bound_dev_if; + sock_diag_save_cookie(sk, r->id.idiag_cookie); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; + } else +#endif + { + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + + r->id.idiag_src[0] = sk->sk_rcv_saddr; + r->id.idiag_dst[0] = sk->sk_daddr; + } +} + static size_t inet_sk_attr_size(void) { return nla_total_size(sizeof(struct tcp_info)) @@ -86,21 +105,21 @@ static size_t inet_sk_attr_size(void) } int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + struct sk_buff *skb, const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); + const struct inet_diag_handler *handler; + int ext = req->idiag_ext; struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; - const struct inet_diag_handler *handler; - int ext = req->idiag_ext; handler = inet_diag_table[req->sdiag_protocol]; - BUG_ON(handler == NULL); + BUG_ON(!handler); nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); @@ -108,25 +127,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, return -EMSGSIZE; r = nlmsg_data(nlh); - BUG_ON(sk->sk_state == TCP_TIME_WAIT); + BUG_ON(!sk_fullsock(sk)); - r->idiag_family = sk->sk_family; + inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; - r->id.idiag_if = sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); - - r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = inet->inet_dport; - - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - - r->id.idiag_src[0] = inet->inet_rcv_saddr; - r->id.idiag_dst[0] = inet->inet_daddr; - if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) goto errout; @@ -139,10 +146,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { - - *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; - if (ext & (1 << (INET_DIAG_TCLASS - 1))) if (nla_put_u8(skb, INET_DIAG_TCLASS, inet6_sk(sk)->tclass) < 0) @@ -169,7 +172,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; - if (icsk == NULL) { + if (!icsk) { handler->idiag_get_info(sk, r, NULL); goto out; } @@ -227,23 +230,25 @@ errout: EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, + struct sk_buff *skb, + const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, user_ns, portid, seq, nlmsg_flags, unlh); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, + user_ns, portid, seq, nlmsg_flags, unlh); } -static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, struct inet_diag_req_v2 *req, +static int inet_twsk_diag_fill(struct sock *sk, + struct sk_buff *skb, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - s32 tmo; + struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; + long tmo; nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); @@ -253,25 +258,13 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_ttd - inet_tw_time_stamp(); + tmo = tw->tw_timer.expires - jiffies; if (tmo < 0) tmo = 0; - r->idiag_family = tw->tw_family; + inet_diag_msg_common_fill(r, sk); r->idiag_retrans = 0; - r->id.idiag_if = tw->tw_bound_dev_if; - sock_diag_save_cookie(tw, r->id.idiag_cookie); - - r->id.idiag_sport = tw->tw_sport; - r->id.idiag_dport = tw->tw_dport; - - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - - r->id.idiag_src[0] = tw->tw_rcv_saddr; - r->id.idiag_dst[0] = tw->tw_daddr; - r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = jiffies_to_msecs(tmo); @@ -279,61 +272,91 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; -#if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == AF_INET6) { - *(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr; - } -#endif + + nlmsg_end(skb, nlh); + return 0; +} + +static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, + u32 portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), + nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = inet_reqsk(sk)->num_retrans; + + BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != + offsetof(struct sock, sk_cookie)); + + tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; + r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; nlmsg_end(skb, nlh); return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, + return inet_twsk_diag_fill(sk, skb, portid, seq, nlmsg_flags, unlh); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return inet_req_diag_fill(sk, skb, portid, seq, + nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { - int err; - struct sock *sk; - struct sk_buff *rep; struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; err = -EINVAL; - if (req->sdiag_family == AF_INET) { + if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); - } #if IS_ENABLED(CONFIG_IPV6) - else if (req->sdiag_family == AF_INET6) { + else if (req->sdiag_family == AF_INET6) sk = inet6_lookup(net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, req->id.idiag_if); - } #endif - else { + else goto out_nosk; - } err = -ENOENT; - if (sk == NULL) + if (!sk) goto out_nosk; err = sock_diag_check_cookie(sk, req->id.idiag_cookie); @@ -371,7 +394,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { const struct inet_diag_handler *handler; int err; @@ -412,9 +435,8 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) return 1; } - static int inet_diag_bc_run(const struct nlattr *_bc, - const struct inet_diag_entry *entry) + const struct inet_diag_entry *entry) { const void *bc = nla_data(_bc); int len = nla_len(_bc); @@ -446,10 +468,10 @@ static int inet_diag_bc_run(const struct nlattr *_bc, break; case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: { - struct inet_diag_hostcond *cond; - __be32 *addr; + const struct inet_diag_hostcond *cond; + const __be32 *addr; - cond = (struct inet_diag_hostcond *)(op + 1); + cond = (const struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && cond->port != (op->code == INET_DIAG_BC_S_COND ? entry->sport : entry->dport)) { @@ -498,29 +520,36 @@ static int inet_diag_bc_run(const struct nlattr *_bc, return len == 0; } +/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV) + */ +static void entry_fill_addrs(struct inet_diag_entry *entry, + const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32; + entry->daddr = sk->sk_v6_daddr.s6_addr32; + } else +#endif + { + entry->saddr = &sk->sk_rcv_saddr; + entry->daddr = &sk->sk_daddr; + } +} + int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) { - struct inet_diag_entry entry; struct inet_sock *inet = inet_sk(sk); + struct inet_diag_entry entry; - if (bc == NULL) + if (!bc) return 1; entry.family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - if (entry.family == AF_INET6) { - - entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32; - entry.daddr = sk->sk_v6_daddr.s6_addr32; - } else -#endif - { - entry.saddr = &inet->inet_rcv_saddr; - entry.daddr = &inet->inet_daddr; - } + entry_fill_addrs(&entry, sk); entry.sport = inet->inet_num; entry.dport = ntohs(inet->inet_dport); - entry.userlocks = sk->sk_userlocks; + entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; return inet_diag_bc_run(bc, &entry); } @@ -547,8 +576,8 @@ static int valid_cc(const void *bc, int len, int cc) static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, int *min_len) { - int addr_len; struct inet_diag_hostcond *cond; + int addr_len; /* Check hostcond space. */ *min_len += sizeof(struct inet_diag_hostcond); @@ -582,8 +611,8 @@ static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, } /* Validate a port comparison operator. */ -static inline bool valid_port_comparison(const struct inet_diag_bc_op *op, - int len, int *min_len) +static bool valid_port_comparison(const struct inet_diag_bc_op *op, + int len, int *min_len) { /* Port comparisons put the port in a follow-on inet_diag_bc_op. */ *min_len += sizeof(struct inet_diag_bc_op); @@ -598,10 +627,9 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) int len = bytecode_len; while (len > 0) { - const struct inet_diag_bc_op *op = bc; int min_len = sizeof(struct inet_diag_bc_op); + const struct inet_diag_bc_op *op = bc; -//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: @@ -642,7 +670,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, const struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) @@ -654,139 +682,42 @@ static int inet_csk_diag_dump(struct sock *sk, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } -static int inet_twsk_diag_dump(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - const struct nlattr *bc) +static void twsk_build_assert(void) { - struct inet_timewait_sock *tw = inet_twsk(sk); + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != + offsetof(struct sock, sk_family)); - if (bc != NULL) { - struct inet_diag_entry entry; - - entry.family = tw->tw_family; -#if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == AF_INET6) { - entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32; - entry.daddr = tw->tw_v6_daddr.s6_addr32; - } else -#endif - { - entry.saddr = &tw->tw_rcv_saddr; - entry.daddr = &tw->tw_daddr; - } - entry.sport = tw->tw_num; - entry.dport = ntohs(tw->tw_dport); - entry.userlocks = 0; + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != + offsetof(struct inet_sock, inet_num)); - if (!inet_diag_bc_run(bc, &entry)) - return 0; - } + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != + offsetof(struct inet_sock, inet_dport)); - return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); -} + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != + offsetof(struct inet_sock, inet_rcv_saddr)); -/* Get the IPv4, IPv6, or IPv4-mapped-IPv6 local and remote addresses - * from a request_sock. For IPv4-mapped-IPv6 we must map IPv4 to IPv6. - */ -static inline void inet_diag_req_addrs(const struct sock *sk, - const struct request_sock *req, - struct inet_diag_entry *entry) -{ - struct inet_request_sock *ireq = inet_rsk(req); + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != + offsetof(struct inet_sock, inet_daddr)); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (req->rsk_ops->family == AF_INET6) { - entry->saddr = ireq->ir_v6_loc_addr.s6_addr32; - entry->daddr = ireq->ir_v6_rmt_addr.s6_addr32; - } else if (req->rsk_ops->family == AF_INET) { - ipv6_addr_set_v4mapped(ireq->ir_loc_addr, - &entry->saddr_storage); - ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, - &entry->daddr_storage); - entry->saddr = entry->saddr_storage.s6_addr32; - entry->daddr = entry->daddr_storage.s6_addr32; - } - } else -#endif - { - entry->saddr = &ireq->ir_loc_addr; - entry->daddr = &ireq->ir_rmt_addr; - } -} - -static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, - const struct nlmsghdr *unlh) -{ - const struct inet_request_sock *ireq = inet_rsk(req); - struct inet_sock *inet = inet_sk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != + offsetof(struct sock, sk_v6_rcv_saddr)); - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - r->idiag_family = sk->sk_family; - r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; - r->idiag_retrans = req->num_retrans; - - r->id.idiag_if = sk->sk_bound_dev_if; - sock_diag_save_cookie(req, r->id.idiag_cookie); - - tmo = req->expires - jiffies; - if (tmo < 0) - tmo = 0; - - r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = ireq->ir_rmt_port; - - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - - r->id.idiag_src[0] = ireq->ir_loc_addr; - r->id.idiag_dst[0] = ireq->ir_rmt_addr; - - r->idiag_expires = jiffies_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); - r->idiag_inode = 0; -#if IS_ENABLED(CONFIG_IPV6) - if (r->idiag_family == AF_INET6) { - struct inet_diag_entry entry; - inet_diag_req_addrs(sk, req, &entry); - memcpy(r->id.idiag_src, entry.saddr, sizeof(struct in6_addr)); - memcpy(r->id.idiag_dst, entry.daddr, sizeof(struct in6_addr)); - } + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != + offsetof(struct sock, sk_v6_daddr)); #endif - - nlmsg_end(skb, nlh); - return 0; } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, const struct nlattr *bc) { - struct inet_diag_entry entry; struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt; struct inet_sock *inet = inet_sk(sk); - int j, s_j; - int reqnum, s_reqnum; + struct inet_diag_entry entry; + int j, s_j, reqnum, s_reqnum; + struct listen_sock *lopt; int err = 0; s_j = cb->args[3]; @@ -797,13 +728,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.family = sk->sk_family; - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); lopt = icsk->icsk_accept_queue.listen_opt; - if (!lopt || !lopt->qlen) + if (!lopt || !listen_sock_qlen(lopt)) goto out; - if (bc != NULL) { + if (bc) { entry.sport = inet->inet_num; entry.userlocks = sk->sk_userlocks; } @@ -822,17 +753,18 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, continue; if (bc) { - inet_diag_req_addrs(sk, req, &entry); + /* Note: entry.sport and entry.userlocks are already set */ + entry_fill_addrs(&entry, req_to_sk(req)); entry.dport = ntohs(ireq->ir_rmt_port); if (!inet_diag_bc_run(bc, &entry)) continue; } - err = inet_diag_fill_req(skb, sk, req, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, cb->nlh); + err = inet_req_diag_fill(req_to_sk(req), skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb->nlh); if (err < 0) { cb->args[3] = j + 1; cb->args[4] = reqnum; @@ -844,17 +776,17 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } out: - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); return err; } void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc) + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) { - int i, num; - int s_i, s_num; struct net *net = sock_net(skb->sk); + int i, num, s_i, s_num; s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -864,9 +796,9 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, goto skip_listen_ht; for (i = s_i; i < INET_LHTABLE_SIZE; i++) { - struct sock *sk; - struct hlist_nulls_node *node; struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; + struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; @@ -883,7 +815,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, } if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) + sk->sk_family != r->sdiag_family) goto next_listen; if (r->id.idiag_sport != inet->inet_sport && @@ -931,8 +863,8 @@ skip_listen_ht: for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); - struct sock *sk; struct hlist_nulls_node *node; + struct sock *sk; num = 0; @@ -944,8 +876,7 @@ skip_listen_ht: spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - int res; - int state; + int state, res; if (!net_eq(sock_net(sk), net)) continue; @@ -964,10 +895,16 @@ skip_listen_ht: if (r->id.idiag_dport != sk->sk_dport && r->id.idiag_dport) goto next_normal; - if (sk->sk_state == TCP_TIME_WAIT) - res = inet_twsk_diag_dump(sk, skb, cb, r, bc); - else - res = inet_csk_diag_dump(sk, skb, cb, r, bc); + twsk_build_assert(); + + if (!inet_diag_bc_sk(bc, sk)) + goto next_normal; + + res = sk_diag_fill(sk, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh); if (res < 0) { spin_unlock_bh(lock); goto done; @@ -988,7 +925,8 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, + struct nlattr *bc) { const struct inet_diag_handler *handler; int err = 0; @@ -1005,8 +943,8 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct nlattr *bc = NULL; int hdrlen = sizeof(struct inet_diag_req_v2); + struct nlattr *bc = NULL; if (nlmsg_attrlen(cb->nlh, hdrlen)) bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); @@ -1014,7 +952,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc); } -static inline int inet_diag_type2proto(int type) +static int inet_diag_type2proto(int type) { switch (type) { case TCPDIAG_GETSOCK: @@ -1026,12 +964,13 @@ static inline int inet_diag_type2proto(int type) } } -static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_diag_dump_compat(struct sk_buff *skb, + struct netlink_callback *cb) { struct inet_diag_req *rc = nlmsg_data(cb->nlh); + int hdrlen = sizeof(struct inet_diag_req); struct inet_diag_req_v2 req; struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req); req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); @@ -1046,7 +985,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c } static int inet_diag_get_exact_compat(struct sk_buff *in_skb, - const struct nlmsghdr *nlh) + const struct nlmsghdr *nlh) { struct inet_diag_req *rc = nlmsg_data(nlh); struct inet_diag_req_v2 req; @@ -1075,7 +1014,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) attr = nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - if (attr == NULL || + if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op) || inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; @@ -1102,9 +1041,10 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; + attr = nlmsg_find_attr(h, hdrlen, INET_DIAG_REQ_BYTECODE); - if (attr == NULL || + if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op) || inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; @@ -1140,7 +1080,7 @@ int inet_diag_register(const struct inet_diag_handler *h) mutex_lock(&inet_diag_table_mutex); err = -EEXIST; - if (inet_diag_table[type] == NULL) { + if (!inet_diag_table[type]) { inet_diag_table[type] = h; err = 0; } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index e7920352646a..5e346a082e5f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -385,7 +385,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); - if (q == NULL) + if (!q) return NULL; q->net = nf; @@ -406,7 +406,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, struct inet_frag_queue *q; q = inet_frag_alloc(nf, f, arg); - if (q == NULL) + if (!q) return NULL; return inet_frag_intern(nf, q, f, arg); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9111a4e22155..c6fb80bd5826 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -24,9 +24,9 @@ #include <net/secure_seq.h> #include <net/ip.h> -static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +static u32 inet_ehashfn(const struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) { static u32 inet_ehash_secret __read_mostly; @@ -36,17 +36,21 @@ static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, inet_ehash_secret + net_hash_mix(net)); } - -static unsigned int inet_sk_ehashfn(const struct sock *sk) +/* This function handles inet_sock, but also timewait and request sockets + * for IPv4/IPv6. + */ +u32 sk_ehashfn(const struct sock *sk) { - const struct inet_sock *inet = inet_sk(sk); - const __be32 laddr = inet->inet_rcv_saddr; - const __u16 lport = inet->inet_num; - const __be32 faddr = inet->inet_daddr; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet_ehashfn(net, laddr, lport, faddr, fport); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return inet6_ehashfn(sock_net(sk), + &sk->sk_v6_rcv_saddr, sk->sk_num, + &sk->sk_v6_daddr, sk->sk_dport); +#endif + return inet_ehashfn(sock_net(sk), + sk->sk_rcv_saddr, sk->sk_num, + sk->sk_daddr, sk->sk_dport); } /* @@ -60,8 +64,8 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); - if (tb != NULL) { - write_pnet(&tb->ib_net, hold_net(net)); + if (tb) { + write_pnet(&tb->ib_net, net); tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; @@ -79,7 +83,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } @@ -263,11 +266,19 @@ void sock_gen_put(struct sock *sk) if (sk->sk_state == TCP_TIME_WAIT) inet_twsk_free(inet_twsk(sk)); + else if (sk->sk_state == TCP_NEW_SYN_RECV) + reqsk_free(inet_reqsk(sk)); else sk_free(sk); } EXPORT_SYMBOL_GPL(sock_gen_put); +void sock_edemux(struct sk_buff *skb) +{ + sock_gen_put(skb->sk); +} +EXPORT_SYMBOL(sock_edemux); + struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, @@ -377,7 +388,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); } @@ -400,13 +411,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; - spinlock_t *lock; struct inet_ehash_bucket *head; + spinlock_t *lock; int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); - sk->sk_hash = inet_sk_ehashfn(sk); + sk->sk_hash = sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); @@ -423,15 +434,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); -static void __inet_hash(struct sock *sk) +int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; - if (sk->sk_state != TCP_LISTEN) { - __inet_hash_nolisten(sk, NULL); - return; - } + if (sk->sk_state != TCP_LISTEN) + return __inet_hash_nolisten(sk, tw); WARN_ON(!sk_unhashed(sk)); ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -440,13 +449,15 @@ static void __inet_hash(struct sock *sk) __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); spin_unlock(&ilb->lock); + return 0; } +EXPORT_SYMBOL(__inet_hash); void inet_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __inet_hash(sk); + __inet_hash(sk, NULL); local_bh_enable(); } } @@ -477,8 +488,7 @@ EXPORT_SYMBOL_GPL(inet_unhash); int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, - struct sock *, __u16, struct inet_timewait_sock **), - int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)) + struct sock *, __u16, struct inet_timewait_sock **)) { struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->inet_num; @@ -548,14 +558,14 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - twrefcnt += hash(sk, tw); + twrefcnt += __inet_hash_nolisten(sk, tw); } if (tw) twrefcnt += inet_twsk_bind_unhash(tw, hinfo); spin_unlock(&head->lock); if (tw) { - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); while (twrefcnt) { twrefcnt--; inet_twsk_put(tw); @@ -570,7 +580,7 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - hash(sk, NULL); + __inet_hash_nolisten(sk, NULL); spin_unlock_bh(&head->lock); return 0; } else { @@ -590,7 +600,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), - __inet_check_established, __inet_hash_nolisten); + __inet_check_established); } EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 6d592f8555fb..00ec8d5d7e7e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -67,9 +67,9 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, } /* Must be called with locally disabled BHs. */ -static void __inet_twsk_kill(struct inet_timewait_sock *tw, - struct inet_hashinfo *hashinfo) +static void inet_twsk_kill(struct inet_timewait_sock *tw) { + struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; struct inet_bind_hashbucket *bhead; int refcnt; /* Unlink from established hashes. */ @@ -89,6 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); atomic_sub(refcnt, &tw->tw_refcnt); + atomic_dec(&tw->tw_dr->tw_count); + inet_twsk_put(tw); } void inet_twsk_free(struct inet_timewait_sock *tw) @@ -98,7 +100,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw) #ifdef SOCK_REFCNT_DEBUG pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - release_net(twsk_net(tw)); kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } @@ -169,16 +170,34 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); -struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) +void tw_timer_handler(unsigned long data) { - struct inet_timewait_sock *tw = - kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, - GFP_ATOMIC); - if (tw != NULL) { + struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; + + if (tw->tw_kill) + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); + else + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); + inet_twsk_kill(tw); +} + +struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + struct inet_timewait_death_row *dr, + const int state) +{ + struct inet_timewait_sock *tw; + + if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets) + return NULL; + + tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, + GFP_ATOMIC); + if (tw) { const struct inet_sock *inet = inet_sk(sk); kmemcheck_annotate_bitfield(tw, flags); + tw->tw_dr = dr; /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr; @@ -195,14 +214,16 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_ipv6only = 0; tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; - twsk_net_set(tw, hold_net(sock_net(sk))); + atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); + twsk_net_set(tw, sock_net(sk)); + setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this * timewait socket. */ atomic_set(&tw->tw_refcnt, 0); - inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } @@ -210,139 +231,20 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat } EXPORT_SYMBOL_GPL(inet_twsk_alloc); -/* Returns non-zero if quota exceeded. */ -static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, - const int slot) -{ - struct inet_timewait_sock *tw; - unsigned int killed; - int ret; - - /* NOTE: compare this to previous version where lock - * was released after detaching chain. It was racy, - * because tw buckets are scheduled in not serialized context - * in 2.3 (with netfilter), and with softnet it is common, because - * soft irqs are not sequenced. - */ - killed = 0; - ret = 0; -rescan: - inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { - __inet_twsk_del_dead_node(tw); - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); -#ifdef CONFIG_NET_NS - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); -#endif - inet_twsk_put(tw); - killed++; - spin_lock(&twdr->death_lock); - if (killed > INET_TWDR_TWKILL_QUOTA) { - ret = 1; - break; - } - - /* While we dropped twdr->death_lock, another cpu may have - * killed off the next TW bucket in the list, therefore - * do a fresh re-read of the hlist head node with the - * lock reacquired. We still use the hlist traversal - * macro in order to get the prefetches. - */ - goto rescan; - } - - twdr->tw_count -= killed; -#ifndef CONFIG_NET_NS - NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); -#endif - return ret; -} - -void inet_twdr_hangman(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - unsigned int need_timer; - - twdr = (struct inet_timewait_death_row *)data; - spin_lock(&twdr->death_lock); - - if (twdr->tw_count == 0) - goto out; - - need_timer = 0; - if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { - twdr->thread_slots |= (1 << twdr->slot); - schedule_work(&twdr->twkill_work); - need_timer = 1; - } else { - /* We purged the entire slot, anything left? */ - if (twdr->tw_count) - need_timer = 1; - twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); - } - if (need_timer) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); -out: - spin_unlock(&twdr->death_lock); -} -EXPORT_SYMBOL_GPL(inet_twdr_hangman); - -void inet_twdr_twkill_work(struct work_struct *work) -{ - struct inet_timewait_death_row *twdr = - container_of(work, struct inet_timewait_death_row, twkill_work); - int i; - - BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) > - (sizeof(twdr->thread_slots) * 8)); - - while (twdr->thread_slots) { - spin_lock_bh(&twdr->death_lock); - for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { - if (!(twdr->thread_slots & (1 << i))) - continue; - - while (inet_twdr_do_twkill_work(twdr, i) != 0) { - if (need_resched()) { - spin_unlock_bh(&twdr->death_lock); - schedule(); - spin_lock_bh(&twdr->death_lock); - } - } - - twdr->thread_slots &= ~(1 << i); - } - spin_unlock_bh(&twdr->death_lock); - } -} -EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); - /* These are always called from BH context. See callers in * tcp_input.c to verify this. */ /* This is for handling early-kills of TIME_WAIT sockets. */ -void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr) +void inet_twsk_deschedule(struct inet_timewait_sock *tw) { - spin_lock(&twdr->death_lock); - if (inet_twsk_del_dead_node(tw)) { - inet_twsk_put(tw); - if (--twdr->tw_count == 0) - del_timer(&twdr->tw_timer); - } - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); + if (del_timer_sync(&tw->tw_timer)) + inet_twsk_kill(tw); } EXPORT_SYMBOL(inet_twsk_deschedule); -void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo, const int timewait_len) +void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) { - struct hlist_head *list; - int slot; - /* timeout := RTO * 3.5 * * 3.5 = 1+2+0.5 to wait for two retransmits. @@ -367,115 +269,15 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, * is greater than TS tick!) and detect old duplicates with help * of PAWS. */ - slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; - spin_lock(&twdr->death_lock); - - /* Unlink it, if it was scheduled */ - if (inet_twsk_del_dead_node(tw)) - twdr->tw_count--; - else + tw->tw_kill = timeo <= 4*HZ; + if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { atomic_inc(&tw->tw_refcnt); - - if (slot >= INET_TWDR_RECYCLE_SLOTS) { - /* Schedule to slow timer */ - if (timeo >= timewait_len) { - slot = INET_TWDR_TWKILL_SLOTS - 1; - } else { - slot = DIV_ROUND_UP(timeo, twdr->period); - if (slot >= INET_TWDR_TWKILL_SLOTS) - slot = INET_TWDR_TWKILL_SLOTS - 1; - } - tw->tw_ttd = inet_tw_time_stamp() + timeo; - slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); - list = &twdr->cells[slot]; - } else { - tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK); - - if (twdr->twcal_hand < 0) { - twdr->twcal_hand = 0; - twdr->twcal_jiffie = jiffies; - twdr->twcal_timer.expires = twdr->twcal_jiffie + - (slot << INET_TWDR_RECYCLE_TICK); - add_timer(&twdr->twcal_timer); - } else { - if (time_after(twdr->twcal_timer.expires, - jiffies + (slot << INET_TWDR_RECYCLE_TICK))) - mod_timer(&twdr->twcal_timer, - jiffies + (slot << INET_TWDR_RECYCLE_TICK)); - slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - list = &twdr->twcal_row[slot]; + atomic_inc(&tw->tw_dr->tw_count); } - - hlist_add_head(&tw->tw_death_node, list); - - if (twdr->tw_count++ == 0) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); - spin_unlock(&twdr->death_lock); } EXPORT_SYMBOL_GPL(inet_twsk_schedule); -void inet_twdr_twcal_tick(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - int n, slot; - unsigned long j; - unsigned long now = jiffies; - int killed = 0; - int adv = 0; - - twdr = (struct inet_timewait_death_row *)data; - - spin_lock(&twdr->death_lock); - if (twdr->twcal_hand < 0) - goto out; - - slot = twdr->twcal_hand; - j = twdr->twcal_jiffie; - - for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { - if (time_before_eq(j, now)) { - struct hlist_node *safe; - struct inet_timewait_sock *tw; - - inet_twsk_for_each_inmate_safe(tw, safe, - &twdr->twcal_row[slot]) { - __inet_twsk_del_dead_node(tw); - __inet_twsk_kill(tw, twdr->hashinfo); -#ifdef CONFIG_NET_NS - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); -#endif - inet_twsk_put(tw); - killed++; - } - } else { - if (!adv) { - adv = 1; - twdr->twcal_jiffie = j; - twdr->twcal_hand = slot; - } - - if (!hlist_empty(&twdr->twcal_row[slot])) { - mod_timer(&twdr->twcal_timer, j); - goto out; - } - } - j += 1 << INET_TWDR_RECYCLE_TICK; - slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - twdr->twcal_hand = -1; - -out: - if ((twdr->tw_count -= killed) == 0) - del_timer(&twdr->tw_timer); -#ifndef CONFIG_NET_NS - NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed); -#endif - spin_unlock(&twdr->death_lock); -} -EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); - void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { @@ -487,6 +289,7 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; restart_rcu: + cond_resched(); rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { @@ -508,7 +311,7 @@ restart: rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule(tw, twdr); + inet_twsk_deschedule(tw); local_bh_enable(); inet_twsk_put(tw); goto restart_rcu; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index d9bc28ac5d1b..939992c456f3 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -57,7 +57,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) } -static int ip_forward_finish(struct sk_buff *skb) +static int ip_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -68,7 +68,7 @@ static int ip_forward_finish(struct sk_buff *skb) ip_forward_options(skb); skb_sender_cpu_clear(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } int ip_forward(struct sk_buff *skb) @@ -136,8 +136,8 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); - return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, - rt->dst.dev, ip_forward_finish); + return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, + skb->dev, rt->dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 145a50c4d566..cc1da6d9cb35 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -372,7 +372,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (pskb_pull(skb, ihl) == NULL) + if (!pskb_pull(skb, ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -537,7 +537,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, qp->q.fragments = head; } - WARN_ON(head == NULL); + WARN_ON(!head); WARN_ON(FRAG_CB(head)->offset != 0); /* Allocate a new buffer for the datagram. */ @@ -559,7 +559,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct sk_buff *clone; int i, plen = 0; - if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) goto out_nomem; clone->next = head->next; head->next = clone; @@ -638,7 +639,8 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ - if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { + qp = ip_find(net, ip_hdr(skb), user); + if (qp) { int ret; spin_lock(&qp->q.lock); @@ -754,7 +756,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) table = ip4_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); - if (table == NULL) + if (!table) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; @@ -770,7 +772,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) } hdr = register_net_sysctl(net, "net/ipv4", table); - if (hdr == NULL) + if (!hdr) goto err_reg; net->ipv4.frags_hdr = hdr; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6207275fc749..5fd706473c73 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -182,7 +182,7 @@ static int ipgre_err(struct sk_buff *skb, u32 info, t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, iph->daddr, iph->saddr, tpi->key); - if (t == NULL) + if (!t) return PACKET_REJECT; if (t->parms.iph.daddr == 0 || @@ -423,7 +423,7 @@ static int ipgre_open(struct net_device *dev) return -EADDRNOTAVAIL; dev = rt->dst.dev; ip_rt_put(rt); - if (__in_dev_get_rtnl(dev) == NULL) + if (!__in_dev_get_rtnl(dev)) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); @@ -456,6 +456,7 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_do_ioctl = ipgre_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip_tunnel_get_iflink, }; #define GRE_FEATURES (NETIF_F_SG | \ @@ -621,10 +622,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); if (data[IFLA_GRE_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); + parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]); if (data[IFLA_GRE_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); + parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]); if (data[IFLA_GRE_TTL]) parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); @@ -686,6 +687,7 @@ static const struct net_device_ops gre_tap_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip_tunnel_get_iflink, }; static void ipgre_tap_setup(struct net_device *dev) @@ -776,8 +778,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || - nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || - nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) || + nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || + nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || nla_put_u8(skb, IFLA_GRE_PMTUDISC, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3d4da2c16b6a..2db4c8773c1b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -187,7 +187,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } -static int ip_local_deliver_finish(struct sk_buff *skb) +static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); @@ -203,7 +203,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) raw = raw_local_deliver(skb, protocol); ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot != NULL) { + if (ipprot) { int ret; if (!ipprot->no_policy) { @@ -253,7 +253,8 @@ int ip_local_deliver(struct sk_buff *skb) return 0; } - return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb, + skb->dev, NULL, ip_local_deliver_finish); } @@ -309,12 +310,12 @@ drop: int sysctl_ip_early_demux __read_mostly = 1; EXPORT_SYMBOL(sysctl_ip_early_demux); -static int ip_rcv_finish(struct sk_buff *skb) +static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) { const struct net_protocol *ipprot; int protocol = iph->protocol; @@ -387,7 +388,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) { IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto out; } @@ -450,7 +452,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, + dev, NULL, ip_rcv_finish); csum_error: diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 5b3d91be2db0..bd246792360b 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -264,7 +264,7 @@ int ip_options_compile(struct net *net, unsigned char *iph; int optlen, l; - if (skb != NULL) { + if (skb) { rt = skb_rtable(skb); optptr = (unsigned char *)&(ip_hdr(skb)[1]); } else diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a7aea2048a0d..c65b93a7b711 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -91,14 +91,19 @@ void ip_send_check(struct iphdr *iph) } EXPORT_SYMBOL(ip_send_check); -int __ip_local_out(struct sk_buff *skb) +int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL, + skb_dst(skb)->dev, dst_output_sk); +} + +int __ip_local_out(struct sk_buff *skb) +{ + return __ip_local_out_sk(skb->sk, skb); } int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) @@ -148,7 +153,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, sk); + ip_select_ident(sock_net(sk), skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -163,7 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, } EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); -static inline int ip_finish_output2(struct sk_buff *skb) +static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; @@ -182,7 +187,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct sk_buff *skb2; skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (skb2 == NULL) { + if (!skb2) { kfree_skb(skb); return -ENOMEM; } @@ -211,7 +216,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip_finish_output_gso(struct sk_buff *skb) +static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb) { netdev_features_t features; struct sk_buff *segs; @@ -220,7 +225,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) /* common case: locally created skb or seglen is <= mtu */ if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) - return ip_finish_output2(skb); + return ip_finish_output2(sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. * @@ -243,7 +248,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) int err; segs->next = NULL; - err = ip_fragment(segs, ip_finish_output2); + err = ip_fragment(sk, segs, ip_finish_output2); if (err && ret == 0) ret = err; @@ -253,22 +258,22 @@ static int ip_finish_output_gso(struct sk_buff *skb) return ret; } -static int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sock *sk, struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ - if (skb_dst(skb)->xfrm != NULL) { + if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif if (skb_is_gso(skb)) - return ip_finish_output_gso(skb); + return ip_finish_output_gso(sk, skb); if (skb->len > ip_skb_dst_mtu(skb)) - return ip_fragment(skb, ip_finish_output2); + return ip_fragment(sk, skb, ip_finish_output2); - return ip_finish_output2(skb); + return ip_finish_output2(sk, skb); } int ip_mc_output(struct sock *sk, struct sk_buff *skb) @@ -307,7 +312,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, - newskb, NULL, newskb->dev, + sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } @@ -322,11 +327,11 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, skb->dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -340,7 +345,8 @@ int ip_output(struct sock *sk, struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, + NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -376,12 +382,12 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) inet_opt = rcu_dereference(inet->inet_opt); fl4 = &fl->u.ip4; rt = skb_rtable(skb); - if (rt != NULL) + if (rt) goto packet_routed; /* Make sure we can route this packet. */ rt = (struct rtable *)__sk_dst_check(sk, 0); - if (rt == NULL) { + if (!rt) { __be32 daddr; /* Use correct destination address if we have options. */ @@ -430,7 +436,8 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); + ip_select_ident_segs(sock_net(sk), skb, sk, + skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; @@ -448,7 +455,6 @@ no_route: } EXPORT_SYMBOL(ip_queue_xmit); - static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) { to->pkt_type = from->pkt_type; @@ -479,7 +485,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct iphdr *iph; int ptr; @@ -586,13 +593,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip_options_fragment(frag); offset += skb->len - hlen; iph->frag_off = htons(offset>>3); - if (frag->next != NULL) + if (frag->next) iph->frag_off |= htons(IP_MF); /* Ready, complete checksum */ ip_send_check(iph); } - err = output(skb); + err = output(sk, skb); if (!err) IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); @@ -636,10 +643,7 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ - /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header - */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); + ll_rs = LL_RESERVED_SPACE(rt->dst.dev); /* * Fragment the datagram. @@ -732,7 +736,7 @@ slow_path: ip_send_check(iph); - err = output(skb2); + err = output(sk, skb2); if (err) goto fail; @@ -792,12 +796,13 @@ static inline int ip_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - if ((skb = skb_peek_tail(queue)) == NULL) { + skb = skb_peek_tail(queue); + if (!skb) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); - if (skb == NULL) + if (!skb) return err; /* reserve space for Hardware header */ @@ -814,7 +819,6 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->csum = 0; - __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -963,10 +967,10 @@ alloc_new_skb: skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, sk->sk_allocation); - if (unlikely(skb == NULL)) + if (unlikely(!skb)) err = -ENOBUFS; } - if (skb == NULL) + if (!skb) goto error; /* @@ -1090,10 +1094,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, */ opt = ipc->opt; if (opt) { - if (cork->opt == NULL) { + if (!cork->opt) { cork->opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); - if (unlikely(cork->opt == NULL)) + if (unlikely(!cork->opt)) return -ENOBUFS; } memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); @@ -1200,7 +1204,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EMSGSIZE; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + skb = skb_peek_tail(&sk->sk_write_queue); + if (!skb) return -EINVAL; cork->length += size; @@ -1211,7 +1216,6 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } - while (size > 0) { int i; @@ -1331,7 +1335,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk, __be16 df = 0; __u8 ttl; - if ((skb = __skb_dequeue(queue)) == NULL) + skb = __skb_dequeue(queue); + if (!skb) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); @@ -1382,7 +1387,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, sk); + ip_select_ident(net, skb, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5cd99271d3a6..7cfb0893f263 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -351,7 +351,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } } - if (new_ra == NULL) { + if (!new_ra) { spin_unlock_bh(&ip_ra_lock); return -ENOBUFS; } @@ -387,7 +387,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, skb_network_header(skb); serr->port = port; - if (skb_pull(skb, payload - skb->data) != NULL) { + if (skb_pull(skb, payload - skb->data)) { skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb) == 0) return; @@ -482,7 +482,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) err = -EAGAIN; skb = sock_dequeue_err_skb(sk); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; @@ -536,12 +536,34 @@ out: * Socket option code for IP. This is the end of the line after any * TCP,UDP etc options on an IP socket. */ +static bool setsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IP_ADD_MEMBERSHIP: + case IP_ADD_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: + case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_MSFILTER: + case IP_UNBLOCK_SOURCE: + case MCAST_BLOCK_SOURCE: + case MCAST_MSFILTER: + case MCAST_JOIN_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_UNBLOCK_SOURCE: + return true; + } + return false; +} static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); int val = 0, err; + bool needs_rtnl = setsockopt_needs_rtnl(optname); switch (optname) { case IP_PKTINFO: @@ -584,6 +606,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, return ip_mroute_setsockopt(sk, optname, optval, optlen); err = 0; + if (needs_rtnl) + rtnl_lock(); lock_sock(sk); switch (optname) { @@ -1118,10 +1142,14 @@ mc_msf_out: break; } release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return err; e_inval: release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return -EINVAL; } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2cd08280c77b..4c2c3ba4ba65 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -389,7 +389,6 @@ static int ip_tunnel_bind_dev(struct net_device *dev) hlen = tdev->hard_header_len + tdev->needed_headroom; mtu = tdev->mtu; } - dev->iflink = tunnel->parms.link; dev->needed_headroom = t_hlen + hlen; mtu -= (dev->hard_header_len + t_hlen); @@ -655,7 +654,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (dst == 0) { /* NBMA tunnel */ - if (skb_dst(skb) == NULL) { + if (!skb_dst(skb)) { dev->stats.tx_fifo_errors++; goto tx_error; } @@ -673,7 +672,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) + if (!neigh) goto tx_error; addr6 = (const struct in6_addr *)&neigh->primary_key; @@ -783,7 +782,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, + err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); @@ -844,7 +843,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) case SIOCGETTUNNEL: if (dev == itn->fb_tunnel_dev) { t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) + if (!t) t = netdev_priv(dev); } memcpy(p, &t->parms, sizeof(*p)); @@ -877,7 +876,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) break; } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -915,7 +914,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) if (dev == itn->fb_tunnel_dev) { err = -ENOENT; t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(itn->fb_tunnel_dev)) @@ -980,6 +979,14 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev) } EXPORT_SYMBOL(ip_tunnel_get_link_net); +int ip_tunnel_get_iflink(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->parms.link; +} +EXPORT_SYMBOL(ip_tunnel_get_iflink); + int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 88c386cf7d85..ce63ab21b6cd 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,8 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); + __ip_select_ident(dev_net(rt->dst.dev), iph, + skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 94efe148181c..9f7269f3c54a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -60,7 +60,7 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); - if (tunnel != NULL) { + if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; @@ -341,6 +341,7 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_do_ioctl = vti_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip_tunnel_get_iflink, }; static void vti_tunnel_setup(struct net_device *dev) @@ -361,7 +362,6 @@ static int vti_tunnel_init(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN; dev->flags = IFF_NOARP; - dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); @@ -456,10 +456,10 @@ static void vti_netlink_parms(struct nlattr *data[], parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); if (data[IFLA_VTI_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]); + parms->iph.saddr = nla_get_in_addr(data[IFLA_VTI_LOCAL]); if (data[IFLA_VTI_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]); + parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]); } @@ -505,8 +505,8 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u32(skb, IFLA_VTI_LINK, p->link); nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key); nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key); - nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr); - nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr); + nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr); + nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr); return 0; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index c0855d50a3fa..d97f4f2787f5 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -63,7 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) struct xfrm_state *t; t = xfrm_state_alloc(net); - if (t == NULL) + if (!t) goto out; t->id.proto = IPPROTO_IPIP; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b26376ef87f6..8e7328c6a390 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -504,7 +504,8 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (!net_eq(dev_net(dev), &init_net)) goto drop; - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) return NET_RX_DROP; if (!pskb_may_pull(skb, sizeof(struct arphdr))) @@ -958,7 +959,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) return NET_RX_DROP; if (!pskb_may_pull(skb, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 915d215a7d14..ff96396ebec5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -144,7 +144,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) err = -ENOENT; t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->daddr, iph->saddr, 0); - if (t == NULL) + if (!t) goto out; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { @@ -272,6 +272,7 @@ static const struct net_device_ops ipip_netdev_ops = { .ndo_do_ioctl = ipip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip_tunnel_get_iflink, }; #define IPIP_FEATURES (NETIF_F_SG | \ @@ -286,7 +287,6 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL; dev->flags = IFF_NOARP; - dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); @@ -325,10 +325,10 @@ static void ipip_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); + parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); + parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) { parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -450,8 +450,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || - nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fe54eba6d00d..3a2c0162c3ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -73,9 +73,7 @@ struct mr_table { struct list_head list; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; u32 id; struct sock __rcu *mroute_sk; struct timer_list ipmr_expire_timer; @@ -191,7 +189,7 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, } mrt = ipmr_get_table(rule->fr_net, rule->table); - if (mrt == NULL) + if (!mrt) return -EAGAIN; res->mrt = mrt; return 0; @@ -255,7 +253,7 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) { + if (!mrt) { err = -ENOMEM; goto err1; } @@ -323,11 +321,11 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) unsigned int i; mrt = ipmr_get_table(net, id); - if (mrt != NULL) + if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (mrt == NULL) + if (!mrt) return NULL; write_pnet(&mrt->net, net); mrt->id = id; @@ -429,7 +427,7 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) + if (!in_dev) goto failure; ipv4_devconf_setall(in_dev); @@ -480,8 +478,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static int reg_vif_get_iflink(const struct net_device *dev) +{ + return 0; +} + static const struct net_device_ops reg_vif_netdev_ops = { .ndo_start_xmit = reg_vif_xmit, + .ndo_get_iflink = reg_vif_get_iflink, }; static void reg_vif_setup(struct net_device *dev) @@ -507,7 +511,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -516,7 +520,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) free_netdev(dev); return NULL; } - dev->iflink = 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -764,7 +767,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); - if (dev && __in_dev_get_rtnl(dev) == NULL) { + if (dev && !__in_dev_get_rtnl(dev)) { dev_put(dev); return -EADDRNOTAVAIL; } @@ -808,7 +811,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, v->pkt_out = 0; v->link = dev->ifindex; if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) - v->link = dev->iflink; + v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); @@ -1010,7 +1013,7 @@ static int ipmr_cache_report(struct mr_table *mrt, rcu_read_lock(); mroute_sk = rcu_dereference(mrt->mroute_sk); - if (mroute_sk == NULL) { + if (!mroute_sk) { rcu_read_unlock(); kfree_skb(skb); return -EINVAL; @@ -1163,7 +1166,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return -EINVAL; c = ipmr_cache_alloc(); - if (c == NULL) + if (!c) return -ENOMEM; c->mfc_origin = mfc->mfcc_origin.s_addr; @@ -1285,7 +1288,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -EOPNOTSUPP; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; if (optname != MRT_INIT) { @@ -1448,7 +1451,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int return -EOPNOTSUPP; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; if (optname != MRT_VERSION && @@ -1494,7 +1497,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1568,7 +1571,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1649,7 +1652,8 @@ static struct notifier_block ip_mr_notifier = { * important for multicast video. */ -static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) +static void ip_encap(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr) { struct iphdr *iph; const struct iphdr *old_iph = ip_hdr(skb); @@ -1668,14 +1672,14 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); nf_reset(skb); } -static inline int ipmr_forward_finish(struct sk_buff *skb) +static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -1685,7 +1689,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } /* @@ -1702,7 +1706,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct flowi4 fl4; int encap = 0; - if (vif->dev == NULL) + if (!vif->dev) goto out_free; #ifdef CONFIG_IP_PIMSM @@ -1765,7 +1769,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * What do we do with netfilter? -- RR */ if (vif->flags & VIFF_TUNNEL) { - ip_encap(skb, vif->local, vif->remote); + ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ vif->dev->stats.tx_packets++; vif->dev->stats.tx_bytes += skb->len; @@ -1784,7 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, + skb->dev, dev, ipmr_forward_finish); return; @@ -1993,7 +1998,7 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); - if (cache == NULL) { + if (!cache) { int vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) @@ -2004,13 +2009,13 @@ int ip_mr_input(struct sk_buff *skb) /* * No usable cache entry */ - if (cache == NULL) { + if (!cache) { int vif; if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); ip_local_deliver(skb); - if (skb2 == NULL) + if (!skb2) return -ENOBUFS; skb = skb2; } @@ -2069,7 +2074,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; read_unlock(&mrt_lock); - if (reg_dev == NULL) + if (!reg_dev) return 1; skb->mac_header = skb->network_header; @@ -2199,18 +2204,18 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, int err; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); - if (cache == NULL && skb->dev) { + if (!cache && skb->dev) { int vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, daddr, vif); } - if (cache == NULL) { + if (!cache) { struct sk_buff *skb2; struct iphdr *iph; struct net_device *dev; @@ -2268,7 +2273,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -2287,8 +2292,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; - if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || - nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) + if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || + nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) goto nla_put_failure; err = __ipmr_fill_mroute(mrt, skb, c, rtm); /* do not break the dump if cache is unresolved */ @@ -2333,7 +2338,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); @@ -2448,7 +2453,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) struct mr_table *mrt; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); iter->mrt = mrt; @@ -2567,7 +2572,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) struct mr_table *mrt; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); it->mrt = mrt; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 7ebd6e37875c..65de0684e22a 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -94,7 +94,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, { struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (entry->hook == NF_INET_LOCAL_OUT) { + if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); rt_info->tos = iph->tos; @@ -109,7 +109,7 @@ static int nf_ip_reroute(struct sk_buff *skb, { const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (entry->hook == NF_INET_LOCAL_OUT) { + if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); if (!(iph->tos == rt_info->tos && diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 59f883d9cadf..fb20f363151f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,24 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config NF_LOG_ARP - tristate "ARP packet logging" - default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON - -config NF_LOG_IPV4 - tristate "IPv4 packet logging" - default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON +if NF_TABLES config NF_TABLES_IPV4 - depends on NF_TABLES tristate "IPv4 nf_tables support" help This option enables the IPv4 support for nf_tables. +if NF_TABLES_IPV4 + config NFT_CHAIN_ROUTE_IPV4 - depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables route chain support" help This option enables the "route" chain for IPv4 in nf_tables. This @@ -61,22 +53,34 @@ config NFT_CHAIN_ROUTE_IPV4 fields such as the source, destination, type of service and the packet mark. -config NF_REJECT_IPV4 - tristate "IPv4 packet rejection" - default m if NETFILTER_ADVANCED=n - config NFT_REJECT_IPV4 - depends on NF_TABLES_IPV4 select NF_REJECT_IPV4 default NFT_REJECT tristate +endif # NF_TABLES_IPV4 + config NF_TABLES_ARP - depends on NF_TABLES tristate "ARP nf_tables support" help This option enables the ARP support for nf_tables. +endif # NF_TABLES + +config NF_LOG_ARP + tristate "ARP packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_LOG_IPV4 + tristate "IPv4 packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_REJECT_IPV4 + tristate "IPv4 packet rejection" + default m if NETFILTER_ADVANCED=n + config NF_NAT_IPV4 tristate "IPv4 NAT" depends on NF_CONNTRACK_IPV4 diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f95b6f93814b..13bfe84bf3ca 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -248,8 +248,7 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -265,8 +264,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; - indev = in ? in->name : nulldevname; - outdev = out ? out->name : nulldevname; + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; local_bh_disable(); addend = xt_write_recseq_begin(); @@ -281,8 +280,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.hooknum = hook; acpar.family = NFPROTO_ARP; acpar.hotdrop = false; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 802ddecb30b8..93876d03120c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -28,12 +28,11 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return arpt_do_table(skb, ops->hooknum, in, out, + return arpt_do_table(skb, ops->hooknum, state, net->ipv4.arptable_filter); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cf5e82f39d3b..c69db7fa25ee 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -288,8 +288,7 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -306,8 +305,8 @@ ipt_do_table(struct sk_buff *skb, /* Initialization */ ip = ip_hdr(skb); - indev = in ? in->name : nulldevname; - outdev = out ? out->name : nulldevname; + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -317,8 +316,8 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.family = NFPROTO_IPV4; acpar.hooknum = hook; @@ -370,7 +369,7 @@ ipt_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, + trace_packet(skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e90f83a3415b..771ab3d01ad3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -418,6 +418,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); + + if (!par->net->xt.clusterip_deprecated_warning) { + pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " + "use xt_cluster instead\n"); + par->net->xt.clusterip_deprecated_warning = true; + } + return ret; } @@ -497,14 +504,12 @@ static void arp_print(struct arp_payload *payload) static unsigned int arp_mangle(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; - struct net *net = dev_net(in ? in : out); + struct net *net = dev_net(state->in ? state->in : state->out); /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || @@ -529,10 +534,10 @@ arp_mangle(const struct nf_hook_ops *ops, * addresses on different interfacs. However, in the CLUSTERIP case * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ - if (c->dev != out) { + if (c->dev != state->out) { pr_debug("not mangling arp reply on different " "interface: cip'%s'-skb'%s'\n", - c->dev->name, out->name); + c->dev->name, state->out->name); clusterip_config_put(c); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 8f48f5517e33..87907d4bd259 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -34,31 +34,32 @@ static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; + int hook = par->hooknum; switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - nf_send_unreach(skb, ICMP_NET_UNREACH); + nf_send_unreach(skb, ICMP_NET_UNREACH, hook); break; case IPT_ICMP_HOST_UNREACHABLE: - nf_send_unreach(skb, ICMP_HOST_UNREACH); + nf_send_unreach(skb, ICMP_HOST_UNREACH, hook); break; case IPT_ICMP_PROT_UNREACHABLE: - nf_send_unreach(skb, ICMP_PROT_UNREACH); + nf_send_unreach(skb, ICMP_PROT_UNREACH, hook); break; case IPT_ICMP_PORT_UNREACHABLE: - nf_send_unreach(skb, ICMP_PORT_UNREACH); + nf_send_unreach(skb, ICMP_PORT_UNREACH, hook); break; case IPT_ICMP_NET_PROHIBITED: - nf_send_unreach(skb, ICMP_NET_ANO); + nf_send_unreach(skb, ICMP_NET_ANO, hook); break; case IPT_ICMP_HOST_PROHIBITED: - nf_send_unreach(skb, ICMP_HOST_ANO); + nf_send_unreach(skb, ICMP_HOST_ANO, hook); break; case IPT_ICMP_ADMIN_PROHIBITED: - nf_send_unreach(skb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(skb, par->hooknum); + nf_send_reset(skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index a313c3fbeb46..e9e67793055f 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -300,11 +300,9 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index e08a74a243a8..a0f3beca52d2 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -34,8 +34,7 @@ static const struct xt_table packet_filter = { static unsigned int iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct net *net; @@ -45,9 +44,8 @@ iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, ops->hooknum, in, out, - net->ipv4.iptable_filter); + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 6a5079c34bb3..62cbb8c5f4a8 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -37,8 +37,9 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) +ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { + struct net_device *out = state->out; unsigned int ret; const struct iphdr *iph; u_int8_t tos; @@ -58,7 +59,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, + ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { @@ -81,18 +82,16 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) static unsigned int iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (ops->hooknum == NF_INET_LOCAL_OUT) - return ipt_mangle_out(skb, out); + return ipt_mangle_out(skb, state); if (ops->hooknum == NF_INET_POST_ROUTING) - return ipt_do_table(skb, ops->hooknum, in, out, - dev_net(out)->ipv4.iptable_mangle); + return ipt_do_table(skb, ops->hooknum, state, + dev_net(state->out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, ops->hooknum, in, out, - dev_net(in)->ipv4.iptable_mangle); + return ipt_do_table(skb, ops->hooknum, state, + dev_net(state->in)->ipv4.iptable_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 6b67d7e9a75d..0d4d9cdf98a4 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -30,49 +30,40 @@ static const struct xt_table nf_nat_ipv4_table = { static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.nat_table); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table); } static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain); } static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain); } static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain); } static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index b2f7e8f98316..0356e6da4bb7 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -21,8 +21,7 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct net *net; @@ -32,8 +31,8 @@ iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw); + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index c86647ed2078..4bce3980ccd9 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -38,9 +38,7 @@ static const struct xt_table security_table = { static unsigned int iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct net *net; @@ -50,8 +48,8 @@ iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, /* Somebody is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, ops->hooknum, in, out, + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_security); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5c61328b7704..30ad9554b5e9 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -94,9 +94,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, static unsigned int ipv4_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -123,9 +121,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops, static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -149,24 +145,20 @@ out: static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb); } static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so @@ -322,8 +314,8 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || - nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) + if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || + nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) goto nla_put_failure; return 0; @@ -342,8 +334,8 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[], if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) return -EINVAL; - t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]); - t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]); + t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); + t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); return 0; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index a460a87e14f8..f0dfe92a00d6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -300,7 +300,9 @@ static int exp_seq_show(struct seq_file *s, void *v) __nf_ct_l3proto_find(exp->tuple.src.l3num), __nf_ct_l4proto_find(exp->tuple.src.l3num, exp->tuple.dst.protonum)); - return seq_putc(s, '\n'); + seq_putc(s, '\n'); + + return 0; } static const struct seq_operations exp_seq_ops = { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 7e5ca6f2d0cd..c88b7d434718 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -63,9 +63,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(skb->sk); diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index d059182c1466..e7ad950cf9ef 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -10,8 +10,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 75101980eeee..076aadda0473 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -26,7 +28,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index fc37711e11f3..e59cc05c09e9 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -256,11 +256,10 @@ EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); unsigned int nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { struct nf_conn *ct; @@ -309,7 +308,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, in, out, ct); + ret = do_chain(ops, skb, state, ct); if (ret != NF_ACCEPT) return ret; @@ -323,7 +322,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, + state->out)) goto oif_changed; } break; @@ -332,7 +332,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) goto oif_changed; } @@ -346,17 +346,16 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); unsigned int nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -367,11 +366,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); unsigned int nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { #ifdef CONFIG_XFRM @@ -386,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -410,11 +408,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); unsigned int nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { const struct nf_conn *ct; @@ -427,7 +424,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 536da7bc598a..3262e41ff76f 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -13,6 +13,7 @@ #include <net/dst.h> #include <net/netfilter/ipv4/nf_reject.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter_bridge.h> #include <net/netfilter/ipv4/nf_reject.h> const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, @@ -43,7 +44,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - __be16 protocol, int ttl) + __u8 protocol, int ttl) { struct iphdr *niph, *oiph = ip_hdr(oldskb); @@ -146,7 +147,8 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) */ if (oldskb->nf_bridge) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; + + nskb->dev = nf_bridge_get_physindev(oldskb); niph->tot_len = htons(nskb->len); ip_send_check(niph); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), @@ -164,4 +166,27 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) } EXPORT_SYMBOL_GPL(nf_send_reset); +void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) +{ + struct iphdr *iph = ip_hdr(skb_in); + u8 proto; + + if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET)) + return; + + if (skb_csum_unnecessary(skb_in)) { + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); + return; + } + + if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) + proto = iph->protocol; + else + proto = 0; + + if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0) + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach); + MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 19412a4063fb..8412268bbad1 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -17,13 +17,11 @@ static unsigned int nft_do_chain_arp(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, ops, skb, in, out); + nft_set_pktinfo(&pkt, ops, skb, state); return nft_do_chain(&pkt, ops); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 6820c8c40842..aa180d3a69a5 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -20,22 +20,18 @@ static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + nft_set_pktinfo_ipv4(&pkt, ops, skb, state); return nft_do_chain(&pkt, ops); } static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (unlikely(skb->len < sizeof(struct iphdr) || ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { @@ -45,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv4(ops, skb, in, out, okfn); + return nft_do_chain_ipv4(ops, skb, state); } struct nft_af_info nft_af_ipv4 __read_mostly = { diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index df547bf50078..bf5c30ae14e4 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -28,51 +28,42 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + nft_set_pktinfo_ipv4(&pkt, ops, skb, state); return nft_do_chain(&pkt, ops); } static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv4 = { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 125b66766c0a..e335b0afdaf3 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -23,9 +23,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { unsigned int ret; struct nft_pktinfo pkt; @@ -39,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + nft_set_pktinfo_ipv4(&pkt, ops, skb, state); mark = skb->mark; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 665de06561cd..40e414c4ca56 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -17,20 +17,17 @@ #include <net/netfilter/ipv4/nf_nat_masquerade.h> static void nft_masq_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); range.flags = priv->flags; - verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, - &range, pkt->out); - - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, + &range, pkt->out); } static struct nft_expr_type nft_masq_ipv4_type; diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 6ecfce63201a..d8d795df9c13 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -18,26 +18,25 @@ #include <net/netfilter/nft_redir.h> static void nft_redir_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_redir *priv = nft_expr_priv(expr); struct nf_nat_ipv4_multi_range_compat mr; - unsigned int verdict; memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { mr.range[0].min.all = - *(__be16 *)&data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min]; mr.range[0].max.all = - *(__be16 *)&data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max]; mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } mr.range[0].flags |= priv->flags; - verdict = nf_nat_redirect_ipv4(pkt->skb, &mr, pkt->ops->hooknum); - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, + pkt->ops->hooknum); } static struct nft_expr_type nft_redir_ipv4_type; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index d729542bd1b7..b07e58b51158 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -20,21 +20,24 @@ #include <net/netfilter/nft_reject.h> static void nft_reject_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nf_send_unreach(pkt->skb, priv->icmp_code, + pkt->ops->hooknum); break; case NFT_REJECT_TCP_RST: nf_send_reset(pkt->skb, pkt->ops->hooknum); break; + default: + break; } - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; } static struct nft_expr_type nft_reject_ipv4_type; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 208d5439e59b..a93f260cf24c 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -64,11 +64,11 @@ EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; -static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask) +static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) { - int res = (num + net_hash_mix(net)) & mask; + u32 res = (num + net_hash_mix(net)) & mask; - pr_debug("hash(%d) = %d\n", num, res); + pr_debug("hash(%u) = %u\n", num, res); return res; } EXPORT_SYMBOL_GPL(ping_hash); @@ -516,7 +516,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) ntohs(icmph->un.echo.sequence)); sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); - if (sk == NULL) { + if (!sk) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ } @@ -692,8 +692,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, } EXPORT_SYMBOL_GPL(ping_common_sendmsg); -static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; @@ -849,8 +848,8 @@ do_confirm: goto out; } -int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; @@ -972,7 +971,7 @@ bool ping_rcv(struct sk_buff *skb) skb_push(skb, skb->data - (u8 *)icmph); sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); - if (sk != NULL) { + if (sk) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d8953ef0770c..e1f3b911dd1e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -63,7 +63,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - tcp_death_row.tw_count, sockets, + atomic_read(&tcp_death_row.tw_count), sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4a356b7c081b..561cd4b8fc6e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) read_lock(&raw_v4_hashinfo.lock); raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); - if (raw_sk != NULL) { + if (raw_sk) { iph = (const struct iphdr *)skb->data; net = dev_net(skb->dev); @@ -362,7 +362,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); - if (skb == NULL) + if (!skb) goto error; skb_reserve(skb, hlen); @@ -403,7 +403,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -411,8 +411,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, + NULL, rt->dst.dev, dst_output_sk); if (err > 0) err = net_xmit_errno(err); if (err) @@ -480,8 +480,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; @@ -708,8 +707,8 @@ out: return ret; * we return it, otherwise we block. */ -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -872,7 +871,7 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) + if (skb) amount = skb->len; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ad5064362c5c..a78540f28276 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -152,7 +152,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ipv4_dst_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, @@ -483,7 +482,7 @@ u32 ip_idents_reserve(u32 hash, int segs) } EXPORT_SYMBOL(ip_idents_reserve); -void __ip_select_ident(struct iphdr *iph, int segs) +void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; @@ -492,7 +491,7 @@ void __ip_select_ident(struct iphdr *iph, int segs) hash = jhash_3words((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol, + iph->protocol ^ net_hash_mix(net), ip_idents_hashrnd); id = ip_idents_reserve(hash, segs); iph->id = htons(id); @@ -1057,7 +1056,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; - if (odst->obsolete && odst->ops->check(odst, 0) == NULL) { + if (odst->obsolete && !odst->ops->check(odst, 0)) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; @@ -1451,7 +1450,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* Primary sanity checks. */ - if (in_dev == NULL) + if (!in_dev) return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || @@ -1554,7 +1553,7 @@ static int __mkroute_input(struct sk_buff *skb, /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); - if (out_dev == NULL) { + if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); return -EINVAL; } @@ -1592,7 +1591,7 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe != NULL) + if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); @@ -2055,7 +2054,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) ipv4_is_lbcast(fl4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = __ip_dev_find(net, fl4->saddr, false); - if (dev_out == NULL) + if (!dev_out) goto out; /* Special hack: user can direct multicasts @@ -2088,7 +2087,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (fl4->flowi4_oif) { dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); rth = ERR_PTR(-ENODEV); - if (dev_out == NULL) + if (!dev_out) goto out; /* RACE: Check return value of inet_select_addr instead. */ @@ -2225,7 +2224,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .check = ipv4_blackhole_dst_check, .mtu = ipv4_blackhole_mtu, .default_advmss = ipv4_default_advmss, @@ -2301,7 +2299,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 metrics[RTAX_MAX]; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); @@ -2321,11 +2319,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (IPCB(skb)->flags & IPSKB_DOREDIRECT) r->rtm_flags |= RTCF_DOREDIRECT; - if (nla_put_be32(skb, RTA_DST, dst)) + if (nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (src) { r->rtm_src_len = 32; - if (nla_put_be32(skb, RTA_SRC, src)) + if (nla_put_in_addr(skb, RTA_SRC, src)) goto nla_put_failure; } if (rt->dst.dev && @@ -2338,11 +2336,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, #endif if (!rt_is_input_route(rt) && fl4->saddr != src) { - if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) + if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } if (rt->rt_uses_gateway && - nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) + nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; expires = rt->dst.expires; @@ -2423,7 +2421,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rtm = nlmsg_data(nlh); skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) { + if (!skb) { err = -ENOBUFS; goto errout; } @@ -2438,8 +2436,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) ip_hdr(skb)->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; - dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; + src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; + dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; @@ -2454,7 +2452,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct net_device *dev; dev = __dev_get_by_index(net, iif); - if (dev == NULL) { + if (!dev) { err = -ENODEV; goto errout_free; } @@ -2653,7 +2651,7 @@ static __net_init int sysctl_route_net_init(struct net *net) tbl = ipv4_route_flush_table; if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); - if (tbl == NULL) + if (!tbl) goto err_dup; /* Don't export sysctls to unprivileged users */ @@ -2663,7 +2661,7 @@ static __net_init int sysctl_route_net_init(struct net *net) tbl[0].extra1 = net; net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); - if (net->ipv4.route_hdr == NULL) + if (!net->ipv4.route_hdr) goto err_reg; return 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 45fe60c5238e..df849e5a10f1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -219,19 +219,20 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_check); -static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } @@ -325,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */ if (!req) goto out; @@ -336,8 +337,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); ireq->ir_mark = inet_request_mark(sk, skb); ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -345,7 +346,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; - treq->listener = NULL; + treq->tfo_listener = false; + + ireq->ir_iif = sk->sk_bound_dev_if; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -357,7 +360,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; } - req->expires = 0UL; req->num_retrans = 0; /* diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d151539da8e6..c3852a7ff3c7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -883,6 +883,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_probe_threshold", + .data = &init_net.ipv4.sysctl_tcp_probe_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_probe_interval", + .data = &init_net.ipv4.sysctl_tcp_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; @@ -895,7 +909,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) int i; table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); - if (table == NULL) + if (!table) goto err_alloc; /* Update the variables to point into the current struct net */ @@ -904,7 +918,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); - if (net->ipv4.ipv4_hdr == NULL) + if (!net->ipv4.ipv4_hdr) goto err_reg; net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); @@ -942,7 +956,7 @@ static __init int sysctl_ipv4_init(void) struct ctl_table_header *hdr; hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); - if (hdr == NULL) + if (!hdr) return -ENOMEM; if (register_pernet_subsys(&ipv4_sysctl_ops)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 995a2259bcfc..18e3a12eb1b2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -496,7 +496,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected or passive Fast Open socket? */ if (sk->sk_state != TCP_SYN_SENT && - (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { + (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) { int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && @@ -1028,7 +1028,7 @@ static inline int select_size(const struct sock *sk, bool sg) void tcp_free_fastopen_req(struct tcp_sock *tp) { - if (tp->fastopen_req != NULL) { + if (tp->fastopen_req) { kfree(tp->fastopen_req); tp->fastopen_req = NULL; } @@ -1042,12 +1042,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) return -EOPNOTSUPP; - if (tp->fastopen_req != NULL) + if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), sk->sk_allocation); - if (unlikely(tp->fastopen_req == NULL)) + if (unlikely(!tp->fastopen_req)) return -ENOBUFS; tp->fastopen_req->data = msg; tp->fastopen_req->size = size; @@ -1060,8 +1060,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return err; } -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size) +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1120,7 +1119,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (iov_iter_count(&msg->msg_iter)) { + while (msg_data_left(msg)) { int copy = 0; int max = size_goal; @@ -1164,8 +1163,8 @@ new_segment: } /* Try to append data to the end of skb. */ - if (copy > iov_iter_count(&msg->msg_iter)) - copy = iov_iter_count(&msg->msg_iter); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); /* Where to copy to? */ if (skb_availroom(skb) > 0) { @@ -1222,7 +1221,7 @@ new_segment: tcp_skb_pcount_set(skb, 0); copied += copy; - if (!iov_iter_count(&msg->msg_iter)) { + if (!msg_data_left(msg)) { tcp_tx_timestamp(sk, skb); goto out; } @@ -1539,8 +1538,8 @@ EXPORT_SYMBOL(tcp_read_sock); * Probably, code can be easily improved even more. */ -int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { struct tcp_sock *tp = tcp_sk(sk); int copied = 0; @@ -1914,18 +1913,19 @@ EXPORT_SYMBOL_GPL(tcp_set_state); static const unsigned char new_state[16] = { /* current state: new state: action: */ - /* (Invalid) */ TCP_CLOSE, - /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, - /* TCP_SYN_SENT */ TCP_CLOSE, - /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, - /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, - /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, - /* TCP_TIME_WAIT */ TCP_CLOSE, - /* TCP_CLOSE */ TCP_CLOSE, - /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, - /* TCP_LAST_ACK */ TCP_LAST_ACK, - /* TCP_LISTEN */ TCP_CLOSE, - /* TCP_CLOSING */ TCP_CLOSING, + [0 /* (Invalid) */] = TCP_CLOSE, + [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_SYN_SENT] = TCP_CLOSE, + [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, + [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, + [TCP_TIME_WAIT] = TCP_CLOSE, + [TCP_CLOSE] = TCP_CLOSE, + [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, + [TCP_LAST_ACK] = TCP_LAST_ACK, + [TCP_LISTEN] = TCP_CLOSE, + [TCP_CLOSING] = TCP_CLOSING, + [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ }; static int tcp_close_state(struct sock *sk) @@ -2138,7 +2138,7 @@ adjudge_to_death: * aborted (e.g., closed with unread data) before 3WHS * finishes. */ - if (req != NULL) + if (req) reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); } @@ -2776,7 +2776,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - if (icsk->icsk_accept_queue.fastopenq != NULL) + if (icsk->icsk_accept_queue.fastopenq) val = icsk->icsk_accept_queue.fastopenq->max_qlen; else val = 0; @@ -2960,7 +2960,7 @@ void tcp_done(struct sock *sk) tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); - if (req != NULL) + if (req) reqsk_fastopen_remove(sk, req, false); sk->sk_shutdown = SHUTDOWN_MASK; @@ -3001,12 +3001,11 @@ static void __init tcp_init_mem(void) void __init tcp_init(void) { - struct sk_buff *skb = NULL; unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + sock_skb_cb_check_size(sizeof(struct tcp_skb_cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 62856e185a93..7a5ae50c80c8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -83,7 +83,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); - pr_info("%s registered\n", ca->name); + pr_debug("%s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0d73f9ddb55b..79b34a0f4a4a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -29,18 +29,18 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } - if (info != NULL) + if (info) tcp_get_info(sk, info); } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ea82fd492c1b..e3d87aca6be8 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -141,7 +141,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, req->sk = NULL; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) + if (!child) return false; spin_lock(&queue->fastopenq->lock); @@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, tp = tcp_sk(child); tp->fastopen_rsk = req; - /* Do a hold on the listner sk so that if the listener is being - * closed, the child that has been accepted can live on and still - * access listen_lock. - */ - sock_hold(sk); - tcp_rsk(req)->listener = sk; + tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never * scaled. So correct it appropriately. @@ -174,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); + atomic_set(&req->rsk_refcnt, 1); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, child); @@ -218,10 +214,9 @@ static bool tcp_fastopen_create_child(struct sock *sk, sk->sk_data_ready(sk); bh_unlock_sock(child); sock_put(child); - WARN_ON(req->sk == NULL); + WARN_ON(!req->sk); return true; } -EXPORT_SYMBOL(tcp_fastopen_create_child); static bool tcp_fastopen_queue_check(struct sock *sk) { @@ -238,14 +233,14 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * temporarily vs a server not supporting Fast Open at all. */ fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - if (fastopenq == NULL || fastopenq->max_qlen == 0) + if (!fastopenq || fastopenq->max_qlen == 0) return false; if (fastopenq->qlen >= fastopenq->max_qlen) { struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; - if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { spin_unlock(&fastopenq->lock); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); @@ -254,7 +249,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk) fastopenq->rskq_rst_head = req1->dl_next; fastopenq->qlen--; spin_unlock(&fastopenq->lock); - reqsk_free(req1); + reqsk_put(req1); } return true; } @@ -308,6 +303,7 @@ fastopen: } else if (foc->len > 0) /* Client presents an invalid cookie */ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + valid_foc.exp = foc->exp; *foc = valid_foc; return false; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f501ac048366..a7ef679dd3ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -866,7 +866,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, /* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if ((tp->retransmit_skb_hint == NULL) || + if (!tp->retransmit_skb_hint || before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) tp->retransmit_skb_hint = skb; @@ -1256,7 +1256,7 @@ static u8 tcp_sacktag_one(struct sock *sk, fack_count += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && + if (!tcp_is_fack(tp) && tp->lost_skb_hint && before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; @@ -1535,7 +1535,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; - if ((next_dup != NULL) && + if (next_dup && before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) { in_sack = tcp_match_skb_to_sack(sk, skb, next_dup->start_seq, @@ -1551,7 +1551,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, if (in_sack <= 0) { tmp = tcp_shift_skb_data(sk, skb, state, start_seq, end_seq, dup_sack); - if (tmp != NULL) { + if (tmp) { if (tmp != skb) { skb = tmp; continue; @@ -1614,7 +1614,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, struct tcp_sacktag_state *state, u32 skip_to_seq) { - if (next_dup == NULL) + if (!next_dup) return skb; if (before(next_dup->start_seq, skip_to_seq)) { @@ -1783,7 +1783,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (tcp_highest_sack_seq(tp) == cache->end_seq) { /* ...but better entrypoint exists! */ skb = tcp_highest_sack(sk); - if (skb == NULL) + if (!skb) break; state.fack_count = tp->fackets_out; cache++; @@ -1798,7 +1798,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (!before(start_seq, tcp_highest_sack_seq(tp))) { skb = tcp_highest_sack(sk); - if (skb == NULL) + if (!skb) break; state.fack_count = tp->fackets_out; } @@ -3099,17 +3099,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; - } else { + } else if (!(sacked & TCPCB_SACKED_ACKED)) { last_ackt = skb->skb_mstamp; WARN_ON_ONCE(last_ackt.v64 == 0); if (!first_ackt.v64) first_ackt = last_ackt; - if (!(sacked & TCPCB_SACKED_ACKED)) { - reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; - } + reord = min(pkts_acked, reord); + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_SACKED_ACKED) @@ -3322,6 +3320,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } +/* Return true if we're currently rate-limiting out-of-window ACKs and + * thus shouldn't send a dupack right now. We rate-limit dupacks in + * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS + * attacks that send repeated SYNs or ACKs for the same connection. To + * do this, we do not send a duplicate SYNACK or ACK if the remote + * endpoint is sending out-of-window SYNs or pure ACKs at a high rate. + */ +bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, + int mib_idx, u32 *last_oow_ack_time) +{ + /* Data packets without SYNs are not likely part of an ACK loop. */ + if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && + !tcp_hdr(skb)->syn) + goto not_rate_limited; + + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + + if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + NET_INC_STATS_BH(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } + } + + *last_oow_ack_time = tcp_time_stamp; + +not_rate_limited: + return false; /* not rate-limited: go ahead, send dupack now! */ +} + /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) { @@ -3573,6 +3601,23 @@ old_ack: return 0; } +static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, + bool syn, struct tcp_fastopen_cookie *foc, + bool exp_opt) +{ + /* Valid only in SYN or SYN-ACK with an even length. */ + if (!foc || !syn || len < 0 || (len & 1)) + return; + + if (len >= TCP_FASTOPEN_COOKIE_MIN && + len <= TCP_FASTOPEN_COOKIE_MAX) + memcpy(foc->val, cookie, len); + else if (len != 0) + len = -1; + foc->len = len; + foc->exp = exp_opt; +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -3662,21 +3707,22 @@ void tcp_parse_options(const struct sk_buff *skb, */ break; #endif + case TCPOPT_FASTOPEN: + tcp_parse_fastopen_option( + opsize - TCPOLEN_FASTOPEN_BASE, + ptr, th->syn, foc, false); + break; + case TCPOPT_EXP: /* Fast Open option shares code 254 using a - * 16 bits magic number. It's valid only in - * SYN or SYN-ACK with an even size. + * 16 bits magic number. */ - if (opsize < TCPOLEN_EXP_FASTOPEN_BASE || - get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || - foc == NULL || !th->syn || (opsize & 1)) - break; - foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE; - if (foc->len >= TCP_FASTOPEN_COOKIE_MIN && - foc->len <= TCP_FASTOPEN_COOKIE_MAX) - memcpy(foc->val, ptr + 2, foc->len); - else if (foc->len != 0) - foc->len = -1; + if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE && + get_unaligned_be16(ptr) == + TCPOPT_FASTOPEN_MAGIC) + tcp_parse_fastopen_option(opsize - + TCPOLEN_EXP_FASTOPEN_BASE, + ptr + 2, th->syn, foc, true); break; } @@ -4640,7 +4686,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) struct sk_buff *head; u32 start, end; - if (skb == NULL) + if (!skb) return; start = TCP_SKB_CB(skb)->seq; @@ -5095,7 +5141,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - if (unlikely(sk->sk_rx_dst == NULL)) + if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. @@ -5292,7 +5338,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); - if (skb != NULL) { + if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } @@ -5330,8 +5376,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; - u16 mss = tp->rx_opt.mss_clamp; - bool syn_drop; + u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; + bool syn_drop = false; if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; @@ -5343,16 +5389,25 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, mss = opt.mss_clamp; } - if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */ + if (!tp->syn_fastopen) { + /* Ignore an unsolicited cookie */ cookie->len = -1; + } else if (tp->total_retrans) { + /* SYN timed out and the SYN-ACK neither has a cookie nor + * acknowledges data. Presumably the remote received only + * the retransmitted (regular) SYNs: either the original + * SYN-data or the corresponding SYN-ACK was dropped. + */ + syn_drop = (cookie->len < 0 && data); + } else if (cookie->len < 0 && !tp->syn_data) { + /* We requested a cookie but didn't get it. If we did not use + * the (old) exp opt format then try so next time (try_exp=1). + * Otherwise we go back to use the RFC7413 opt (try_exp=2). + */ + try_exp = tp->syn_fastopen_exp ? 2 : 1; + } - /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably - * the remote receives only the retransmitted (regular) SYNs: either - * the original SYN-data or the corresponding SYN-ACK is lost. - */ - syn_drop = (cookie->len <= 0 && data && tp->total_retrans); - - tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); + tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); if (data) { /* Retransmit unacked data in SYN */ tcp_for_write_queue_from(data, sk) { @@ -5661,11 +5716,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } req = tp->fastopen_rsk; - if (req != NULL) { + if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (tcp_check_req(sk, skb, req, NULL, true) == NULL) + if (!tcp_check_req(sk, skb, req, true)) goto discard; } @@ -5751,7 +5806,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * ACK we have received, this would have acknowledged * our SYNACK so stop the SYNACK timer. */ - if (req != NULL) { + if (req) { /* Return RST if ack_seq is invalid. * Note that RFC793 only says to generate a * DUPACK for it but for TCP Fast Open it seems @@ -5913,6 +5968,80 @@ static void tcp_ecn_create_request(struct request_sock *req, inet_rsk(req)->ecn_ok = 1; } +static void tcp_openreq_init(struct request_sock *req, + const struct tcp_options_received *rx_opt, + struct sk_buff *skb, const struct sock *sk) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + req->cookie_ts = 0; + tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_rsk(req)->last_oow_ack_time = 0; + req->mss = rx_opt->mss_clamp; + req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; + ireq->tstamp_ok = rx_opt->tstamp_ok; + ireq->sack_ok = rx_opt->sack_ok; + ireq->snd_wscale = rx_opt->snd_wscale; + ireq->wscale_ok = rx_opt->wscale_ok; + ireq->acked = 0; + ireq->ecn_ok = 0; + ireq->ir_rmt_port = tcp_hdr(skb)->source; + ireq->ir_num = ntohs(tcp_hdr(skb)->dest); + ireq->ir_mark = inet_request_mark(sk, skb); +} + +struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener) +{ + struct request_sock *req = reqsk_alloc(ops, sk_listener); + + if (req) { + struct inet_request_sock *ireq = inet_rsk(req); + + kmemcheck_annotate_bitfield(ireq, flags); + ireq->opt = NULL; + atomic64_set(&ireq->ir_cookie, 0); + ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); + ireq->ireq_family = sk_listener->sk_family; + } + + return req; +} +EXPORT_SYMBOL(inet_reqsk_alloc); + +/* + * Return true if a syncookie should be sent + */ +static bool tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) +{ + const char *msg = "Dropping request"; + bool want_cookie = false; + struct listen_sock *lopt; + +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + msg = "Sending cookies"; + want_cookie = true; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else +#endif + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -5950,7 +6079,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop; } - req = inet_reqsk_alloc(rsk_ops); + req = inet_reqsk_alloc(rsk_ops, sk); if (!req) goto drop; @@ -5967,6 +6096,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); + /* Note: tcp_v6_init_req() might override ir_iif for link locals */ + inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; + af_ops->init_req(req, sk, skb); if (security_inet_conn_request(sk, skb, req)) @@ -6039,7 +6171,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (err || want_cookie) goto drop_and_free; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f1756ee02207..3571f2be4470 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -122,7 +122,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (twp == NULL || (sysctl_tcp_tw_reuse && + (!twp || (sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -189,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!inet->inet_saddr) inet->inet_saddr = fl4->saddr; - inet->inet_rcv_saddr = inet->inet_saddr; + sk_rcv_saddr_set(sk, inet->inet_saddr); if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { /* Reset inherited state */ @@ -204,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tcp_fetch_timewait_stamp(sk, &rt->dst); inet->inet_dport = usin->sin_port; - inet->inet_daddr = daddr; + sk_daddr_set(sk, daddr); inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) @@ -310,6 +310,34 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) dst->ops->redirect(dst, sk, skb); } + +/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ +void tcp_req_err(struct sock *sk, u32 seq) +{ + struct request_sock *req = inet_reqsk(sk); + struct net *net = sock_net(sk); + + /* ICMPs are not backlogged, hence we cannot get + * an established socket here. + */ + WARN_ON(req->sk); + + if (seq != tcp_rsk(req)->snt_isn) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); + } else { + /* + * Still in SYN_RECV, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); + inet_csk_reqsk_queue_drop(req->rsk_listener, req); + } +} +EXPORT_SYMBOL(tcp_req_err); + /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should @@ -343,8 +371,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) int err; struct net *net = dev_net(icmp_skb->dev); - sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, - iph->saddr, th->source, inet_iif(icmp_skb)); + sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, + th->dest, iph->saddr, ntohs(th->source), + inet_iif(icmp_skb)); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; @@ -353,6 +382,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) inet_twsk_put(inet_twsk(sk)); return; } + seq = ntohl(th->seq); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_err(sk, seq); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -374,7 +406,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); - seq = ntohl(th->seq); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = tp->fastopen_rsk; snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; @@ -458,42 +489,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req, **prev; - case TCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - req = inet_csk_search_req(sk, &prev, th->dest, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - an established socket here. - */ - WARN_ON(req->sk); - - if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - /* - * Still in SYN_RECV, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(sk, req, prev); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - goto out; - case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is * is already accepted it is treated as a connected one below. */ - if (fastopen && fastopen->sk == NULL) + if (fastopen && !fastopen->sk) break; if (!sock_owned_by_user(sk)) { @@ -647,7 +648,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) if (!key) goto release_sk1; - genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; } else { @@ -855,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -/* - * Return true if a syncookie should be sent - */ -bool tcp_syn_flood_action(struct sock *sk, - const struct sk_buff *skb, - const char *proto) -{ - const char *msg = "Dropping request"; - bool want_cookie = false; - struct listen_sock *lopt; - -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - msg = "Sending cookies"; - want_cookie = true; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); - } else -#endif - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { - lopt->synflood_warned = 1; - pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); - } - return want_cookie; -} -EXPORT_SYMBOL(tcp_syn_flood_action); #ifdef CONFIG_TCP_MD5SIG /* @@ -897,10 +869,10 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; unsigned int size = sizeof(struct in_addr); - struct tcp_md5sig_info *md5sig; + const struct tcp_md5sig_info *md5sig; /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, @@ -923,24 +895,15 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk) + const struct sock *addr_sk) { - union tcp_md5_addr *addr; + const union tcp_md5_addr *addr; - addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; + addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); -static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - union tcp_md5_addr *addr; - - addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr; - return tcp_md5_do_lookup(sk, addr, AF_INET); -} - /* This can be called on a newly created socket, from other files */ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) @@ -1101,8 +1064,8 @@ clear_hash_noput: return 1; } -int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - const struct sock *sk, const struct request_sock *req, +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb) { struct tcp_md5sig_pool *hp; @@ -1110,12 +1073,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, const struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; - if (sk) { - saddr = inet_sk(sk)->inet_saddr; - daddr = inet_sk(sk)->inet_daddr; - } else if (req) { - saddr = inet_rsk(req)->ir_loc_addr; - daddr = inet_rsk(req)->ir_rmt_addr; + if (sk) { /* valid for establish/request sockets */ + saddr = sk->sk_rcv_saddr; + daddr = sk->sk_daddr; } else { const struct iphdr *iph = ip_hdr(skb); saddr = iph->saddr; @@ -1152,8 +1112,9 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); -static bool __tcp_v4_inbound_md5_hash(struct sock *sk, - const struct sk_buff *skb) +/* Called with rcu_read_lock() */ +static bool tcp_v4_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { /* * This gets called for each TCP segment that arrives @@ -1193,7 +1154,7 @@ static bool __tcp_v4_inbound_md5_hash(struct sock *sk, */ genhash = tcp_v4_md5_hash_skb(newhash, hash_expected, - NULL, NULL, skb); + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", @@ -1205,28 +1166,16 @@ static bool __tcp_v4_inbound_md5_hash(struct sock *sk, } return false; } - -static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) -{ - bool ret; - - rcu_read_lock(); - ret = __tcp_v4_inbound_md5_hash(sk, skb); - rcu_read_unlock(); - - return ret; -} - #endif -static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); + ireq->no_srccheck = inet_sk(sk_listener)->transparent; ireq->opt = tcp_v4_save_options(skb); } @@ -1259,7 +1208,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG - .md5_lookup = tcp_v4_reqsk_md5_lookup, + .req_md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, #endif .init_req = tcp_v4_init_req, @@ -1318,8 +1267,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->ir_rmt_addr; - newinet->inet_rcv_saddr = ireq->ir_loc_addr; + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); @@ -1356,7 +1305,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Copy over the MD5 key from the original socket */ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, AF_INET); - if (key != NULL) { + if (key) { /* * We're using one, so create a matching key * on the newsk structure. If we fail to get @@ -1391,15 +1340,17 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); + struct request_sock *req; struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, - iph->saddr, iph->daddr); - if (req) - return tcp_check_req(sk, skb, req, prev, false); + + req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); @@ -1439,7 +1390,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || - dst->ops->check(dst, 0) == NULL) { + !dst->ops->check(dst, 0)) { dst_release(dst); sk->sk_rx_dst = NULL; } @@ -1517,7 +1468,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) @@ -1734,7 +1685,7 @@ do_time_wait: iph->daddr, th->dest, inet_iif(skb)); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; goto process; @@ -1846,7 +1797,7 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - BUG_ON(tp->fastopen_rsk != NULL); + BUG_ON(tp->fastopen_rsk); /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); @@ -1904,13 +1855,13 @@ get_req: } sk = sk_nulls_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); sk = sk_nulls_next(sk); } get_sk: @@ -1922,7 +1873,7 @@ get_sk: goto out; } icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); if (reqsk_queue_len(&icsk->icsk_accept_queue)) { start_req: st->uid = sock_i_uid(sk); @@ -1931,7 +1882,7 @@ start_req: st->sbucket = 0; goto get_req; } - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); st->offset = 0; @@ -2150,7 +2101,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_OPENREQ: if (v) { struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) @@ -2204,17 +2155,17 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) } EXPORT_SYMBOL(tcp_proc_unregister); -static void get_openreq4(const struct sock *sk, const struct request_sock *req, +static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i, kuid_t uid) { const struct inet_request_sock *ireq = inet_rsk(req); - long delta = req->expires - jiffies; + long delta = req->rsk_timer.expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", i, ireq->ir_loc_addr, - ntohs(inet_sk(sk)->inet_sport), + ireq->ir_num, ireq->ir_rmt_addr, ntohs(ireq->ir_rmt_port), TCP_SYN_RECV, @@ -2225,7 +2176,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ - atomic_read(&sk->sk_refcnt), + 0, req); } @@ -2291,9 +2242,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) static void get_timewait4_sock(const struct inet_timewait_sock *tw, struct seq_file *f, int i) { + long delta = tw->tw_timer.expires - jiffies; __be32 dest, src; __u16 destp, srcp; - s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; @@ -2332,7 +2283,7 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) get_tcp4_sock(v, seq, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid); + get_openreq4(v, seq, st->num, st->uid); break; } out: @@ -2460,6 +2411,8 @@ static int __net_init tcp_sk_init(struct net *net) } net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; + net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; + net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; return 0; fail: diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e5f41bd5ec1b..a51d63a43e33 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -28,7 +28,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s struct tcp_fastopen_metrics { u16 mss; - u16 syn_loss:10; /* Recurring Fast Open SYN losses */ + u16 syn_loss:10, /* Recurring Fast Open SYN losses */ + try_exp:2; /* Request w/ exp. option (once) */ unsigned long last_syn_loss; /* Last Fast Open SYN loss */ struct tcp_fastopen_cookie cookie; }; @@ -40,6 +41,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; + possible_net_t tcpm_net; struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; @@ -52,6 +54,11 @@ struct tcp_metrics_block { struct rcu_head rcu_head; }; +static inline struct net *tm_net(struct tcp_metrics_block *tm) +{ + return read_pnet(&tm->tcpm_net); +} + static bool tcp_metric_locked(struct tcp_metrics_block *tm, enum tcp_metric_index idx) { @@ -74,23 +81,20 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { - const struct in6_addr *a6, *b6; - if (a->family != b->family) return false; if (a->family == AF_INET) return a->addr.a4 == b->addr.a4; - - a6 = (const struct in6_addr *) &a->addr.a6[0]; - b6 = (const struct in6_addr *) &b->addr.a6[0]; - - return ipv6_addr_equal(a6, b6); + return ipv6_addr_equal(&a->addr.in6, &b->addr.in6); } struct tcpm_hash_bucket { struct tcp_metrics_block __rcu *chain; }; +static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; +static unsigned int tcp_metrics_hash_log __read_mostly; + static DEFINE_SPINLOCK(tcp_metrics_lock); static void tcpm_suck_dst(struct tcp_metrics_block *tm, @@ -128,6 +132,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, if (fastopen_clear) { tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; + tm->tcpm_fastopen.try_exp = 0; + tm->tcpm_fastopen.cookie.exp = false; tm->tcpm_fastopen.cookie.len = 0; } } @@ -143,6 +149,9 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst #define TCP_METRICS_RECLAIM_DEPTH 5 #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL +#define deref_locked(p) \ + rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock)) + static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, struct inetpeer_addr *saddr, struct inetpeer_addr *daddr, @@ -171,9 +180,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (unlikely(reclaim)) { struct tcp_metrics_block *oldest; - oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); - for (tm = rcu_dereference(oldest->tcpm_next); tm; - tm = rcu_dereference(tm->tcpm_next)) { + oldest = deref_locked(tcp_metrics_hash[hash].chain); + for (tm = deref_locked(oldest->tcpm_next); tm; + tm = deref_locked(tm->tcpm_next)) { if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) oldest = tm; } @@ -183,14 +192,15 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } + write_pnet(&tm->tcpm_net, net); tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; tcpm_suck_dst(tm, dst, true); if (likely(!reclaim)) { - tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; - rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); + tm->tcpm_next = tcp_metrics_hash[hash].chain; + rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm); } out_unlock: @@ -214,10 +224,11 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s struct tcp_metrics_block *tm; int depth = 0; - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, saddr) && - addr_same(&tm->tcpm_daddr, daddr)) + addr_same(&tm->tcpm_daddr, daddr) && + net_eq(tm_net(tm), net)) break; depth++; } @@ -242,8 +253,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; - *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr; + daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr; hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; #endif @@ -252,12 +263,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, } net = dev_net(dst->dev); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr)) + addr_same(&tm->tcpm_daddr, &daddr) && + net_eq(tm_net(tm), net)) break; } tcpm_check_stamp(tm, dst); @@ -288,9 +301,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock hash = (__force unsigned int) daddr.addr.a4; } else { saddr.family = AF_INET6; - *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; + saddr.addr.in6 = tw->tw_v6_rcv_saddr; daddr.family = AF_INET6; - *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; + daddr.addr.in6 = tw->tw_v6_daddr; hash = ipv6_addr_hash(&tw->tw_v6_daddr); } } @@ -299,12 +312,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock return NULL; net = twsk_net(tw); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr)) + addr_same(&tm->tcpm_daddr, &daddr) && + net_eq(tm_net(tm), net)) break; } return tm; @@ -336,9 +351,9 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, hash = (__force unsigned int) daddr.addr.a4; } else { saddr.family = AF_INET6; - *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; + saddr.addr.in6 = sk->sk_v6_rcv_saddr; daddr.family = AF_INET6; - *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; + daddr.addr.in6 = sk->sk_v6_daddr; hash = ipv6_addr_hash(&sk->sk_v6_daddr); } } @@ -347,7 +362,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, return NULL; net = dev_net(dst->dev); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); tm = __tcp_get_metrics(&saddr, &daddr, net, hash); if (tm == TCP_METRICS_RECLAIM_PTR) @@ -492,7 +508,7 @@ void tcp_init_metrics(struct sock *sk) struct tcp_metrics_block *tm; u32 val, crtt = 0; /* cached RTT scaled by 8 */ - if (dst == NULL) + if (!dst) goto reset; dst_confirm(dst); @@ -700,6 +716,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, if (tfom->mss) *mss = tfom->mss; *cookie = tfom->cookie; + if (cookie->len <= 0 && tfom->try_exp == 1) + cookie->exp = true; *syn_loss = tfom->syn_loss; *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; } while (read_seqretry(&fastopen_seqlock, seq)); @@ -708,7 +726,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, } void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie, bool syn_lost) + struct tcp_fastopen_cookie *cookie, bool syn_lost, + u16 try_exp) { struct dst_entry *dst = __sk_dst_get(sk); struct tcp_metrics_block *tm; @@ -725,6 +744,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, tfom->mss = mss; if (cookie && cookie->len > 0) tfom->cookie = *cookie; + else if (try_exp > tfom->try_exp && + tfom->cookie.len <= 0 && !tfom->cookie.exp) + tfom->try_exp = try_exp; if (syn_lost) { ++tfom->syn_loss; tfom->last_syn_loss = jiffies; @@ -773,19 +795,19 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, switch (tm->tcpm_daddr.family) { case AF_INET: - if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_daddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4, + tm->tcpm_daddr.addr.a4) < 0) goto nla_put_failure; - if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, - tm->tcpm_saddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4, + tm->tcpm_saddr.addr.a4) < 0) goto nla_put_failure; break; case AF_INET6: - if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, - tm->tcpm_daddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6, + &tm->tcpm_daddr.addr.in6) < 0) goto nla_put_failure; - if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, - tm->tcpm_saddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6, + &tm->tcpm_saddr.addr.in6) < 0) goto nla_put_failure; break; default: @@ -898,17 +920,19 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + unsigned int max_rows = 1U << tcp_metrics_hash_log; unsigned int row, s_row = cb->args[0]; int s_col = cb->args[1], col = s_col; for (row = s_row; row < max_rows; row++, s_col = 0) { struct tcp_metrics_block *tm; - struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; + struct tcpm_hash_bucket *hb = tcp_metrics_hash + row; rcu_read_lock(); for (col = 0, tm = rcu_dereference(hb->chain); tm; tm = rcu_dereference(tm->tcpm_next), col++) { + if (!net_eq(tm_net(tm), net)) + continue; if (col < s_col) continue; if (tcp_metrics_dump_info(skb, cb, tm) < 0) { @@ -933,7 +957,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, a = info->attrs[v4]; if (a) { addr->family = AF_INET; - addr->addr.a4 = nla_get_be32(a); + addr->addr.a4 = nla_get_in_addr(a); if (hash) *hash = (__force unsigned int) addr->addr.a4; return 0; @@ -943,9 +967,9 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, if (nla_len(a) != sizeof(struct in6_addr)) return -EINVAL; addr->family = AF_INET6; - memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); + addr->addr.in6 = nla_get_in6_addr(a); if (hash) - *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + *hash = ipv6_addr_hash(&addr->addr.in6); return 0; } return optional ? 1 : -EAFNOSUPPORT; @@ -994,13 +1018,15 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!reply) goto nla_put_failure; - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); ret = -ESRCH; rcu_read_lock(); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_daddr, &daddr) && - (!src || addr_same(&tm->tcpm_saddr, &saddr))) { + (!src || addr_same(&tm->tcpm_saddr, &saddr)) && + net_eq(tm_net(tm), net)) { ret = tcp_metrics_fill_info(msg, tm); break; } @@ -1020,34 +1046,27 @@ out_free: return ret; } -#define deref_locked_genl(p) \ - rcu_dereference_protected(p, lockdep_genl_is_held() && \ - lockdep_is_held(&tcp_metrics_lock)) - -#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) - -static int tcp_metrics_flush_all(struct net *net) +static void tcp_metrics_flush_all(struct net *net) { - unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; - struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; + unsigned int max_rows = 1U << tcp_metrics_hash_log; + struct tcpm_hash_bucket *hb = tcp_metrics_hash; struct tcp_metrics_block *tm; unsigned int row; for (row = 0; row < max_rows; row++, hb++) { + struct tcp_metrics_block __rcu **pp; spin_lock_bh(&tcp_metrics_lock); - tm = deref_locked_genl(hb->chain); - if (tm) - hb->chain = NULL; - spin_unlock_bh(&tcp_metrics_lock); - while (tm) { - struct tcp_metrics_block *next; - - next = deref_genl(tm->tcpm_next); - kfree_rcu(tm, rcu_head); - tm = next; + pp = &hb->chain; + for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { + if (net_eq(tm_net(tm), net)) { + *pp = tm->tcpm_next; + kfree_rcu(tm, rcu_head); + } else { + pp = &tm->tcpm_next; + } } + spin_unlock_bh(&tcp_metrics_lock); } - return 0; } static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -1064,19 +1083,23 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) return ret; - if (ret > 0) - return tcp_metrics_flush_all(net); + if (ret > 0) { + tcp_metrics_flush_all(net); + return 0; + } ret = parse_nl_saddr(info, &saddr); if (ret < 0) src = false; - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); - hb = net->ipv4.tcp_metrics_hash + hash; + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); + hb = tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); - for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { + for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { if (addr_same(&tm->tcpm_daddr, &daddr) && - (!src || addr_same(&tm->tcpm_saddr, &saddr))) { + (!src || addr_same(&tm->tcpm_saddr, &saddr)) && + net_eq(tm_net(tm), net)) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); found = true; @@ -1126,6 +1149,9 @@ static int __net_init tcp_net_metrics_init(struct net *net) size_t size; unsigned int slots; + if (!net_eq(net, &init_net)) + return 0; + slots = tcpmhash_entries; if (!slots) { if (totalram_pages >= 128 * 1024) @@ -1134,14 +1160,14 @@ static int __net_init tcp_net_metrics_init(struct net *net) slots = 8 * 1024; } - net->ipv4.tcp_metrics_hash_log = order_base_2(slots); - size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; + tcp_metrics_hash_log = order_base_2(slots); + size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log; - net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!net->ipv4.tcp_metrics_hash) - net->ipv4.tcp_metrics_hash = vzalloc(size); + tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!tcp_metrics_hash) + tcp_metrics_hash = vzalloc(size); - if (!net->ipv4.tcp_metrics_hash) + if (!tcp_metrics_hash) return -ENOMEM; return 0; @@ -1149,19 +1175,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) static void __net_exit tcp_net_metrics_exit(struct net *net) { - unsigned int i; - - for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { - struct tcp_metrics_block *tm, *next; - - tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); - while (tm) { - next = rcu_dereference_protected(tm->tcpm_next, 1); - kfree(tm); - tm = next; - } - } - kvfree(net->ipv4.tcp_metrics_hash); + tcp_metrics_flush_all(net); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { @@ -1175,16 +1189,10 @@ void __init tcp_metrics_init(void) ret = register_pernet_subsys(&tcp_net_metrics_ops); if (ret < 0) - goto cleanup; + panic("Could not allocate the tcp_metrics hash table\n"); + ret = genl_register_family_with_ops(&tcp_metrics_nl_family, tcp_metrics_nl_ops); if (ret < 0) - goto cleanup_subsys; - return; - -cleanup_subsys: - unregister_pernet_subsys(&tcp_net_metrics_ops); - -cleanup: - return; + panic("Could not register tcp_metrics generic netlink\n"); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dd11ac7798c6..63d6311b5365 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -34,18 +34,7 @@ int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, - .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock), .hashinfo = &tcp_hashinfo, - .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, - (unsigned long)&tcp_death_row), - .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, - inet_twdr_twkill_work), -/* Short-time timewait calendar */ - - .twcal_hand = -1, - .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, - (unsigned long)&tcp_death_row), }; EXPORT_SYMBOL_GPL(tcp_death_row); @@ -158,7 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (!th->fin || TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); return TCP_TW_RST; } @@ -174,11 +163,9 @@ kill_with_rst: if (tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, tw->tw_timeout); else - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -211,13 +198,12 @@ kill_with_rst: */ if (sysctl_tcp_rfc1337 == 0) { kill: - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); return TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -267,8 +253,7 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); @@ -283,18 +268,17 @@ EXPORT_SYMBOL(tcp_timewait_state_process); */ void tcp_time_wait(struct sock *sk, int state, int timeo) { - struct inet_timewait_sock *tw = NULL; const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); + struct inet_timewait_sock *tw; bool recycle_ok = false; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tcp_remember_stamp(sk); - if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) - tw = inet_twsk_alloc(sk, state); + tw = inet_twsk_alloc(sk, &tcp_death_row, state); - if (tw != NULL) { + if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); struct inet_sock *inet = inet_sk(sk); @@ -332,7 +316,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) struct tcp_md5sig_key *key; tcptw->tw_md5_key = NULL; key = tp->af_specific->md5_lookup(sk, sk); - if (key != NULL) { + if (key) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) BUG(); @@ -355,8 +339,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } - inet_twsk_schedule(tw, &tcp_death_row, timeo, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this @@ -454,7 +437,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, { struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); - if (newsk != NULL) { + if (newsk) { const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); @@ -572,7 +555,6 @@ EXPORT_SYMBOL(tcp_create_openreq_child); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev, bool fastopen) { struct tcp_options_received tmp_opt; @@ -629,9 +611,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time) && - !inet_rtx_syn_ack(sk, req)) - req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, - TCP_RTO_MAX) + jiffies; + !inet_rtx_syn_ack(sk, req)) { + unsigned long expires = jiffies; + + expires += min(TCP_TIMEOUT_INIT << req->num_timeout, + TCP_RTO_MAX); + if (!fastopen) + mod_timer_pending(&req->rsk_timer, expires); + else + req->rsk_timer.expires = expires; + } return NULL; } @@ -763,10 +752,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * socket is created, wait for troubles. */ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) + if (!child) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_add(sk, req, child); @@ -791,7 +780,7 @@ embryonic_reset: tcp_reset(sk); } if (!fastopen) { - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); } return NULL; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 9d7930ba8e0f..3f7c2fca5431 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -29,8 +29,8 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, } } -struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, + netdev_features_t features) { if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1db253e36045..8c8d7e06b72f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -518,17 +518,26 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; + u8 *p = (u8 *)ptr; + u32 len; /* Fast Open option length */ + + if (foc->exp) { + len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; + *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | + TCPOPT_FASTOPEN_MAGIC); + p += TCPOLEN_EXP_FASTOPEN_BASE; + } else { + len = TCPOLEN_FASTOPEN_BASE + foc->len; + *p++ = TCPOPT_FASTOPEN; + *p++ = len; + } - *ptr++ = htonl((TCPOPT_EXP << 24) | - ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) | - TCPOPT_FASTOPEN_MAGIC); - - memcpy(ptr, foc->val, foc->len); - if ((foc->len & 3) == 2) { - u8 *align = ((u8 *)ptr) + foc->len; - align[0] = align[1] = TCPOPT_NOP; + memcpy(p, foc->val, foc->len); + if ((len & 3) == 2) { + p[foc->len] = TCPOPT_NOP; + p[foc->len + 1] = TCPOPT_NOP; } - ptr += (foc->len + 3) >> 2; + ptr += (len + 3) >> 2; } } @@ -565,7 +574,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; @@ -583,13 +592,17 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (fastopen && fastopen->cookie.len >= 0) { - u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len; + u32 need = fastopen->cookie.len; + + need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE : + TCPOLEN_FASTOPEN_BASE; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { opts->options |= OPTION_FAST_OPEN_COOKIE; opts->fastopen_cookie = &fastopen->cookie; remaining -= need; tp->syn_fastopen = 1; + tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0; } } @@ -601,15 +614,14 @@ static unsigned int tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5, + const struct tcp_md5sig_key *md5, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; #ifdef CONFIG_TCP_MD5SIG - *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); - if (*md5) { + if (md5) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; @@ -620,8 +632,6 @@ static unsigned int tcp_synack_options(struct sock *sk, */ ireq->tstamp_ok &= !ireq->sack_ok; } -#else - *md5 = NULL; #endif /* We always send an MSS option. */ @@ -645,7 +655,10 @@ static unsigned int tcp_synack_options(struct sock *sk, remaining -= TCPOLEN_SACKPERM_ALIGNED; } if (foc != NULL && foc->len >= 0) { - u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; + u32 need = foc->len; + + need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE : + TCPOLEN_FASTOPEN_BASE; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { opts->options |= OPTION_FAST_OPEN_COOKIE; @@ -989,7 +1002,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (md5) { sk_nocaps_add(sk, NETIF_F_GSO_MASK); tp->af_specific->calc_md5_hash(opts.hash_location, - md5, sk, NULL, skb); + md5, sk, skb); } #endif @@ -1151,7 +1164,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* Get a new skb... force flag on. */ buff = sk_stream_alloc_skb(sk, nsize, gfp); - if (buff == NULL) + if (!buff) return -ENOMEM; /* We'll just try again later. */ sk->sk_wmem_queued += buff->truesize; @@ -1354,6 +1367,8 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_af_ops->net_header_len; icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; + if (icsk->icsk_mtup.enabled) + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; } EXPORT_SYMBOL(tcp_mtup_init); @@ -1708,7 +1723,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, return tcp_fragment(sk, skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp); - if (unlikely(buff == NULL)) + if (unlikely(!buff)) return -ENOMEM; sk->sk_wmem_queued += buff->truesize; @@ -1752,20 +1767,23 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, bool *is_cwnd_limited, u32 max_segs) { - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - u32 send_win, cong_win, limit, in_flight; + u32 age, send_win, cong_win, limit, in_flight; + struct tcp_sock *tp = tcp_sk(sk); + struct skb_mstamp now; + struct sk_buff *head; int win_divisor; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto send_now; - if (icsk->icsk_ca_state != TCP_CA_Open) + if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR))) goto send_now; - /* Defer for less than two clock ticks. */ - if (tp->tso_deferred && - (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) + /* Avoid bursty behavior by allowing defer + * only if the last write was recent. + */ + if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1807,11 +1825,14 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, goto send_now; } - /* Ok, it looks like it is advisable to defer. - * Do not rearm the timer if already set to not break TCP ACK clocking. - */ - if (!tp->tso_deferred) - tp->tso_deferred = 1 | (jiffies << 1); + head = tcp_write_queue_head(sk); + skb_mstamp_get(&now); + age = skb_mstamp_us_delta(&now, &head->skb_mstamp); + /* If next ACK is likely to come too late (half srtt), do not defer */ + if (age < (tp->srtt_us >> 4)) + goto send_now; + + /* Ok, it looks like it is advisable to defer. */ if (cong_win < send_win && cong_win < skb->len) *is_cwnd_limited = true; @@ -1819,10 +1840,34 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, return true; send_now: - tp->tso_deferred = 0; return false; } +static inline void tcp_mtu_check_reprobe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + u32 interval; + s32 delta; + + interval = net->ipv4.sysctl_tcp_probe_interval; + delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp; + if (unlikely(delta >= interval * HZ)) { + int mss = tcp_current_mss(sk); + + /* Update current search range */ + icsk->icsk_mtup.probe_size = 0; + icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + + sizeof(struct tcphdr) + + icsk->icsk_af_ops->net_header_len; + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); + + /* Update probe time stamp */ + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + } +} + /* Create a new MTU probe if we are ready. * MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing @@ -1837,11 +1882,13 @@ static int tcp_mtu_probe(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb, *nskb, *next; + struct net *net = sock_net(sk); int len; int probe_size; int size_needed; int copy; int mss_now; + int interval; /* Not currently probing/verifying, * not in recovery, @@ -1854,12 +1901,25 @@ static int tcp_mtu_probe(struct sock *sk) tp->rx_opt.num_sacks || tp->rx_opt.dsack) return -1; - /* Very simple search strategy: just double the MSS. */ + /* Use binary search for probe_size between tcp_mss_base, + * and current mss_clamp. if (search_high - search_low) + * smaller than a threshold, backoff from probing. + */ mss_now = tcp_current_mss(sk); - probe_size = 2 * tp->mss_cache; + probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high + + icsk->icsk_mtup.search_low) >> 1); size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; - if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { - /* TODO: set timer for probe_converge_event */ + interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low; + /* When misfortune happens, we are reprobing actively, + * and then reprobe timer has expired. We stick with current + * probing process by not resetting search range to its orignal. + */ + if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || + interval < net->ipv4.sysctl_tcp_probe_threshold) { + /* Check whether enough time has elaplased for + * another round of probing. + */ + tcp_mtu_check_reprobe(sk); return -1; } @@ -1881,7 +1941,8 @@ static int tcp_mtu_probe(struct sock *sk) } /* We're allowed to probe. Build it now. */ - if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) + nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC); + if (!nskb) return -1; sk->sk_wmem_queued += nskb->truesize; sk_mem_charge(sk, nskb->truesize); @@ -2179,7 +2240,7 @@ void tcp_send_loss_probe(struct sock *sk) int mss = tcp_current_mss(sk); int err = -1; - if (tcp_send_head(sk) != NULL) { + if (tcp_send_head(sk)) { err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); goto rearm_timer; } @@ -2689,7 +2750,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == tcp_send_head(sk)) break; /* we could do better than to assign each time */ - if (hole == NULL) + if (!hole) tp->retransmit_skb_hint = skb; /* Assume this retransmit will generate @@ -2713,7 +2774,7 @@ begin_fwd: if (!tcp_can_forward_retransmit(sk)) break; /* Backtrack if necessary to non-L'ed skb */ - if (hole != NULL) { + if (hole) { skb = hole; hole = NULL; } @@ -2721,7 +2782,7 @@ begin_fwd: goto begin_fwd; } else if (!(sacked & TCPCB_LOST)) { - if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) + if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) hole = skb; continue; @@ -2766,7 +2827,7 @@ void tcp_send_fin(struct sock *sk) */ mss_now = tcp_current_mss(sk); - if (tcp_send_head(sk) != NULL) { + if (tcp_send_head(sk)) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; @@ -2824,14 +2885,14 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *skb; skb = tcp_write_queue_head(sk); - if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { pr_debug("%s: wrong queue state\n", __func__); return -EFAULT; } if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - if (nskb == NULL) + if (!nskb) return -ENOMEM; tcp_unlink_write_queue(skb, sk); __skb_header_release(nskb); @@ -2866,7 +2927,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; struct sk_buff *skb; - struct tcp_md5sig_key *md5; + struct tcp_md5sig_key *md5 = NULL; int tcp_header_size; int mss; @@ -2879,7 +2940,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); - security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) @@ -2892,7 +2952,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif skb_mstamp_get(&skb->skb_mstamp); - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + +#ifdef CONFIG_TCP_MD5SIG + rcu_read_lock(); + md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); +#endif + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, foc) + sizeof(*th); skb_push(skb, tcp_header_size); @@ -2923,12 +2988,14 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ - if (md5) { + if (md5) tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, - md5, NULL, req, skb); - } + md5, req_to_sk(req), skb); + rcu_read_unlock(); #endif + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp.tv64 = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); @@ -2966,7 +3033,7 @@ static void tcp_connect_init(struct sock *sk) (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); #ifdef CONFIG_TCP_MD5SIG - if (tp->af_specific->md5_lookup(sk, sk) != NULL) + if (tp->af_specific->md5_lookup(sk, sk)) tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif @@ -3252,7 +3319,7 @@ void tcp_send_ack(struct sock *sk) * sock. */ buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (buff == NULL) { + if (!buff) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -3296,7 +3363,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) /* We don't queue it, tcp_transmit_skb() sets ownership. */ skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (skb == NULL) + if (!skb) return -1; /* Reserve space for headers and set control bits. */ @@ -3327,8 +3394,8 @@ int tcp_write_wakeup(struct sock *sk) if (sk->sk_state == TCP_CLOSE) return -1; - if ((skb = tcp_send_head(sk)) != NULL && - before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { + skb = tcp_send_head(sk); + if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { int err; unsigned int mss = tcp_current_mss(sk); unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0732b787904e..8c65dc147d8b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -107,6 +107,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct net *net = sock_net(sk); @@ -166,7 +167,7 @@ static int tcp_write_timeout(struct sock *sk) if (icsk->icsk_retransmits) { dst_negative_advice(sk); if (tp->syn_fastopen || tp->syn_data) - tcp_fastopen_cache_set(sk, 0, NULL, true); + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); if (tp->syn_data) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); @@ -326,7 +327,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) struct request_sock *req; req = tcp_sk(sk)->fastopen_rsk; - req->rsk_ops->syn_ack_timeout(sk, req); + req->rsk_ops->syn_ack_timeout(req); if (req->num_timeout >= max_retries) { tcp_write_err(sk); @@ -538,19 +539,11 @@ static void tcp_write_timer(unsigned long data) sock_put(sk); } -/* - * Timer for listening sockets - */ - -static void tcp_synack_timer(struct sock *sk) +void tcp_syn_ack_timeout(const struct request_sock *req) { - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, - TCP_TIMEOUT_INIT, TCP_RTO_MAX); -} + struct net *net = read_pnet(&inet_rsk(req)->ireq_net); -void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) -{ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); + NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS); } EXPORT_SYMBOL(tcp_syn_ack_timeout); @@ -582,7 +575,7 @@ static void tcp_keepalive_timer (unsigned long data) } if (sk->sk_state == TCP_LISTEN) { - tcp_synack_timer(sk); + pr_err("Hmm... keepalive on a LISTEN ???\n"); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 97ef1f8b7be8..d10b7e0112eb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -318,8 +318,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); } -static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, - unsigned int port) +static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, + unsigned int port) { return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } @@ -421,9 +421,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, return score; } -static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +static u32 udp_ehashfn(const struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) { static u32 udp_ehash_secret __read_mostly; @@ -433,7 +433,6 @@ static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } - /* called with read_rcu_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, @@ -633,7 +632,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable); - if (sk == NULL) { + if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ } @@ -873,8 +872,7 @@ out: } EXPORT_SYMBOL(udp_push_pending_frames); -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); @@ -1012,7 +1010,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (connected) rt = (struct rtable *)sk_dst_check(sk, 0); - if (rt == NULL) { + if (!rt) { struct net *net = sock_net(sk); fl4 = &fl4_stack; @@ -1136,7 +1134,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, * sendpage interface can't pass. * This will succeed only when the socket is connected. */ - ret = udp_sendmsg(NULL, sk, &msg, 0); + ret = udp_sendmsg(sk, &msg, 0); if (ret < 0) return ret; } @@ -1172,7 +1170,6 @@ out: return ret; } - /** * first_packet_length - return length of first packet in receive queue * @sk: socket @@ -1254,8 +1251,8 @@ EXPORT_SYMBOL(udp_ioctl); * return it, otherwise we block. */ -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); @@ -1356,7 +1353,6 @@ csum_copy_err: goto try_again; } - int udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -1523,7 +1519,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { + if (skb->len > sizeof(struct udphdr) && encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -1580,7 +1576,6 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); @@ -1610,7 +1605,6 @@ drop: return -1; } - static void flush_stack(struct sock **stack, unsigned int count, struct sk_buff *skb, unsigned int final) { @@ -1620,7 +1614,7 @@ static void flush_stack(struct sock **stack, unsigned int count, for (i = 0; i < count; i++) { sk = stack[i]; - if (likely(skb1 == NULL)) + if (likely(!skb1)) skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); if (!skb1) { @@ -1803,7 +1797,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); - if (sk != NULL) { + if (sk) { int ret; if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) @@ -2525,6 +2519,16 @@ void __init udp_table_init(struct udp_table *table, const char *name) } } +u32 udp_flow_hashrnd(void) +{ + static u32 hashrnd __read_mostly; + + net_get_random_once(&hashrnd, sizeof(hashrnd)); + + return hashrnd; +} +EXPORT_SYMBOL(udp_flow_hashrnd); + void __init udp_init(void) { unsigned long limit; diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 4a000f1dd757..b763c39ae1d7 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -18,8 +18,9 @@ #include <linux/sock_diag.h> static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *req, - struct nlattr *bc) + struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -31,7 +32,8 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { int err = -EINVAL; struct sock *sk; @@ -56,7 +58,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out_nosk; err = -ENOENT; - if (sk == NULL) + if (!sk) goto out_nosk; err = sock_diag_check_cookie(sk, req->id.idiag_cookie); @@ -90,8 +92,9 @@ out_nosk: return err; } -static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) +static void udp_dump(struct udp_table *table, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); @@ -144,13 +147,13 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { udp_dump(&udp_table, skb, cb, r, bc); } static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return udp_dump_one(&udp_table, in_skb, nlh, req); } @@ -170,13 +173,14 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, + struct nlattr *bc) { udp_dump(&udplite_table, skb, cb, r, bc); } static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return udp_dump_one(&udplite_table, in_skb, nlh, req); } diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index f3c27899f62b..7e0fe4bdd967 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -21,8 +21,8 @@ int compat_udp_setsockopt(struct sock *sk, int level, int optname, int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 4915d8284a86..f9386160cbee 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -285,7 +285,7 @@ void udp_del_offload(struct udp_offload *uo) pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); unlock: spin_unlock(&udp_offload_lock); - if (uo_priv != NULL) + if (uo_priv) call_rcu(&uo_priv->rcu, udp_offload_free_routine); } EXPORT_SYMBOL(udp_del_offload); @@ -394,7 +394,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) break; } - if (uo_priv != NULL) { + if (uo_priv) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr), diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index c83b35485056..6bb98cc193c9 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -75,7 +75,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck) @@ -92,7 +92,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index aac6197b7a71..60b032f58ccc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -22,9 +22,9 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) +static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb) { - if (skb_dst(skb) == NULL) { + if (!skb_dst(skb)) { const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, @@ -52,7 +52,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, + skb->dev, NULL, xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 91771a7c802f..35feda676464 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -63,7 +63,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; - ip_select_ident(skb, NULL); + ip_select_ident(dev_net(dst->dev), skb, NULL); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index dab73813cb92..2878dbfffeb7 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm4_prepare_output); -int xfrm4_output_finish(struct sk_buff *skb) +int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -77,26 +77,26 @@ int xfrm4_output_finish(struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; #endif - return xfrm_output(skb); + return xfrm_output(sk, skb); } -static int __xfrm4_output(struct sk_buff *skb) +static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; #ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif - return x->outer_mode->afinfo->output_finish(skb); + return x->outer_mode->afinfo->output_finish(sk, skb); } int xfrm4_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, skb_dst(skb)->dev, __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6156f68a1e90..bff69746e05f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -232,7 +232,6 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops = { .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, @@ -299,7 +298,7 @@ static void __net_exit xfrm4_net_exit(struct net *net) { struct ctl_table *table; - if (net->ipv4.xfrm4_hdr == NULL) + if (!net->ipv4.xfrm4_hdr) return; table = net->ipv4.xfrm4_hdr->ctl_table_arg; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b6030025f411..37b70e82bff8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -46,6 +46,7 @@ #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> +#include <linux/inet.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_addr.h> @@ -102,6 +103,9 @@ #define INFINITY_LIFE_TIME 0xFFFFFFFF +#define IPV6_MAX_STRLEN \ + sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") + static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; @@ -127,6 +131,9 @@ static void ipv6_regen_rndid(unsigned long data); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); +static int ipv6_generate_stable_address(struct in6_addr *addr, + u8 dad_count, + const struct inet6_dev *idev); /* * Configured unicast address hash table @@ -202,6 +209,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + } }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -240,6 +250,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + }, }; /* Check if a valid qdisc is available */ @@ -321,7 +334,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); - if (ndev == NULL) + if (!ndev) return ERR_PTR(err); rwlock_init(&ndev->lock); @@ -333,7 +346,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); - if (ndev->nd_parms == NULL) { + if (!ndev->nd_parms) { kfree(ndev); return ERR_PTR(err); } @@ -468,7 +481,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; ncm = nlmsg_data(nlh); @@ -506,7 +519,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, int err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, @@ -561,10 +574,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, break; default: dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) + if (!dev) goto errout; in6_dev = __in6_dev_get(dev); - if (in6_dev == NULL) + if (!in6_dev) goto errout; devconf = &in6_dev->cnf; break; @@ -572,7 +585,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, @@ -841,7 +854,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); - if (ifa == NULL) { + if (!ifa) { ADBG("ipv6_add_addr: malloc failed\n"); err = -ENOBUFS; goto out; @@ -860,7 +873,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->peer_addr = *peer_addr; spin_lock_init(&ifa->lock); - spin_lock_init(&ifa->state_lock); INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; @@ -1003,10 +1015,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ASSERT_RTNL(); - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (state == INET6_IFADDR_STATE_DEAD) goto out; @@ -1546,7 +1558,7 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp_flags&banned_flags) && - (dev == NULL || ifp->idev->dev == dev || + (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); return 1; @@ -1568,7 +1580,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev) + if (!dev || ifp->idev->dev == dev) return true; } } @@ -1637,7 +1649,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev || + if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { result = ifp; in6_ifa_hold(ifp); @@ -1686,19 +1698,21 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); return err; } void addrconf_dad_failure(struct inet6_ifaddr *ifp) { + struct in6_addr addr; struct inet6_dev *idev = ifp->idev; + struct net *net = dev_net(ifp->idev->dev); if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -1708,9 +1722,57 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", ifp->idev->dev->name, &ifp->addr); - if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { - struct in6_addr addr; + spin_lock_bh(&ifp->lock); + + if (ifp->flags & IFA_F_STABLE_PRIVACY) { + int scope = ifp->scope; + u32 flags = ifp->flags; + struct in6_addr new_addr; + struct inet6_ifaddr *ifp2; + u32 valid_lft, preferred_lft; + int pfxlen = ifp->prefix_len; + int retries = ifp->stable_privacy_retry + 1; + + if (retries > net->ipv6.sysctl.idgen_retries) { + net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n", + ifp->idev->dev->name); + goto errdad; + } + + new_addr = ifp->addr; + if (ipv6_generate_stable_address(&new_addr, retries, + idev)) + goto errdad; + + valid_lft = ifp->valid_lft; + preferred_lft = ifp->prefered_lft; + + spin_unlock_bh(&ifp->lock); + + if (idev->cnf.max_addresses && + ipv6_count_addresses(idev) >= + idev->cnf.max_addresses) + goto lock_errdad; + + net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", + ifp->idev->dev->name); + + ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, + scope, flags, valid_lft, + preferred_lft); + if (IS_ERR(ifp2)) + goto lock_errdad; + + spin_lock_bh(&ifp2->lock); + ifp2->stable_privacy_retry = retries; + ifp2->state = INET6_IFADDR_STATE_PREDAD; + spin_unlock_bh(&ifp2->lock); + addrconf_mod_dad_work(ifp2, net->ipv6.sysctl.idgen_delay); + in6_ifa_put(ifp2); +lock_errdad: + spin_lock_bh(&ifp->lock); + } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { addr.s6_addr32[0] = htonl(0xfe800000); addr.s6_addr32[1] = 0; @@ -1724,10 +1786,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - spin_lock_bh(&ifp->state_lock); +errdad: /* transition from _POSTDAD to _ERRDAD */ ifp->state = INET6_IFADDR_STATE_ERRDAD; - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); addrconf_mod_dad_work(ifp, 0); } @@ -2052,7 +2114,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_table *table; table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); - if (table == NULL) + if (!table) return NULL; read_lock_bh(&table->tb6_lock); @@ -2186,6 +2248,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) __u32 valid_lft; __u32 prefered_lft; int addr_type; + u32 addr_flags = 0; struct inet6_dev *in6_dev; struct net *net = dev_net(dev); @@ -2215,7 +2278,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev = in6_dev_get(dev); - if (in6_dev == NULL) { + if (!in6_dev) { net_dbg_ratelimited("addrconf: device %s not configured\n", dev->name); return; @@ -2292,6 +2355,12 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); tokenized = true; + } else if (in6_dev->addr_gen_mode == + IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !ipv6_generate_stable_address(&addr, 0, + in6_dev)) { + addr_flags |= IFA_F_STABLE_PRIVACY; + goto ok; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); @@ -2308,9 +2377,8 @@ ok: ifp = ipv6_get_ifaddr(net, &addr, dev, 1); - if (ifp == NULL && valid_lft) { + if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; - u32 addr_flags = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && @@ -2350,7 +2418,7 @@ ok: u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ - spin_lock(&ifp->lock); + spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; @@ -2380,12 +2448,12 @@ ok: ifp->tstamp = now; flags = ifp->flags; ifp->flags &= ~IFA_F_DEPRECATED; - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); } else - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, create, now); @@ -2418,7 +2486,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) dev = __dev_get_by_index(net, ireq.ifr6_ifindex); err = -ENODEV; - if (dev == NULL) + if (!dev) goto err_exit; #if IS_ENABLED(CONFIG_IPV6_SIT) @@ -2464,6 +2532,23 @@ err_exit: return err; } +static int ipv6_mc_config(struct sock *sk, bool join, + const struct in6_addr *addr, int ifindex) +{ + int ret; + + ASSERT_RTNL(); + + lock_sock(sk); + if (join) + ret = ipv6_sock_mc_join(sk, ifindex, addr); + else + ret = ipv6_sock_mc_drop(sk, ifindex, addr); + release_sock(sk); + + return ret; +} + /* * Manual configuration of address on an interface */ @@ -2476,10 +2561,10 @@ static int inet6_addr_add(struct net *net, int ifindex, struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + unsigned long timeout; + clock_t expires; int scope; u32 flags; - clock_t expires; - unsigned long timeout; ASSERT_RTNL(); @@ -2501,6 +2586,14 @@ static int inet6_addr_add(struct net *net, int ifindex, if (IS_ERR(idev)) return PTR_ERR(idev); + if (ifa_flags & IFA_F_MCAUTOJOIN) { + int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk, + true, pfx, ifindex); + + if (ret < 0) + return ret; + } + scope = ipv6_addr_scope(pfx); timeout = addrconf_timeout_fixup(valid_lft, HZ); @@ -2542,6 +2635,9 @@ static int inet6_addr_add(struct net *net, int ifindex, in6_ifa_put(ifp); addrconf_verify_rtnl(); return 0; + } else if (ifa_flags & IFA_F_MCAUTOJOIN) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, ifindex); } return PTR_ERR(ifp); @@ -2562,7 +2658,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, return -ENODEV; idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENXIO; read_lock_bh(&idev->lock); @@ -2578,6 +2674,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, jiffies); ipv6_del_addr(ifp); addrconf_verify_rtnl(); + if (ipv6_addr_is_multicast(pfx)) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, dev->ifindex); + } return 0; } } @@ -2710,7 +2810,7 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2757,10 +2857,11 @@ static void init_loopback(struct net_device *dev) } } -static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) +static void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags) { struct inet6_ifaddr *ifp; - u32 addr_flags = IFA_F_PERMANENT; + u32 addr_flags = flags | IFA_F_PERMANENT; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (idev->cnf.optimistic_dad && @@ -2768,7 +2869,6 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr addr_flags |= IFA_F_OPTIMISTIC; #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { @@ -2778,18 +2878,103 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr } } +static bool ipv6_reserved_interfaceid(struct in6_addr address) +{ + if ((address.s6_addr32[2] | address.s6_addr32[3]) == 0) + return true; + + if (address.s6_addr32[2] == htonl(0x02005eff) && + ((address.s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000))) + return true; + + if (address.s6_addr32[2] == htonl(0xfdffffff) && + ((address.s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80))) + return true; + + return false; +} + +static int ipv6_generate_stable_address(struct in6_addr *address, + u8 dad_count, + const struct inet6_dev *idev) +{ + static DEFINE_SPINLOCK(lock); + static __u32 digest[SHA_DIGEST_WORDS]; + static __u32 workspace[SHA_WORKSPACE_WORDS]; + + static union { + char __data[SHA_MESSAGE_BYTES]; + struct { + struct in6_addr secret; + __be32 prefix[2]; + unsigned char hwaddr[MAX_ADDR_LEN]; + u8 dad_count; + } __packed; + } data; + + struct in6_addr secret; + struct in6_addr temp; + struct net *net = dev_net(idev->dev); + + BUILD_BUG_ON(sizeof(data.__data) != sizeof(data)); + + if (idev->cnf.stable_secret.initialized) + secret = idev->cnf.stable_secret.secret; + else if (net->ipv6.devconf_dflt->stable_secret.initialized) + secret = net->ipv6.devconf_dflt->stable_secret.secret; + else + return -1; + +retry: + spin_lock_bh(&lock); + + sha_init(digest); + memset(&data, 0, sizeof(data)); + memset(workspace, 0, sizeof(workspace)); + memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len); + data.prefix[0] = address->s6_addr32[0]; + data.prefix[1] = address->s6_addr32[1]; + data.secret = secret; + data.dad_count = dad_count; + + sha_transform(digest, data.__data, workspace); + + temp = *address; + temp.s6_addr32[2] = (__force __be32)digest[0]; + temp.s6_addr32[3] = (__force __be32)digest[1]; + + spin_unlock_bh(&lock); + + if (ipv6_reserved_interfaceid(temp)) { + dad_count++; + if (dad_count > dev_net(idev->dev)->ipv6.sysctl.idgen_retries) + return -1; + goto retry; + } + + *address = temp; + return 0; +} + static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { - if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { - struct in6_addr addr; + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { + if (!ipv6_generate_stable_address(&addr, 0, idev)) + addrconf_add_linklocal(idev, &addr, + IFA_F_STABLE_PRIVACY); + else if (prefix_route) + addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { /* addrconf_add_linklocal also adds a prefix_route and we * only need to care about prefix routes if ipv6_generate_eui64 * couldn't generate one. */ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) - addrconf_add_linklocal(idev, &addr); + addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); } @@ -2834,7 +3019,7 @@ static void addrconf_sit_config(struct net_device *dev) */ idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2859,7 +3044,7 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3056,7 +3241,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENODEV; /* @@ -3127,10 +3312,10 @@ restart: write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->state_lock); + spin_lock_bh(&ifa->lock); state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifa->state_lock); + spin_unlock_bh(&ifa->lock); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -3288,12 +3473,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp) { bool begin_dad = false; - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state != INET6_IFADDR_STATE_DEAD) { ifp->state = INET6_IFADDR_STATE_PREDAD; begin_dad = true; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (begin_dad) addrconf_mod_dad_work(ifp, 0); @@ -3315,7 +3500,7 @@ static void addrconf_dad_work(struct work_struct *w) rtnl_lock(); - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_PREDAD) { action = DAD_BEGIN; ifp->state = INET6_IFADDR_STATE_DAD; @@ -3323,7 +3508,7 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (action == DAD_BEGIN) { addrconf_dad_begin(ifp); @@ -3811,7 +3996,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); - if (pfx == NULL) + if (!pfx) return -EINVAL; ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; @@ -3923,7 +4108,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); - if (pfx == NULL) + if (!pfx) return -EINVAL; if (tb[IFA_CACHEINFO]) { @@ -3938,17 +4123,17 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) } dev = __dev_get_by_index(net, ifm->ifa_index); - if (dev == NULL) + if (!dev) return -ENODEV; ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; /* We ignore other flags so far. */ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | - IFA_F_NOPREFIXROUTE; + IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN; ifa = ipv6_get_ifaddr(net, pfx, dev, 1); - if (ifa == NULL) { + if (!ifa) { /* * It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. @@ -4023,7 +4208,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), @@ -4052,11 +4237,11 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } if (!ipv6_addr_any(&ifa->peer_addr)) { - if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || - nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || + nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0) goto error; } else - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0) goto error; if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) @@ -4084,11 +4269,11 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); @@ -4110,11 +4295,11 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); @@ -4283,7 +4468,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) goto errout; addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (addr == NULL) { + if (!addr) { err = -EINVAL; goto errout; } @@ -4326,7 +4511,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); @@ -4398,6 +4583,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; + /* we omit DEVCONF_STABLE_SECRET for now */ } static inline size_t inet6_ifla6_size(void) @@ -4478,24 +4664,24 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); - if (nla == NULL) + if (!nla) goto nla_put_failure; ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); /* XXX - MC not implemented */ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); - if (nla == NULL) + if (!nla) goto nla_put_failure; if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) @@ -4541,7 +4727,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) ASSERT_RTNL(); - if (token == NULL) + if (!token) return -EINVAL; if (ipv6_addr_any(token)) return -EINVAL; @@ -4632,8 +4818,15 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); if (mode != IN6_ADDR_GEN_MODE_EUI64 && - mode != IN6_ADDR_GEN_MODE_NONE) + mode != IN6_ADDR_GEN_MODE_NONE && + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY) return -EINVAL; + + if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !idev->cnf.stable_secret.initialized && + !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized) + return -EINVAL; + idev->addr_gen_mode = mode; err = 0; } @@ -4650,7 +4843,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, void *protoinfo; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; hdr = nlmsg_data(nlh); @@ -4665,11 +4858,11 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; protoinfo = nla_nest_start(skb, IFLA_PROTINFO); - if (protoinfo == NULL) + if (!protoinfo) goto nla_put_failure; if (inet6_fill_ifla6_attrs(skb, idev) < 0) @@ -4730,7 +4923,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) int err = -ENOBUFS; skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); @@ -4763,7 +4956,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, struct prefix_cacheinfo ci; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; pmsg = nlmsg_data(nlh); @@ -4802,7 +4995,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, int err = -ENOBUFS; skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); @@ -5042,6 +5235,74 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return ret; } +static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int err; + struct in6_addr addr; + char str[IPV6_MAX_STRLEN]; + struct ctl_table lctl = *ctl; + struct net *net = ctl->extra2; + struct ipv6_stable_secret *secret = ctl->data; + + if (&net->ipv6.devconf_all->stable_secret == ctl->data) + return -EIO; + + lctl.maxlen = IPV6_MAX_STRLEN; + lctl.data = str; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (!write && !secret->initialized) { + err = -EIO; + goto out; + } + + if (!write) { + err = snprintf(str, sizeof(str), "%pI6", + &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; + } + } + + err = proc_dostring(&lctl, write, buffer, lenp, ppos); + if (err || !write) + goto out; + + if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) { + err = -EIO; + goto out; + } + + secret->initialized = true; + secret->secret = addr; + + if (&net->ipv6.devconf_dflt->stable_secret == ctl->data) { + struct net_device *dev; + + for_each_netdev(net, dev) { + struct inet6_dev *idev = __in6_dev_get(dev); + + if (idev) { + idev->addr_gen_mode = + IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + } + } else { + struct inet6_dev *idev = ctl->extra1; + + idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + +out: + rtnl_unlock(); + + return err; +} static struct addrconf_sysctl_table { @@ -5315,6 +5576,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, + { /* sentinel */ } }, @@ -5328,7 +5596,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (t == NULL) + if (!t) goto out; for (i = 0; t->addrconf_vars[i].data; i++) { @@ -5340,7 +5608,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); - if (t->sysctl_header == NULL) + if (!t->sysctl_header) goto free; p->sysctl = t; @@ -5356,7 +5624,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) { struct addrconf_sysctl_table *t; - if (p->sysctl == NULL) + if (!p->sysctl) return; t = p->sysctl; @@ -5399,17 +5667,20 @@ static int __net_init addrconf_init_net(struct net *net) struct ipv6_devconf *all, *dflt; all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) + if (!all) goto err_alloc_all; dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) + if (!dflt) goto err_alloc_dflt; /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; + dflt->stable_secret.initialized = false; + all->stable_secret.initialized = false; + net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 98cc4cd570e2..d873ceea86e6 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -140,7 +140,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) struct net_device *dev = idev->dev; WARN_ON(!list_empty(&idev->addr_list)); - WARN_ON(idev->mc_list != NULL); + WARN_ON(idev->mc_list); WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index e43e79d0a612..882124ebb438 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -29,9 +29,7 @@ * Policy Table */ struct ip6addrlbl_entry { -#ifdef CONFIG_NET_NS - struct net *lbl_net; -#endif + possible_net_t lbl_net; struct in6_addr prefix; int prefixlen; int ifindex; @@ -129,9 +127,6 @@ static const __net_initconst struct ip6addrlbl_init_table /* Object management */ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) { -#ifdef CONFIG_NET_NS - release_net(p->lbl_net); -#endif kfree(p); } @@ -240,9 +235,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); -#ifdef CONFIG_NET_NS - newp->lbl_net = hold_net(net); -#endif + write_pnet(&newp->lbl_net, net); atomic_set(&newp->refcnt, 1); return newp; } @@ -484,7 +477,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb, ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); - if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || + if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e8c4400f23e9..eef63b394c5a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -164,11 +164,11 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - WARN_ON(answer_prot->slab == NULL); + WARN_ON(!answer_prot->slab); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); - if (sk == NULL) + if (!sk) goto out; sock_init_data(sock, sk); @@ -391,7 +391,7 @@ int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; - if (sk == NULL) + if (!sk) return -EINVAL; /* Free mc lists */ @@ -413,11 +413,11 @@ void inet6_destroy_sock(struct sock *sk) /* Release rx options */ skb = xchg(&np->pktoptions, NULL); - if (skb != NULL) + if (skb) kfree_skb(skb); skb = xchg(&np->rxpmtu, NULL); - if (skb != NULL) + if (skb) kfree_skb(skb); /* Free flowlabels */ @@ -426,7 +426,7 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ opt = xchg(&np->opt, NULL); - if (opt != NULL) + if (opt) sock_kfree_s(sk, opt, opt->tot_len); } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -640,7 +640,7 @@ int inet6_sk_rebuild_header(struct sock *sk) dst = __sk_dst_check(sk, np->dst_cookie); - if (dst == NULL) { + if (!dst) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; struct flowi6 fl6; @@ -766,6 +766,8 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = 0; + net->ipv6.sysctl.idgen_retries = 3; + net->ipv6.sysctl.idgen_delay = 1 * HZ; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); @@ -824,7 +826,7 @@ static int __init inet6_init(void) struct list_head *r; int err = 0; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); + sock_skb_cb_check_size(sizeof(struct inet6_skb_parm)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index a6727add2624..ed7d4e3f9c10 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -681,7 +681,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); - if (ahp == NULL) + if (!ahp) return -ENOMEM; ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index baf2742d1ec4..514ac259f543 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -60,6 +60,8 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; + ASSERT_RTNL(); + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) @@ -68,12 +70,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); - if (pac == NULL) + if (!pac) return -ENOMEM; pac->acl_next = NULL; pac->acl_addr = *addr; - rtnl_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -92,7 +93,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } else dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) { + if (!dev) { err = -ENODEV; goto error; } @@ -130,7 +131,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } error: - rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -146,7 +146,8 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - rtnl_lock(); + ASSERT_RTNL(); + prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -154,10 +155,8 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) break; prev_pac = pac; } - if (!pac) { - rtnl_unlock(); + if (!pac) return -ENOENT; - } if (prev_pac) prev_pac->acl_next = pac->acl_next; else @@ -166,7 +165,6 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); return 0; @@ -224,7 +222,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); - if (aca == NULL) + if (!aca) return NULL; aca->aca_addr = *addr; @@ -270,7 +268,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) goto out; } aca = aca_alloc(rt, addr); - if (aca == NULL) { + if (!aca) { ip6_rt_put(rt); err = -ENOMEM; goto out; @@ -339,7 +337,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENODEV; return __ipv6_dev_ac_dec(idev, addr); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ace8daca5c83..762a58c772b8 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -71,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -373,7 +373,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) err = -EAGAIN; skb = sock_dequeue_err_skb(sk); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; @@ -463,7 +463,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, err = -EAGAIN; skb = xchg(&np->rxpmtu, NULL); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e48f2c7c5c59..31f1b5d5e2ef 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -495,7 +495,7 @@ static int esp_init_authenc(struct xfrm_state *x) int err; err = -EINVAL; - if (x->ealg == NULL) + if (!x->ealg) goto error; err = -ENAMETOOLONG; diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 8af3eb57f438..5c5d23e59da5 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -82,7 +82,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (nexthdr == NEXTHDR_NONE) return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) + if (!hp) return -1; if (nexthdr == NEXTHDR_FRAGMENT) { __be16 _frag_off, *fp; @@ -91,7 +91,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, frag_off), sizeof(_frag_off), &_frag_off); - if (fp == NULL) + if (!fp) return -1; *frag_offp = *fp; @@ -218,7 +218,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, } hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) + if (!hp) return -EBADMSG; if (nexthdr == NEXTHDR_ROUTING) { @@ -226,7 +226,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, rh = skb_header_pointer(skb, start, sizeof(_rh), &_rh); - if (rh == NULL) + if (!rh) return -EBADMSG; if (flags && (*flags & IP6_FH_F_SKIP_RH) && @@ -245,7 +245,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, frag_off), sizeof(_frag_off), &_frag_off); - if (fp == NULL) + if (!fp) return -EBADMSG; _frag_off = ntohs(*fp) & ~0x7; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 70bc6abc0639..2367a16eae58 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -199,12 +199,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } if (frh->src_len) - nla_memcpy(&rule6->src.addr, tb[FRA_SRC], - sizeof(struct in6_addr)); + rule6->src.addr = nla_get_in6_addr(tb[FRA_SRC]); if (frh->dst_len) - nla_memcpy(&rule6->dst.addr, tb[FRA_DST], - sizeof(struct in6_addr)); + rule6->dst.addr = nla_get_in6_addr(tb[FRA_DST]); rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; @@ -250,11 +248,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = rule6->tclass; if ((rule6->dst.plen && - nla_put(skb, FRA_DST, sizeof(struct in6_addr), - &rule6->dst.addr)) || + nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || (rule6->src.plen && - nla_put(skb, FRA_SRC, sizeof(struct in6_addr), - &rule6->src.addr))) + nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr))) goto nla_put_failure; return 0; @@ -299,19 +295,16 @@ static int __net_init fib6_rules_net_init(struct net *net) ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); - net->ipv6.fib6_rules_ops = ops; - - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, - RT6_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0); if (err) goto out_fib6_rules_ops; - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, - 0x7FFE, RT6_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) goto out_fib6_rules_ops; + net->ipv6.fib6_rules_ops = ops; out: return err; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a5e95199585e..2c2b5d51f15c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -160,8 +160,7 @@ static bool is_ineligible(const struct sk_buff *skb) tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); - if (tp == NULL || - !(*tp & ICMPV6_INFOMSG_MASK)) + if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; @@ -231,7 +230,7 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) offset += skb_network_offset(skb); op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); - if (op == NULL) + if (!op) return true; return (*op & 0xC0) == 0x80; } @@ -244,7 +243,7 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, int err = 0; skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) + if (!skb) goto out; icmp6h = icmp6_hdr(skb); @@ -479,7 +478,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); - if (sk == NULL) + if (!sk) return; sk->sk_mark = mark; np = inet6_sk(sk); @@ -582,7 +581,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); - if (sk == NULL) + if (!sk) return; sk->sk_mark = mark; np = inet6_sk(sk); @@ -839,7 +838,7 @@ static int __net_init icmpv6_sk_init(struct net *net) net->ipv6.icmp_sk = kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv6.icmp_sk == NULL) + if (!net->ipv6.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 29b32206e494..6927f3fb5597 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -112,22 +112,20 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, return c & (synq_hsize - 1); } -struct request_sock *inet6_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, +struct request_sock *inet6_csk_search_req(struct sock *sk, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, const int iif) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; + struct request_sock *req; + u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, - lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - prev = &req->dl_next) { + spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && @@ -135,13 +133,14 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && (!ireq->ir_iif || ireq->ir_iif == iif)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk != NULL); - *prevp = prev; - return req; + break; } } + spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - return NULL; + return req; } EXPORT_SYMBOL_GPL(inet6_csk_search_req); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 051dffb49c90..871641bc1ed4 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,11 +23,9 @@ #include <net/secure_seq.h> #include <net/ip.h> -static unsigned int inet6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +u32 inet6_ehashfn(const struct net *net, + const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const __be16 fport) { static u32 inet6_ehash_secret __read_mostly; static u32 ipv6_hash_secret __read_mostly; @@ -44,54 +42,6 @@ static unsigned int inet6_ehashfn(struct net *net, inet6_ehash_secret + net_hash_mix(net)); } -static int inet6_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; - const struct in6_addr *faddr = &sk->sk_v6_daddr; - const __u16 lport = inet->inet_num; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet6_ehashfn(net, laddr, lport, faddr, fport); -} - -int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) -{ - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - int twrefcnt = 0; - - WARN_ON(!sk_unhashed(sk)); - - if (sk->sk_state == TCP_LISTEN) { - struct inet_listen_hashbucket *ilb; - - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - spin_lock(&ilb->lock); - __sk_nulls_add_node_rcu(sk, &ilb->head); - spin_unlock(&ilb->lock); - } else { - unsigned int hash; - struct hlist_nulls_head *list; - spinlock_t *lock; - - sk->sk_hash = hash = inet6_sk_ehashfn(sk); - list = &inet_ehash_bucket(hashinfo, hash)->chain; - lock = inet_ehash_lockp(hashinfo, hash); - spin_lock(lock); - __sk_nulls_add_node_rcu(sk, list); - if (tw) { - WARN_ON(sk->sk_hash != tw->tw_hash); - twrefcnt = inet_twsk_unhash(tw); - } - spin_unlock(lock); - } - - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - return twrefcnt; -} -EXPORT_SYMBOL(__inet6_hash); - /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM @@ -296,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); } @@ -320,6 +270,6 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk), - __inet6_check_established, __inet6_hash); + __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 263ef4143bff..96dbffff5a24 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1206,7 +1206,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, WARN_ON(fn->fn_flags & RTN_RTINFO); WARN_ON(fn->fn_flags & RTN_TL_ROOT); - WARN_ON(fn->leaf != NULL); + WARN_ON(fn->leaf); children = 0; child = NULL; @@ -1361,7 +1361,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) #if RT6_DEBUG >= 2 if (rt->dst.obsolete > 0) { - WARN_ON(fn != NULL); + WARN_ON(fn); return -ENOENT; } #endif diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f45d6db50a45..d491125011c4 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -100,7 +100,6 @@ static void fl_free(struct ip6_flowlabel *fl) if (fl) { if (fl->share == IPV6_FL_S_PROCESS) put_pid(fl->owner.pid); - release_net(fl->fl_net); kfree(fl->opt); kfree_rcu(fl, rcu); } @@ -206,7 +205,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; if (fl->label) { lfl = __fl_lookup(net, fl->label); - if (lfl == NULL) + if (!lfl) break; } } @@ -220,7 +219,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, * with the same label can only appear on another sock */ lfl = __fl_lookup(net, fl->label); - if (lfl != NULL) { + if (lfl) { atomic_inc(&lfl->users); spin_unlock_bh(&ip6_fl_lock); return lfl; @@ -298,10 +297,10 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, { struct ipv6_txoptions *fl_opt = fl->opt; - if (fopt == NULL || fopt->opt_flen == 0) + if (!fopt || fopt->opt_flen == 0) return fl_opt; - if (fl_opt != NULL) { + if (fl_opt) { opt_space->hopopt = fl_opt->hopopt; opt_space->dst0opt = fl_opt->dst0opt; opt_space->srcrt = fl_opt->srcrt; @@ -367,7 +366,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -ENOMEM; fl = kzalloc(sizeof(*fl), GFP_KERNEL); - if (fl == NULL) + if (!fl) goto done; if (olen > 0) { @@ -377,7 +376,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -ENOMEM; fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); - if (fl->opt == NULL) + if (!fl->opt) goto done; memset(fl->opt, 0, sizeof(*fl->opt)); @@ -403,7 +402,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, } } - fl->fl_net = hold_net(net); + fl->fl_net = net; fl->expires = jiffies; err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); if (err) @@ -597,7 +596,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) return -EINVAL; fl = fl_create(net, sk, &freq, optval, optlen, &err); - if (fl == NULL) + if (!fl) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); @@ -617,7 +616,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } rcu_read_unlock_bh(); - if (fl1 == NULL) + if (!fl1) fl1 = fl_lookup(net, freq.flr_label); if (fl1) { recheck: @@ -634,7 +633,7 @@ recheck: goto release; err = -ENOMEM; - if (sfl1 == NULL) + if (!sfl1) goto release; if (fl->linger > fl1->linger) fl1->linger = fl->linger; @@ -654,7 +653,7 @@ release: goto done; err = -ENOMEM; - if (sfl1 == NULL) + if (!sfl1) goto done; err = mem_check(sk); @@ -662,7 +661,7 @@ release: goto done; fl1 = fl_intern(net, fl, freq.flr_label); - if (fl1 != NULL) + if (fl1) goto recheck; if (!freq.flr_label) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bc28b7d42a6d..b5e6cc1d4a73 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -223,7 +223,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, } } - if (cand != NULL) + if (cand) return cand; dev = ign->fb_tunnel_dev; @@ -395,7 +395,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); - if (t == NULL) + if (!t) return; switch (type) { @@ -760,7 +760,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_set_inner_protocol(skb, protocol); - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); return 0; @@ -980,7 +980,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) &p->raddr, &p->laddr, p->link, strict); - if (rt == NULL) + if (!rt) return; if (rt->dst.dev) { @@ -1073,7 +1073,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); - if (t == NULL) + if (!t) t = netdev_priv(dev); } memset(&p, 0, sizeof(p)); @@ -1105,7 +1105,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -1144,7 +1144,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, err = -ENOENT; ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(ign->fb_tunnel_dev)) @@ -1216,6 +1216,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_do_ioctl = ip6gre_tunnel_ioctl, .ndo_change_mtu = ip6gre_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, }; static void ip6gre_dev_free(struct net_device *dev) @@ -1238,7 +1239,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->flags |= IFF_NOARP; - dev->iflink = 0; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); } @@ -1270,8 +1270,6 @@ static int ip6gre_tunnel_init(struct net_device *dev) u64_stats_init(&ip6gre_tunnel_stats->syncp); } - dev->iflink = tunnel->parms.link; - return 0; } @@ -1313,7 +1311,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) t = rtnl_dereference(ign->tunnels[prio][h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ @@ -1412,7 +1410,7 @@ static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) goto out; if (data[IFLA_GRE_REMOTE]) { - nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (ipv6_addr_any(&daddr)) return -EINVAL; } @@ -1446,10 +1444,10 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); if (data[IFLA_GRE_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]); if (data[IFLA_GRE_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (data[IFLA_GRE_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); @@ -1480,8 +1478,6 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - dev->iflink = tunnel->parms.link; - return 0; } @@ -1493,6 +1489,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6gre_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, }; static void ip6gre_tap_setup(struct net_device *dev) @@ -1503,7 +1500,6 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->netdev_ops = &ip6gre_tap_netdev_ops; dev->destructor = ip6gre_dev_free; - dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; } @@ -1622,8 +1618,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || - nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) || - nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) || + nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || + nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacdcb4dc762..f2e464eba5ef 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -46,8 +46,7 @@ #include <net/xfrm.h> #include <net/inet_ecn.h> - -int ip6_rcv_finish(struct sk_buff *skb) +int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb) { if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -183,7 +182,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -198,7 +198,7 @@ drop: */ -static int ip6_input_finish(struct sk_buff *skb) +static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; @@ -221,7 +221,7 @@ resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); - if (ipprot != NULL) { + if (ipprot) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { @@ -277,7 +277,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb, + skb->dev, NULL, ip6_input_finish); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 46d452a56d3e..e893cd18612f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -124,7 +124,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); - if (skb->next != NULL) + if (skb->next) fptr->frag_off |= htons(IP6_MF); offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 36cf0ab685a0..7fde1f265c90 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include <net/checksum.h> #include <linux/mroute6.h> -static int ip6_finish_output2(struct sk_buff *skb) +static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; @@ -70,7 +70,7 @@ static int ip6_finish_output2(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && + if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_socket(dev_net(dev), skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, @@ -82,7 +82,7 @@ static int ip6_finish_output2(struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - newskb, NULL, newskb->dev, + sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -122,14 +122,14 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip6_finish_output(struct sk_buff *skb) +static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(skb, ip6_finish_output2); + return ip6_fragment(sk, skb, ip6_finish_output2); else - return ip6_finish_output2(skb); + return ip6_finish_output2(sk, skb); } int ip6_output(struct sock *sk, struct sk_buff *skb) @@ -143,7 +143,8 @@ int ip6_output(struct sock *sk, struct sk_buff *skb) return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, + NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } @@ -177,7 +178,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (skb2 == NULL) { + if (!skb2) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -223,8 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - dst->dev, dst_output); + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, dst_output_sk); } skb->dev = dst->dev; @@ -316,10 +317,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sk_buff *skb) +static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -511,7 +512,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dst->dev, ip6_forward_finish); error: @@ -538,7 +540,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -629,7 +632,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); @@ -658,7 +661,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(offset); - if (frag->next != NULL) + if (frag->next) fh->frag_off |= htons(IP6_MF); fh->identification = frag_id; ipv6_hdr(frag)->payload_len = @@ -667,7 +670,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip6_copy_metadata(frag, skb); } - err = output(skb); + err = output(sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -776,7 +779,7 @@ slow_path: fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); frag_id = fh->identification; } else fh->identification = frag_id; @@ -800,7 +803,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(frag); + err = output(sk, frag); if (err) goto fail; @@ -824,7 +827,7 @@ static inline int ip6_rt_check(const struct rt6key *rt_key, const struct in6_addr *addr_cache) { return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && - (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); + (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); } static struct dst_entry *ip6_sk_dst_check(struct sock *sk, @@ -883,7 +886,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, #endif int err; - if (*dst == NULL) + if (!*dst) *dst = ip6_route_output(net, sk, fl6); err = (*dst)->error; @@ -1046,11 +1049,11 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ skb = skb_peek_tail(queue); - if (skb == NULL) { + if (!skb) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); - if (skb == NULL) + if (!skb) return err; /* reserve space for Hardware header */ @@ -1080,7 +1083,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); + ipv6_select_ident(sock_net(sk), &fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; append: @@ -1108,7 +1111,7 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { - if (skb == NULL) { + if (!skb) { /* first fragment, reserve header_len */ *mtu = orig_mtu - rt->dst.header_len; @@ -1140,7 +1143,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, return -EINVAL; v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(v6_cork->opt == NULL)) + if (unlikely(!v6_cork->opt)) return -ENOBUFS; v6_cork->opt->tot_len = opt->tot_len; @@ -1332,7 +1335,7 @@ alloc_new_skb: else fraggap = 0; /* update mtu and maxfraglen if necessary */ - if (skb == NULL || skb_prev == NULL) + if (!skb || !skb_prev) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, orig_mtu); @@ -1384,10 +1387,10 @@ alloc_new_skb: skb = sock_wmalloc(sk, alloclen + hh_len, 1, sk->sk_allocation); - if (unlikely(skb == NULL)) + if (unlikely(!skb)) err = -ENOBUFS; } - if (skb == NULL) + if (!skb) goto error; /* * Fill in the control structures @@ -1579,7 +1582,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, unsigned char proto = fl6->flowi6_proto; skb = __skb_dequeue(queue); - if (skb == NULL) + if (!skb) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ddd94eca19b3..5cafd92c2312 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -64,12 +64,6 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#ifdef IP6_TNL_DEBUG -#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__) -#else -#define IP6_TNL_TRACE(x...) do {;} while(0) -#endif - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -137,7 +131,7 @@ struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) struct dst_entry *dst = t->dst_cache; if (dst && dst->obsolete && - dst->ops->check(dst, t->dst_cookie) == NULL) { + !dst->ops->check(dst, t->dst_cookie)) { t->dst_cache = NULL; dst_release(dst); return NULL; @@ -331,7 +325,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6_tnl_dev_setup); - if (dev == NULL) + if (!dev) goto failed; dev_net_set(dev, net); @@ -502,7 +496,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); - if (t == NULL) + if (!t) goto out; tproto = ACCESS_ONCE(t->parms.proto); @@ -813,7 +807,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t != NULL) { + if (t) { struct pcpu_sw_netstats *tstats; tproto = ACCESS_ONCE(t->parms.proto); @@ -1106,7 +1100,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; @@ -1270,8 +1264,6 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) else dev->flags &= ~IFF_POINTOPOINT; - dev->iflink = p->link; - if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); @@ -1280,7 +1272,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) &p->raddr, &p->laddr, p->link, strict); - if (rt == NULL) + if (!rt) return; if (rt->dst.dev) { @@ -1523,6 +1515,13 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return 0; } +int ip6_tnl_get_iflink(const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + return t->parms.link; +} +EXPORT_SYMBOL(ip6_tnl_get_iflink); static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, @@ -1531,6 +1530,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats = ip6_get_stats, + .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1646,12 +1646,10 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL], - sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE], - sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -1745,10 +1743,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || @@ -1821,7 +1817,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 32d9b268e7d8..bba8903e871f 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,7 +62,8 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, @@ -97,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(sk, skb, dev); return 0; } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5fb9e212eca8..ed9d681207fa 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -218,7 +218,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p sprintf(name, "ip6_vti%%d"); dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); - if (dev == NULL) + if (!dev) goto failed; dev_net_set(dev, net); @@ -288,8 +288,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, static void vti6_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); @@ -305,7 +304,7 @@ static int vti6_rcv(struct sk_buff *skb) rcu_read_lock(); t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t != NULL) { + if (t) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -601,8 +600,6 @@ static void vti6_link_config(struct ip6_tnl *t) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; - - dev->iflink = p->link; } /** @@ -716,7 +713,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } else { memset(&p, 0, sizeof(p)); } - if (t == NULL) + if (!t) t = netdev_priv(dev); vti6_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -736,7 +733,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -767,7 +764,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -ENOENT; vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, 0); - if (t == NULL) + if (!t) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) @@ -808,6 +805,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_do_ioctl = vti6_ioctl, .ndo_change_mtu = vti6_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, }; /** @@ -897,12 +895,10 @@ static void vti6_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_VTI_LINK]); if (data[IFLA_VTI_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL], - sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_VTI_LOCAL]); if (data[IFLA_VTI_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE], - sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_VTI_REMOTE]); if (data[IFLA_VTI_IKEY]) parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); @@ -983,10 +979,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || - nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) goto nla_put_failure; @@ -1027,7 +1021,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); - while (t != NULL) { + while (t) { unregister_netdevice_queue(t->dev, &list); t = rtnl_dereference(t->next); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 312e0ff47339..74ceb73c1c9a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -56,9 +56,7 @@ struct mr6_table { struct list_head list; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; u32 id; struct sock *mroute6_sk; struct timer_list ipmr_expire_timer; @@ -175,7 +173,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, } mrt = ip6mr_get_table(rule->fr_net, rule->table); - if (mrt == NULL) + if (!mrt) return -EAGAIN; res->mrt = mrt; return 0; @@ -239,7 +237,7 @@ static int __net_init ip6mr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv6.mr6_tables); mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) { + if (!mrt) { err = -ENOMEM; goto err1; } @@ -307,11 +305,11 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) unsigned int i; mrt = ip6mr_get_table(net, id); - if (mrt != NULL) + if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (mrt == NULL) + if (!mrt) return NULL; mrt->id = id; write_pnet(&mrt->net, net); @@ -410,7 +408,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) struct mr6_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); iter->mrt = mrt; @@ -494,7 +492,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) struct mr6_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); it->mrt = mrt; @@ -667,7 +665,7 @@ static int pim6_rcv(struct sk_buff *skb) dev_hold(reg_dev); read_unlock(&mrt_lock); - if (reg_dev == NULL) + if (!reg_dev) goto drop; skb->mac_header = skb->network_header; @@ -720,8 +718,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static int reg_vif_get_iflink(const struct net_device *dev) +{ + return 0; +} + static const struct net_device_ops reg_vif_netdev_ops = { .ndo_start_xmit = reg_vif_xmit, + .ndo_get_iflink = reg_vif_get_iflink, }; static void reg_vif_setup(struct net_device *dev) @@ -745,7 +749,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) sprintf(name, "pim6reg%u", mrt->id); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -754,7 +758,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) free_netdev(dev); return NULL; } - dev->iflink = 0; if (dev_open(dev)) goto failure; @@ -994,7 +997,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, v->pkt_out = 0; v->link = dev->ifindex; if (v->flags & MIFF_REGISTER) - v->link = dev->iflink; + v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); @@ -1074,7 +1077,7 @@ skip: static struct mfc6_cache *ip6mr_cache_alloc(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c == NULL) + if (!c) return NULL; c->mfc_un.res.minvif = MAXMIFS; return c; @@ -1083,7 +1086,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) static struct mfc6_cache *ip6mr_cache_alloc_unres(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); - if (c == NULL) + if (!c) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; @@ -1200,7 +1203,7 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (mrt->mroute6_sk == NULL) { + if (!mrt->mroute6_sk) { kfree_skb(skb); return -EINVAL; } @@ -1495,7 +1498,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return -EINVAL; c = ip6mr_cache_alloc(); - if (c == NULL) + if (!c) return -ENOMEM; c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; @@ -1665,7 +1668,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; if (optname != MRT6_INIT) { @@ -1814,7 +1817,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (optname) { @@ -1861,7 +1864,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1935,7 +1938,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1983,13 +1986,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif -static inline int ip6mr_forward2_finish(struct sk_buff *skb) +static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb) { IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTOCTETS, skb->len); - return dst_output(skb); + return dst_output_sk(sk, skb); } /* @@ -2005,7 +2008,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, struct dst_entry *dst; struct flowi6 fl6; - if (vif->dev == NULL) + if (!vif->dev) goto out_free; #ifdef CONFIG_IPV6_PIMSM_V2 @@ -2061,7 +2064,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dev, ip6mr_forward2_finish); out_free: @@ -2194,7 +2198,7 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); - if (cache == NULL) { + if (!cache) { int vif = ip6mr_find_vif(mrt, skb->dev); if (vif >= 0) @@ -2206,7 +2210,7 @@ int ip6_mr_input(struct sk_buff *skb) /* * No usable cache entry */ - if (cache == NULL) { + if (!cache) { int vif; vif = ip6mr_find_vif(mrt, skb->dev); @@ -2245,13 +2249,13 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) return -EMSGSIZE; mp_attr = nla_nest_start(skb, RTA_MULTIPATH); - if (mp_attr == NULL) + if (!mp_attr) return -EMSGSIZE; for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); - if (nhp == NULL) { + if (!nhp) { nla_nest_cancel(skb, mp_attr); return -EMSGSIZE; } @@ -2284,7 +2288,7 @@ int ip6mr_get_route(struct net *net, struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; read_lock(&mrt_lock); @@ -2309,7 +2313,7 @@ int ip6mr_get_route(struct net *net, } dev = skb->dev; - if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) { + if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { read_unlock(&mrt_lock); return -ENODEV; } @@ -2361,7 +2365,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -2380,8 +2384,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; - if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || - nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp)) + if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || + nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) goto nla_put_failure; err = __ip6mr_fill_mroute(mrt, skb, c, rtm); /* do not break the dump if cache is unresolved */ @@ -2426,7 +2430,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8d766d9100cb..63e6956917c9 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -85,7 +85,7 @@ int ip6_ra_control(struct sock *sk, int sel) return 0; } } - if (new_ra == NULL) { + if (!new_ra) { write_unlock_bh(&ip6_ra_lock); return -ENOBUFS; } @@ -117,6 +117,25 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, return opt; } +static bool setsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -124,8 +143,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct net *net = sock_net(sk); int val, valbool; int retv = -ENOPROTOOPT; + bool needs_rtnl = setsockopt_needs_rtnl(optname); - if (optval == NULL) + if (!optval) val = 0; else { if (optlen >= sizeof(int)) { @@ -140,6 +160,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + if (needs_rtnl) + rtnl_lock(); lock_sock(sk); switch (optname) { @@ -370,7 +392,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, */ if (optlen == 0) optval = NULL; - else if (optval == NULL) + else if (!optval) goto e_inval; else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) @@ -421,7 +443,7 @@ sticky_done: if (optlen == 0) goto e_inval; - else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) + else if (optlen < sizeof(struct in6_pktinfo) || !optval) goto e_inval; if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { @@ -460,7 +482,7 @@ sticky_done: opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); retv = -ENOBUFS; - if (opt == NULL) + if (!opt) break; memset(opt, 0, sizeof(*opt)); @@ -624,10 +646,10 @@ done: psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) retv = ipv6_sock_mc_join(sk, greq.gr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); else retv = ipv6_sock_mc_drop(sk, greq.gr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); break; } case MCAST_JOIN_SOURCE_GROUP: @@ -660,7 +682,7 @@ done: psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) break; @@ -837,11 +859,15 @@ pref_skip_coa: } release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return retv; e_inval: release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return -EINVAL; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ce107c8aab3..083b2927fc67 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -140,6 +140,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) struct net *net = sock_net(sk); int err; + ASSERT_RTNL(); + if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -155,13 +157,12 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); - if (mc_lst == NULL) + if (!mc_lst) return -ENOMEM; mc_lst->next = NULL; mc_lst->addr = *addr; - rtnl_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); @@ -172,8 +173,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } else dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) { - rtnl_unlock(); + if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -190,7 +190,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { - rtnl_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; } @@ -198,10 +197,9 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = np->ipv6_mc_list; rcu_assign_pointer(np->ipv6_mc_list, mc_lst); - rtnl_unlock(); - return 0; } +EXPORT_SYMBOL(ipv6_sock_mc_join); /* * socket leave on multicast group @@ -213,10 +211,11 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); + ASSERT_RTNL(); + if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - rtnl_lock(); for (lnk = &np->ipv6_mc_list; (mc_lst = rtnl_dereference(*lnk)) != NULL; lnk = &mc_lst->next) { @@ -227,7 +226,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) *lnk = mc_lst->next; dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev != NULL) { + if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -235,17 +234,16 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &mc_lst->addr); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - rtnl_unlock(); atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); return 0; } } - rtnl_unlock(); return -EADDRNOTAVAIL; } +EXPORT_SYMBOL(ipv6_sock_mc_drop); /* called with rcu_read_lock() */ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, @@ -438,7 +436,7 @@ done: read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) - return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); + err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; } @@ -825,7 +823,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, struct ifmcaddr6 *mc; mc = kzalloc(sizeof(*mc), GFP_ATOMIC); - if (mc == NULL) + if (!mc) return NULL; setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); @@ -862,7 +860,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) /* we need to take a reference on idev */ idev = in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -EINVAL; write_lock_bh(&idev->lock); @@ -1330,7 +1328,7 @@ int igmp6_event_query(struct sk_buff *skb) return -EINVAL; idev = __in6_dev_get(skb->dev); - if (idev == NULL) + if (!idev) return 0; mld = (struct mld_msg *)icmp6_hdr(skb); @@ -1445,7 +1443,7 @@ int igmp6_event_report(struct sk_buff *skb) return -EINVAL; idev = __in6_dev_get(skb->dev); - if (idev == NULL) + if (!idev) return -ENODEV; /* @@ -1646,8 +1644,9 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net->ipv6.igmp_sk, skb, NULL, skb->dev, + dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -1964,7 +1963,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); - if (skb == NULL) { + if (!skb) { rcu_read_lock(); IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTDISCARDS); @@ -2009,8 +2008,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } skb_dst_set(skb, dst); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, skb->dev, dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); @@ -2613,7 +2612,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr im = im->next; while (!im) { - if (likely(state->idev != NULL)) + if (likely(state->idev)) read_unlock_bh(&state->idev->lock); state->dev = next_net_device_rcu(state->dev); @@ -2659,7 +2658,7 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - if (likely(state->idev != NULL)) { + if (likely(state->idev)) { read_unlock_bh(&state->idev->lock); state->idev = NULL; } @@ -2728,10 +2727,10 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) continue; read_lock_bh(&idev->lock); im = idev->mc_list; - if (likely(im != NULL)) { + if (likely(im)) { spin_lock_bh(&im->mca_lock); psf = im->mca_sources; - if (likely(psf != NULL)) { + if (likely(psf)) { state->im = im; state->idev = idev; break; @@ -2752,7 +2751,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s spin_unlock_bh(&state->im->mca_lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) + if (likely(state->idev)) read_unlock_bh(&state->idev->lock); state->dev = next_net_device_rcu(state->dev); @@ -2806,11 +2805,11 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - if (likely(state->im != NULL)) { + if (likely(state->im)) { spin_unlock_bh(&state->im->mca_lock); state->im = NULL; } - if (likely(state->idev != NULL)) { + if (likely(state->idev)) { read_unlock_bh(&state->idev->lock); state->idev = NULL; } @@ -2907,20 +2906,32 @@ static int __net_init igmp6_net_init(struct net *net) inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; + err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, + SOCK_RAW, IPPROTO_ICMPV6, net); + if (err < 0) { + pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n", + err); + goto out_sock_create; + } + err = igmp6_proc_init(net); if (err) - goto out_sock_create; -out: - return err; + goto out_sock_create_autojoin; + + return 0; +out_sock_create_autojoin: + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); out_sock_create: inet_ctl_sock_destroy(net->ipv6.igmp_sk); - goto out; +out: + return err; } static void __net_exit igmp6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.igmp_sk); + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); igmp6_proc_exit(net); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 14ecdaf06bf7..96f153c0846b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -84,6 +84,7 @@ do { \ static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); +static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -117,7 +118,9 @@ static const struct neigh_ops ndisc_direct_ops = { struct neigh_table nd_tbl = { .family = AF_INET6, .key_len = sizeof(struct in6_addr), + .protocol = cpu_to_be16(ETH_P_IPV6), .hash = ndisc_hash, + .key_eq = ndisc_key_eq, .constructor = ndisc_constructor, .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, @@ -294,6 +297,11 @@ static u32 ndisc_hash(const void *pkey, return ndisc_hashfn(pkey, dev, hash_rnd); } +static bool ndisc_key_eq(const struct neighbour *n, const void *pkey) +{ + return neigh_key_eq128(n, pkey); +} + static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; @@ -303,7 +311,7 @@ static int ndisc_constructor(struct neighbour *neigh) bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); - if (in6_dev == NULL) { + if (!in6_dev) { return -EINVAL; } @@ -348,7 +356,7 @@ static int pndisc_constructor(struct pneigh_entry *n) struct in6_addr maddr; struct net_device *dev = n->dev; - if (dev == NULL || __in6_dev_get(dev) == NULL) + if (!dev || !__in6_dev_get(dev)) return -EINVAL; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); @@ -361,7 +369,7 @@ static void pndisc_destructor(struct pneigh_entry *n) struct in6_addr maddr; struct net_device *dev = n->dev; - if (dev == NULL || __in6_dev_get(dev) == NULL) + if (!dev || !__in6_dev_get(dev)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); @@ -455,8 +463,9 @@ static void ndisc_send_skb(struct sk_buff *skb, idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, + dst_output_sk); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); @@ -552,7 +561,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, int optlen = 0; struct nd_msg *msg; - if (saddr == NULL) { + if (!saddr) { if (ipv6_get_lladdr(dev, &addr_buf, (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))) return; @@ -1022,13 +1031,13 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); skb = nlmsg_new(msg_size, GFP_ATOMIC); - if (skb == NULL) { + if (!skb) { err = -ENOBUFS; goto errout; } nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); - if (nlh == NULL) { + if (!nlh) { goto nla_put_failure; } @@ -1041,8 +1050,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); - if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), - &ipv6_hdr(ra)->saddr)) + if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -1096,7 +1104,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) */ in6_dev = __in6_dev_get(skb->dev); - if (in6_dev == NULL) { + if (!in6_dev) { ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", skb->dev->name); return; @@ -1191,11 +1199,11 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", rt, lifetime, skb->dev->name); - if (rt == NULL && lifetime) { + if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); - if (rt == NULL) { + if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", __func__); @@ -1203,7 +1211,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); - if (neigh == NULL) { + if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 398377a9d018..d958718b5031 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -84,7 +84,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (entry->hook == NF_INET_LOCAL_OUT) { + if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); rt_info->daddr = iph->daddr; @@ -98,7 +98,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (entry->hook == NF_INET_LOCAL_OUT) { + if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a069822936e6..ca6998345b42 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,14 +25,16 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +if NF_TABLES + config NF_TABLES_IPV6 - depends on NF_TABLES tristate "IPv6 nf_tables support" help This option enables the IPv6 support for nf_tables. +if NF_TABLES_IPV6 + config NFT_CHAIN_ROUTE_IPV6 - depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" help This option enables the "route" chain for IPv6 in nf_tables. This @@ -40,16 +42,18 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -config NF_REJECT_IPV6 - tristate "IPv6 packet rejection" - default m if NETFILTER_ADVANCED=n - config NFT_REJECT_IPV6 - depends on NF_TABLES_IPV6 select NF_REJECT_IPV6 default NFT_REJECT tristate +endif # NF_TABLES_IPV6 +endif # NF_TABLES + +config NF_REJECT_IPV6 + tristate "IPv6 packet rejection" + default m if NETFILTER_ADVANCED=n + config NF_LOG_IPV6 tristate "IPv6 packet logging" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index bb00c6f2a885..1a732a1d3c8e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -9,7 +9,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> #include <linux/capability.h> #include <linux/in.h> #include <linux/skbuff.h> @@ -234,7 +237,7 @@ static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 4, + .level = LOGLEVEL_WARNING, .logflags = NF_LOG_MASK, }, }, @@ -314,8 +317,7 @@ ip6t_next_entry(const struct ip6t_entry *entry) unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -330,8 +332,8 @@ ip6t_do_table(struct sk_buff *skb, unsigned int addend; /* Initialization */ - indev = in ? in->name : nulldevname; - outdev = out ? out->name : nulldevname; + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -339,8 +341,8 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.family = NFPROTO_IPV6; acpar.hooknum = hook; @@ -390,7 +392,7 @@ ip6t_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, + trace_packet(skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 544b0a9da1b5..12331efd49cf 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -83,7 +83,8 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ipv6.proto != IPPROTO_TCP || + if (!(e->ipv6.flags & IP6T_F_PROTO) || + e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { pr_info("TCP_RESET illegal for non-tcp\n"); return -EINVAL; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index a0d17270117c..6edb7b106de7 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -315,11 +315,9 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index ca7f6c128086..5c33d8abc077 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -33,13 +33,11 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, - net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 307bbb782d14..b551f5b79fe2 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -32,7 +32,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) +ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; struct in6_addr saddr, daddr; @@ -57,8 +57,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, - dev_net(out)->ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, + dev_net(state->out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -77,17 +77,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (ops->hooknum == NF_INET_LOCAL_OUT) - return ip6t_mangle_out(skb, out); + return ip6t_mangle_out(skb, state); if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, in, out, - dev_net(out)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, ops->hooknum, state, + dev_net(state->out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, in, out, - dev_net(in)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, ops->hooknum, state, + dev_net(state->in)->ipv6.ip6table_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index b0634ac996b7..c3a7f7af0ed4 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -32,49 +32,40 @@ static const struct xt_table nf_nat_ipv6_table = { static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - return ip6t_do_table(skb, ops->hooknum, in, out, net->ipv6.ip6table_nat); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat); } static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain); } static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain); } static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain); } static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5274740acecc..0b33caad2b69 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -20,13 +20,11 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, - net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ab3b0219ecfa..fcef83c25f7b 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -37,13 +37,11 @@ static const struct xt_table security_table = { static unsigned int ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_security); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b68d0e59c1f8..4ba0c34c627b 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -97,9 +97,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; const struct nf_conn_help *help; @@ -135,9 +133,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops, static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -171,25 +167,21 @@ out: static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb); } static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct ipv6hdr)) { net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { @@ -290,10 +282,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, - &tuple->src.u3.ip6) || - nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, - &tuple->dst.u3.ip6)) + if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || + nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) goto nla_put_failure; return 0; @@ -312,10 +302,8 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[], if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) return -EINVAL; - memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), - sizeof(u_int32_t) * 4); - memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), - sizeof(u_int32_t) * 4); + t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); + t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); return 0; } diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e70382e4dfb5..a45db0b4785c 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -54,9 +54,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct sk_buff *reasm; @@ -77,9 +75,9 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, - (struct net_device *) in, (struct net_device *) out, - okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, + state->in, state->out, + state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index ddf07e6f59d7..8dd869642f45 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index c5812e1c1ffb..e76900e0aa92 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -263,11 +263,10 @@ EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { struct nf_conn *ct; @@ -318,7 +317,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, in, out, ct); + ret = do_chain(ops, skb, state, ct); if (ret != NF_ACCEPT) return ret; @@ -332,7 +331,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) goto oif_changed; } break; @@ -341,7 +340,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) goto oif_changed; } @@ -355,17 +354,16 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); unsigned int nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -376,11 +374,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); unsigned int nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { #ifdef CONFIG_XFRM @@ -394,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -418,11 +415,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); unsigned int nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { const struct nf_conn *ct; @@ -434,7 +430,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index d05b36440e8b..94b4c6dfb400 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -13,6 +13,7 @@ #include <net/ip6_checksum.h> #include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> #include <net/netfilter/ipv6/nf_reject.h> const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, @@ -65,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - __be16 protocol, int hoplimit) + __u8 protocol, int hoplimit) { struct ipv6hdr *ip6h; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); @@ -195,7 +196,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) */ if (oldskb->nf_bridge) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; + + nskb->dev = nf_bridge_get_physindev(oldskb); nskb->protocol = htons(ETH_P_IPV6); ip6h->payload_len = htons(sizeof(struct tcphdr)); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), @@ -208,4 +210,39 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) } EXPORT_SYMBOL_GPL(nf_send_reset6); +static bool reject6_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + proto = ip6h->nexthdr; + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; +} + +void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, + unsigned char code, unsigned int hooknum) +{ + if (!reject6_csum_ok(skb_in, hooknum)) + return; + + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = net->loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach6); + MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 0d812b31277d..c8148ba76d1a 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -18,14 +18,12 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) return NF_DROP; return nft_do_chain(&pkt, ops); @@ -33,9 +31,7 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if (net_ratelimit()) @@ -44,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv6(ops, skb, in, out, okfn); + return nft_do_chain_ipv6(ops, skb, state); } struct nft_af_info nft_af_ipv6 __read_mostly = { diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 1c4b75dd425b..951bb458b7bd 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -26,51 +26,42 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + nft_set_pktinfo_ipv6(&pkt, ops, skb, state); return nft_do_chain(&pkt, ops); } static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 42031299585e..0dafdaac5e17 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -24,9 +24,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { unsigned int ret; struct nft_pktinfo pkt; @@ -35,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, u32 mark, flowlabel; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) return NF_DROP; /* save source/dest address, mark, hoplimit, flowlabel, priority */ diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 529c119cbb14..cd1ac1637a05 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -18,19 +18,16 @@ #include <net/netfilter/ipv6/nf_nat_masquerade.h> static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); range.flags = priv->flags; - verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); - - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); } static struct nft_expr_type nft_masq_ipv6_type; diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 11820b6b3613..effd393bd517 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -18,26 +18,25 @@ #include <net/netfilter/nf_nat_redirect.h> static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_redir *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { range.min_proto.all = - *(__be16 *)&data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min], range.max_proto.all = - *(__be16 *)&data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max], range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } range.flags |= priv->flags; - verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, + pkt->ops->hooknum); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index f73285924144..d0d1540ecf87 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -20,7 +20,7 @@ #include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -34,9 +34,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, case NFT_REJECT_TCP_RST: nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); break; + default: + break; } - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; } static struct nft_expr_type nft_reject_ipv6_type; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 74581f706c4d..85892af57364 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,13 +9,14 @@ #include <net/addrconf.h> #include <net/secure_seq.h> -static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, - struct in6_addr *src) +static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, + struct in6_addr *dst, struct in6_addr *src) { u32 hash, id; hash = __ipv6_addr_jhash(dst, hashrnd); hash = __ipv6_addr_jhash(src, hash); + hash ^= net_hash_mix(net); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -36,7 +37,7 @@ static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, * * The network header must be set before calling this. */ -void ipv6_proxy_select_ident(struct sk_buff *skb) +void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; @@ -53,20 +54,21 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) net_get_random_once(&ip6_proxy_idents_hashrnd, sizeof(ip6_proxy_idents_hashrnd)); - id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, + id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, + struct rt6_info *rt) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr, + id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, &rt->rt6i_src.addr); fhdr->identification = htonl(id); } @@ -134,7 +136,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) EXPORT_SYMBOL(ip6_dst_hoplimit); #endif -int __ip6_local_out(struct sk_buff *skb) +static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int len; @@ -144,19 +146,30 @@ int __ip6_local_out(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); +} + +int __ip6_local_out(struct sk_buff *skb) +{ + return __ip6_local_out_sk(skb->sk, skb); } EXPORT_SYMBOL_GPL(__ip6_local_out); -int ip6_local_out(struct sk_buff *skb) +int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; - err = __ip6_local_out(skb); + err = __ip6_local_out_sk(sk, skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } +EXPORT_SYMBOL_GPL(ip6_local_out_sk); + +int ip6_local_out(struct sk_buff *skb) +{ + return ip6_local_out_sk(skb->sk, skb); +} EXPORT_SYMBOL_GPL(ip6_local_out); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a2dfff6ff227..263a5164a6f5 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -77,8 +77,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dae7f1a1e464..8072bd4139b7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -32,7 +32,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/skbuff.h> #include <linux/compat.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/ioctls.h> #include <net/net_namespace.h> @@ -172,7 +172,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk == NULL) + if (!sk) goto out; net = dev_net(skb->dev); @@ -367,7 +367,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk != NULL) { + if (sk) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; saddr = &ip6h->saddr; @@ -456,9 +456,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) * we return it, otherwise we block. */ -static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) +static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -631,7 +630,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); - if (skb == NULL) + if (!skb) goto error; skb_reserve(skb, hlen); @@ -653,8 +652,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, goto error_fault; IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, rt->dst.dev, dst_output_sk); if (err > 0) err = net_xmit_errno(err); if (err) @@ -730,8 +729,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -791,7 +789,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -833,13 +831,13 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (opt == NULL) + if (!opt) opt = np->opt; if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); @@ -1132,7 +1130,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) + if (skb) amount = skb_tail_pointer(skb) - skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index d7d70e69973b..8ffa2c8cce77 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -430,7 +430,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int i, plen = 0; clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) + if (!clone) goto out_oom; clone->next = head->next; head->next = clone; @@ -552,7 +552,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); - if (fq != NULL) { + if (fq) { int ret; spin_lock(&fq->q.lock); @@ -632,7 +632,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table = ip6_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); - if (table == NULL) + if (!table) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; @@ -648,7 +648,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) } hdr = register_net_sysctl(net, "net/ipv6", table); - if (hdr == NULL) + if (!hdr) goto err_reg; net->ipv6.sysctl.frags_hdr = hdr; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4688bd4d7f59..5c48293ff062 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -194,7 +194,6 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, @@ -236,7 +235,6 @@ static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, .mtu = ip6_blackhole_mtu, @@ -1478,7 +1476,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, int remaining; u32 *mp; - if (cfg->fc_mx == NULL) + if (!cfg->fc_mx) return 0; mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); @@ -2400,6 +2398,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_PREF] = { .type = NLA_U8 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2407,6 +2406,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + unsigned int pref; int err; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); @@ -2438,7 +2438,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { - nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); + cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } @@ -2461,7 +2461,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, } if (tb[RTA_PREFSRC]) - nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); + cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); if (tb[RTA_OIF]) cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); @@ -2482,6 +2482,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); } + if (tb[RTA_PREF]) { + pref = nla_get_u8(tb[RTA_PREF]); + if (pref != ICMPV6_ROUTER_PREF_LOW && + pref != ICMPV6_ROUTER_PREF_HIGH) + pref = ICMPV6_ROUTER_PREF_MEDIUM; + cfg->fc_flags |= RTF_PREF(pref); + } + err = 0; errout: return err; @@ -2511,7 +2519,7 @@ beginning: nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_gateway = nla_get_in6_addr(nla); r_cfg.fc_flags |= RTF_GATEWAY; } } @@ -2585,7 +2593,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)) - + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ + + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + + nla_total_size(1); /* RTA_PREF */ } static int rt6_fill_node(struct net *net, @@ -2660,19 +2669,19 @@ static int rt6_fill_node(struct net *net, rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - if (nla_put(skb, RTA_DST, 16, dst)) + if (nla_put_in6_addr(skb, RTA_DST, dst)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { - if (nla_put(skb, RTA_SRC, 16, src)) + if (nla_put_in6_addr(skb, RTA_SRC, src)) goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) goto nla_put_failure; #endif if (iif) { @@ -2696,14 +2705,14 @@ static int rt6_fill_node(struct net *net, } else if (dst) { struct in6_addr saddr_buf; if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && - nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } if (rt->rt6i_prefsrc.plen) { struct in6_addr saddr_buf; saddr_buf = rt->rt6i_prefsrc.addr; - if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } @@ -2711,7 +2720,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2726,6 +2735,9 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e4cbd5798eba..ac35a28599be 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -118,7 +118,7 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, return t; } t = rcu_dereference(sitn->tunnels_wc[0]); - if ((t != NULL) && (t->dev->flags & IFF_UP)) + if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } @@ -251,7 +251,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -555,7 +555,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL) + if (!t) goto out; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { @@ -671,7 +671,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); - if (tunnel != NULL) { + if (tunnel) { struct pcpu_sw_netstats *tstats; if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && @@ -733,7 +733,7 @@ static int ipip_rcv(struct sk_buff *skb) iph = ip_hdr(skb); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); - if (tunnel != NULL) { + if (tunnel) { if (tunnel->parms.iph.protocol != IPPROTO_IPIP && tunnel->parms.iph.protocol != 0) goto drop; @@ -838,7 +838,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - if (neigh == NULL) { + if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -867,7 +867,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - if (neigh == NULL) { + if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -983,7 +983,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); @@ -1076,7 +1076,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dev->iflink = tunnel->parms.link; } static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) @@ -1158,7 +1157,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } t = ipip6_tunnel_locate(net, &p, 0); - if (t == NULL) + if (!t) t = netdev_priv(dev); } @@ -1206,7 +1205,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -1242,7 +1241,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -ENOENT; t = ipip6_tunnel_locate(net, &p, 0); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(sitn->fb_tunnel_dev)) @@ -1336,6 +1335,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip_tunnel_get_iflink, }; static void ipip6_dev_free(struct net_device *dev) @@ -1366,7 +1366,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->mtu = ETH_DATA_LEN - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); - dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; dev->features |= SIT_FEATURES; @@ -1530,8 +1529,7 @@ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], if (data[IFLA_IPTUN_6RD_PREFIX]) { ret = true; - nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX], - sizeof(struct in6_addr)); + ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]); } if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { @@ -1683,8 +1681,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || - nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, @@ -1694,10 +1692,10 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; #ifdef CONFIG_IPV6_SIT_6RD - if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), - &tunnel->ip6rd.prefix) || - nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, - tunnel->ip6rd.relay_prefix) || + if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX, + &tunnel->ip6rd.prefix) || + nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, tunnel->ip6rd.prefixlen) || nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, @@ -1795,7 +1793,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net, struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7337fc7947e2..21bc2eb53c57 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } @@ -189,13 +190,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); if (!req) goto out; ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->listener = NULL; + treq->tfo_listener = false; if (security_inet_conn_request(sk, skb, req)) goto out_free; @@ -220,7 +221,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ir_mark = inet_request_mark(sk, skb); - req->expires = 0UL; req->num_retrans = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index c5c10fafcfe2..abcc79f649b3 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -54,6 +54,20 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "idgen_retries", + .data = &init_net.ipv6.sysctl.idgen_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "idgen_delay", + .data = &init_net.ipv6.sysctl.idgen_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; @@ -93,6 +107,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; + ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; + ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) @@ -163,7 +179,7 @@ int ipv6_sysctl_register(void) int err = -ENOMEM; ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable); - if (ip6_header == NULL) + if (!ip6_header) goto out; err = register_pernet_subsys(&ipv6_sysctl_net_ops); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1f5e62229aaa..ad51df85aa00 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -104,19 +104,6 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static void tcp_v6_hash(struct sock *sk) -{ - if (sk->sk_state != TCP_CLOSE) { - if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { - tcp_prot.hash(sk); - return; - } - local_bh_disable(); - __inet6_hash(sk, NULL); - local_bh_enable(); - } -} - static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, @@ -154,7 +141,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; fl6_sock_release(flowlabel); } @@ -233,11 +220,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; - } else { - ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &sk->sk_v6_rcv_saddr); } + np->saddr = sk->sk_v6_rcv_saddr; return err; } @@ -263,7 +247,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } - if (saddr == NULL) { + if (!saddr) { saddr = &fl6.saddr; sk->sk_v6_rcv_saddr = *saddr; } @@ -340,18 +324,20 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); + struct net *net = dev_net(skb->dev); + struct request_sock *fastopen; struct ipv6_pinfo *np; - struct sock *sk; - int err; struct tcp_sock *tp; - struct request_sock *fastopen; __u32 seq, snd_una; - struct net *net = dev_net(skb->dev); + struct sock *sk; + int err; - sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, - th->dest, &hdr->saddr, th->source, skb->dev->ifindex); + sk = __inet6_lookup_established(net, &tcp_hashinfo, + &hdr->daddr, th->dest, + &hdr->saddr, ntohs(th->source), + skb->dev->ifindex); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -361,6 +347,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet_twsk_put(inet_twsk(sk)); return; } + seq = ntohl(th->seq); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_err(sk, seq); bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -375,7 +364,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } tp = tcp_sk(sk); - seq = ntohl(th->seq); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = tp->fastopen_rsk; snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; @@ -419,37 +407,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Might be for an request_sock */ switch (sk->sk_state) { - struct request_sock *req, **prev; - case TCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, inet6_iif(skb)); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - * an established socket here. - */ - WARN_ON(req->sk != NULL); - - if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - inet_csk_reqsk_queue_drop(sk, req, prev); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - goto out; - case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is * is already accepted it is treated as a connected one below. */ - if (fastopen && fastopen->sk == NULL) + if (fastopen && !fastopen->sk) break; if (!sock_owned_by_user(sk)) { @@ -497,7 +460,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && (ireq->pktopts != NULL)) + if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); skb_set_queue_mapping(skb, queue_mapping); @@ -523,17 +486,11 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, - struct sock *addr_sk) + const struct sock *addr_sk) { return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } -static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); -} - static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, int optlen) { @@ -619,9 +576,9 @@ clear_hash_noput: return 1; } -static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_skb(char *md5_hash, + const struct tcp_md5sig_key *key, const struct sock *sk, - const struct request_sock *req, const struct sk_buff *skb) { const struct in6_addr *saddr, *daddr; @@ -629,12 +586,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, struct hash_desc *desc; const struct tcphdr *th = tcp_hdr(skb); - if (sk) { - saddr = &inet6_sk(sk)->saddr; + if (sk) { /* valid for establish/request sockets */ + saddr = &sk->sk_v6_rcv_saddr; daddr = &sk->sk_v6_daddr; - } else if (req) { - saddr = &inet_rsk(req)->ir_v6_loc_addr; - daddr = &inet_rsk(req)->ir_v6_rmt_addr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; @@ -670,8 +624,7 @@ clear_hash_noput: return 1; } -static int __tcp_v6_inbound_md5_hash(struct sock *sk, - const struct sk_buff *skb) +static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; @@ -685,44 +638,32 @@ static int __tcp_v6_inbound_md5_hash(struct sock *sk, /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) - return 0; + return false; if (hash_expected && !hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return 1; + return true; } if (!hash_expected && hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return 1; + return true; } /* check the signature */ genhash = tcp_v6_md5_hash_skb(newhash, hash_expected, - NULL, NULL, skb); + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); - return 1; + return true; } - return 0; -} - -static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) -{ - int ret; - - rcu_read_lock(); - ret = __tcp_v6_inbound_md5_hash(sk, skb); - rcu_read_unlock(); - - return ret; + return false; } - #endif static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, @@ -734,8 +675,6 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - ireq->ir_iif = sk->sk_bound_dev_if; - /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) @@ -774,7 +713,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG - .md5_lookup = tcp_v6_reqsk_md5_lookup, + .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif .init_req = tcp_v6_init_req, @@ -811,7 +750,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); - if (buff == NULL) + if (!buff) return; skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); @@ -931,7 +870,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) if (!key) goto release_sk1; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb); + genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; } else { @@ -997,17 +936,19 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); + struct request_sock *req; struct sock *nsk; /* Find possible connection requests. */ - req = inet6_csk_search_req(sk, &prev, th->source, + req = inet6_csk_search_req(sk, th->source, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); - if (req) - return tcp_check_req(sk, skb, req, prev, false); - + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), @@ -1067,7 +1008,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst); - if (newsk == NULL) + if (!newsk) return NULL; newtcp6sk = (struct tcp6_sock *)newsk; @@ -1079,11 +1020,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); - - ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - - newsk->sk_v6_rcv_saddr = newnp->saddr; + newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1128,7 +1065,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } newsk = tcp_create_openreq_child(sk, req, skb); - if (newsk == NULL) + if (!newsk) goto out_nonewsk; /* @@ -1170,7 +1107,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq->pktopts != NULL) { + if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); @@ -1215,7 +1152,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); - if (key != NULL) { + if (key) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key @@ -1232,7 +1169,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_done(newsk); goto out; } - __inet6_hash(newsk, NULL); + __inet_hash(newsk, NULL); return newsk; @@ -1547,9 +1484,9 @@ do_time_wait: &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif(skb)); - if (sk2 != NULL) { + if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); sk = sk2; tcp_v6_restore_cb(skb); @@ -1595,7 +1532,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) @@ -1700,9 +1637,9 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - const struct sock *sk, struct request_sock *req, int i, kuid_t uid) + struct request_sock *req, int i, kuid_t uid) { - int ttd = req->expires - jiffies; + long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; @@ -1791,9 +1728,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) static void get_timewait6_sock(struct seq_file *seq, struct inet_timewait_sock *tw, int i) { + long delta = tw->tw_timer.expires - jiffies; const struct in6_addr *dest, *src; __u16 destp, srcp; - s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = &tw->tw_v6_daddr; src = &tw->tw_v6_rcv_saddr; @@ -1838,7 +1775,7 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) get_tcp6_sock(seq, v, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); + get_openreq6(seq, v, st->num, st->uid); break; } out: @@ -1902,7 +1839,7 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .hash = tcp_v6_hash, + .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1ab77105b4c..d883c9204c01 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -41,8 +41,8 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff) return tcp_gro_complete(skb); } -struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d048d46779fc..3477c919fcc8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,11 +53,11 @@ #include <trace/events/skb.h> #include "udp_impl.h" -static unsigned int udp6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +static u32 udp6_ehashfn(const struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) { static u32 udp6_ehash_secret __read_mostly; static u32 udp_ipv6_hash_secret __read_mostly; @@ -104,9 +104,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 0; } -static unsigned int udp6_portaddr_hash(struct net *net, - const struct in6_addr *addr6, - unsigned int port) +static u32 udp6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) { unsigned int hash, mix = net_hash_mix(net); @@ -120,7 +120,6 @@ static unsigned int udp6_portaddr_hash(struct net *net, return hash ^ port; } - int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = @@ -385,14 +384,12 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be } EXPORT_SYMBOL_GPL(udp6_lib_lookup); - /* * This should be easy, if there is something there we * return it, otherwise we block. */ -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -551,7 +548,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -649,7 +646,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { + if (skb->len > sizeof(struct udphdr) && encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -750,7 +747,7 @@ static void flush_stack(struct sock **stack, unsigned int count, for (i = 0; i < count; i++) { sk = stack[i]; - if (likely(skb1 == NULL)) + if (likely(!skb1)) skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); if (!skb1) { atomic_inc(&sk->sk_drops); @@ -900,7 +897,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * for sock caches... i'll skip this for now. */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); - if (sk != NULL) { + if (sk) { int ret; if (!uh->check && !udp_sk(sk)->no_check6_rx) { @@ -1101,8 +1098,7 @@ out: return err; } -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; struct udp_sock *up = udp_sk(sk); @@ -1164,12 +1160,12 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, do_udp_sendmsg: if (__ipv6_only_sock(sk)) return -ENETUNREACH; - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); } } if (up->pending == AF_INET) - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). @@ -1209,7 +1205,7 @@ do_udp_sendmsg: fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -1257,14 +1253,14 @@ do_udp_sendmsg: } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; connected = 0; } - if (opt == NULL) + if (!opt) opt = np->opt; if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); @@ -1557,7 +1553,6 @@ static struct inet_protosw udpv6_protosw = { .flags = INET_PROTOSW_PERMANENT, }; - int __init udpv6_init(void) { int ret; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index c779c3c90b9d..0682c031ccdc 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -23,10 +23,9 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index be2c0ba82c85..7441e1e63893 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -54,7 +54,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Set the IPv6 fragment id if not set yet */ if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(skb); + ipv6_proxy_select_ident(dev_net(skb->dev), skb); segs = NULL; goto out; @@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr->nexthdr = nexthdr; fptr->reserved = 0; if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(skb); + ipv6_proxy_select_ident(dev_net(skb->dev), skb); fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index f48fbe4d16f5..74bd17882a2f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,7 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 9949a356d62c..1e205c3253ac 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -95,8 +95,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(skb->len - size); - ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6; - ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6; + ip6h->daddr = x->sel.daddr.in6; + ip6h->saddr = x->sel.saddr.in6; err = 0; out: return err; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 010f8bd2d577..09c76a7b474d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); @@ -128,10 +128,10 @@ int xfrm6_output_finish(struct sk_buff *skb) IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - return xfrm_output(skb); + return xfrm_output(sk, skb); } -static int __xfrm6_output(struct sk_buff *skb) +static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; @@ -140,7 +140,7 @@ static int __xfrm6_output(struct sk_buff *skb) #ifdef CONFIG_NETFILTER if (!x) { IP6CB(skb)->flags |= IP6SKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif @@ -160,14 +160,15 @@ static int __xfrm6_output(struct sk_buff *skb) if (x->props.mode == XFRM_MODE_TUNNEL && ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); + return ip6_fragment(sk, skb, + x->outer_mode->afinfo->output_finish); } - return x->outer_mode->afinfo->output_finish(skb); + return x->outer_mode->afinfo->output_finish(sk, skb); } int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, NULL, skb_dst(skb)->dev, __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8d2d01b4800a..f337a908a76a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -61,9 +61,7 @@ static int xfrm6_get_saddr(struct net *net, return -EHOSTUNREACH; dev = ip6_dst_idev(dst)->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, - (struct in6_addr *)&daddr->a6, 0, - (struct in6_addr *)&saddr->a6); + ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; } @@ -293,7 +291,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, @@ -371,7 +368,7 @@ static void __net_exit xfrm6_net_exit(struct net *net) { struct ctl_table *table; - if (net->ipv6.sysctl.xfrm6_hdr == NULL) + if (!net->ipv6.sysctl.xfrm6_hdr) return; table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f11ad1d95e0e..4ea5d7497b5f 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1688,8 +1688,7 @@ out: return rc; } -static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); @@ -1754,8 +1753,8 @@ out: } -static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 568edc72d737..ee0ea25c8e7a 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1256,14 +1256,13 @@ static int irda_release(struct socket *sock) } /* - * Function irda_sendmsg (iocb, sock, msg, len) + * Function irda_sendmsg (sock, msg, len) * * Send message down to TinyTP. This function is used for both STREAM and * SEQPACK services. This is possible since it forces the client to * fragment the message if necessary */ -static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1348,13 +1347,13 @@ out: } /* - * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags) + * Function irda_recvmsg_dgram (sock, msg, size, flags) * * Try to receive message and copy it to user. The frame is discarded * after being read, regardless of how much the user actually read */ -static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1398,10 +1397,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_recvmsg_stream (iocb, sock, msg, size, flags) + * Function irda_recvmsg_stream (sock, msg, size, flags) */ -static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1515,14 +1514,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_sendmsg_dgram (iocb, sock, msg, len) + * Function irda_sendmsg_dgram (sock, msg, len) * * Send message down to TinyTP for the unreliable sequenced * packet service... * */ -static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1594,14 +1593,14 @@ out: } /* - * Function irda_sendmsg_ultra (iocb, sock, msg, len) + * Function irda_sendmsg_ultra (sock, msg, len) * * Send message down to IrLMP for the unreliable Ultra * packet service... */ #ifdef CONFIG_IRDA_ULTRA -static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 53d931172088..6daa52a18d40 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1026,8 +1026,8 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, (void *) prmdata, 8); } -static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); @@ -1315,8 +1315,8 @@ static void iucv_process_message_q(struct sock *sk) } } -static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/key/af_key.c b/net/key/af_key.c index f8ac939d52b4..f0d52d721b3a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -709,7 +709,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port sin6->sin6_family = AF_INET6; sin6->sin6_port = port; sin6->sin6_flowinfo = 0; - sin6->sin6_addr = *(struct in6_addr *)xaddr->a6; + sin6->sin6_addr = xaddr->in6; sin6->sin6_scope_id = 0; return 128; } @@ -3588,8 +3588,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } #endif -static int pfkey_sendmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len) +static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; @@ -3630,8 +3629,7 @@ out: return err ? : len; } -static int pfkey_recvmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len, +static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 781b3a226ba7..4b552873b556 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -74,7 +74,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) priv->dev = dev; eth_hw_addr_random(dev); - memset(&dev->broadcast[0], 0xff, 6); + eth_broadcast_addr(dev->broadcast); dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock; return 0; } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 05dfc8aa36af..79649937ec71 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -385,7 +385,7 @@ drop: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) +static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct sk_buff *skb; int rc; @@ -506,7 +506,7 @@ no_route: goto out; } -static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8611f1b63141..d1ded3777815 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -480,8 +480,7 @@ out: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -643,9 +642,8 @@ do_confirm: goto done; } -static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index b4e923f77954..9e13c2ff8789 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -205,9 +205,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info #endif if (info->attrs[L2TP_ATTR_IP_SADDR] && info->attrs[L2TP_ATTR_IP_DADDR]) { - cfg.local_ip.s_addr = nla_get_be32( + cfg.local_ip.s_addr = nla_get_in_addr( info->attrs[L2TP_ATTR_IP_SADDR]); - cfg.peer_ip.s_addr = nla_get_be32( + cfg.peer_ip.s_addr = nla_get_in_addr( info->attrs[L2TP_ATTR_IP_DADDR]); } else { ret = -EINVAL; @@ -376,15 +376,17 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (np) { - if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr), - &np->saddr) || - nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr), - &sk->sk_v6_daddr)) + if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, + &np->saddr) || + nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, + &sk->sk_v6_daddr)) goto nla_put_failure; } else #endif - if (nla_put_be32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) || - nla_put_be32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr)) + if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, + inet->inet_saddr) || + nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, + inet->inet_daddr)) goto nla_put_failure; break; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index cc7a828fc914..e9b0dec56b8e 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -185,9 +185,8 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) /* Receive message. This is the recvmsg for the PPPoL2TP socket. */ -static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, - int flags) +static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int err; struct sk_buff *skb; @@ -295,7 +294,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) * when a user application does a sendmsg() on the session socket. L2TP and * PPP headers must be inserted into the user's data. */ -static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, +static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { static const unsigned char ppph[2] = { 0xff, 0x03 }; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2c0b83ce43bd..17a8dff06090 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -704,8 +704,8 @@ out: * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name); const int nonblock = flags & MSG_DONTWAIT; @@ -878,8 +878,7 @@ copy_uaddr: * Transmit data provided by the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 7869bb40acaa..208df7c0b6ea 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -85,11 +85,15 @@ struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], return tfm; err = crypto_aead_setkey(tfm, key, key_len); - if (!err) - err = crypto_aead_setauthsize(tfm, mic_len); - if (!err) - return tfm; + if (err) + goto free_aead; + err = crypto_aead_setauthsize(tfm, mic_len); + if (err) + goto free_aead; + + return tfm; +free_aead: crypto_free_aead(tfm); return ERR_PTR(err); } diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c index c2bf6698d738..fd278bbe1b0d 100644 --- a/net/mac80211/aes_gcm.c +++ b/net/mac80211/aes_gcm.c @@ -80,11 +80,15 @@ struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], return tfm; err = crypto_aead_setkey(tfm, key, key_len); - if (!err) - err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN); - if (!err) - return tfm; + if (err) + goto free_aead; + err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN); + if (err) + goto free_aead; + + return tfm; +free_aead: crypto_free_aead(tfm); return ERR_PTR(err); } diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c index 1c72edcb0083..f1321b7d6506 100644 --- a/net/mac80211/aes_gmac.c +++ b/net/mac80211/aes_gmac.c @@ -70,9 +70,9 @@ struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], err = crypto_aead_setkey(tfm, key, key_len); if (!err) - return tfm; - if (!err) err = crypto_aead_setauthsize(tfm, GMAC_MIC_LEN); + if (!err) + return tfm; crypto_free_aead(tfm); return ERR_PTR(err); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 7702978a4c99..5c564a68fb50 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -238,6 +238,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, int i, ret = -EOPNOTSUPP; u16 status = WLAN_STATUS_REQUEST_DECLINED; + if (!sta->sta.ht_cap.ht_supported) { + ht_dbg(sta->sdata, + "STA %pM erroneously requests BA session on tid %d w/o QoS\n", + sta->sta.addr, tid); + /* send a response anyway, it's an error case if we get here */ + goto end_no_lock; + } + if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request (%pM tid %d)\n", diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index a360c15cc978..cce9d425c718 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -188,6 +188,43 @@ ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) __release(agg_queue); } +static void +ieee80211_agg_stop_txq(struct sta_info *sta, int tid) +{ + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + + if (!txq) + return; + + txqi = to_txq_info(txq); + + /* Lock here to protect against further seqno updates on dequeue */ + spin_lock_bh(&txqi->queue.lock); + set_bit(IEEE80211_TXQ_STOP, &txqi->flags); + spin_unlock_bh(&txqi->queue.lock); +} + +static void +ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) +{ + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + + if (!txq) + return; + + txqi = to_txq_info(txq); + + if (enable) + set_bit(IEEE80211_TXQ_AMPDU, &txqi->flags); + else + clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags); + + clear_bit(IEEE80211_TXQ_STOP, &txqi->flags); + drv_wake_tx_queue(sta->sdata->local, txqi); +} + /* * splice packets from the STA's pending to the local pending, * requires a call to ieee80211_agg_splice_finish later @@ -247,6 +284,7 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); + ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -418,6 +456,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + /* * Make sure no packets are being processed. This ensures that * we have a valid starting sequence number and that in-flight @@ -440,6 +480,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ieee80211_agg_splice_finish(sdata, tid); spin_unlock_bh(&sta->lock); + ieee80211_agg_start_txq(sta, tid, false); + kfree_rcu(tid_tx, rcu_head); return; } @@ -509,11 +551,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, struct tid_ampdu_tx *tid_tx; int ret = 0; + trace_api_start_tx_ba_session(pubsta, tid); + if (WARN(sta->reserved_tid == tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; - trace_api_start_tx_ba_session(pubsta, tid); + if (!pubsta->ht_cap.ht_supported) + return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) return -EINVAL; @@ -666,6 +711,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, ieee80211_agg_splice_finish(sta->sdata, tid); spin_unlock_bh(&sta->lock); + + ieee80211_agg_start_txq(sta, tid, true); } void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) @@ -793,6 +840,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct tid_ampdu_tx *tid_tx; + bool send_delba = false; trace_api_stop_tx_ba_cb(sdata, ra, tid); @@ -824,13 +872,17 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) } if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR && tid_tx->tx_stop) - ieee80211_send_delba(sta->sdata, ra, tid, - WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); + send_delba = true; ieee80211_remove_tid_tx(sta, tid); unlock_sta: spin_unlock_bh(&sta->lock); + + if (send_delba) + ieee80211_send_delba(sdata, ra, tid, + WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); + mutex_unlock(&sta->ampdu_mlme.mtx); unlock: mutex_unlock(&local->sta_mtx); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dd4ff36c557a..265e42721a66 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -24,6 +24,7 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, + unsigned char name_assign_type, enum nl80211_iftype type, u32 *flags, struct vif_params *params) @@ -33,7 +34,7 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; int err; - err = ieee80211_if_add(local, name, &wdev, type, params); + err = ieee80211_if_add(local, name, name_assign_type, &wdev, type, params); if (err) return ERR_PTR(err); @@ -977,6 +978,14 @@ static int sta_apply_auth_flags(struct ieee80211_local *local, if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && set & BIT(NL80211_STA_FLAG_ASSOCIATED) && !test_sta_flag(sta, WLAN_STA_ASSOC)) { + /* + * When peer becomes associated, init rate control as + * well. Some drivers require rate control initialized + * before drv_sta_state() is called. + */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + rate_control_rate_init(sta); + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (ret) return ret; @@ -1050,6 +1059,10 @@ static int sta_apply_parameters(struct ieee80211_local *local, } } + if (mask & BIT(NL80211_STA_FLAG_WME) && + local->hw.queues >= IEEE80211_NUM_ACS) + sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); + /* auth flags will be set later for TDLS stations */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { ret = sta_apply_auth_flags(local, sta, mask, set); @@ -1064,10 +1077,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); } - if (mask & BIT(NL80211_STA_FLAG_WME)) - sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); - if (mask & BIT(NL80211_STA_FLAG_MFP)) { + sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else @@ -1377,11 +1388,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (err) goto out_err; - /* When peer becomes authorized, init rate control as well */ - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && - test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - rate_control_rate_init(sta); - mutex_unlock(&local->sta_mtx); if ((sdata->vif.type == NL80211_IFTYPE_AP || @@ -1488,7 +1494,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, if (next_hop_sta) memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN); else - memset(next_hop, 0, ETH_ALEN); + eth_zero_addr(next_hop); memset(pinfo, 0, sizeof(*pinfo)); @@ -2273,7 +2279,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, { struct sta_info *sta; enum ieee80211_smps_mode old_req; - int i; if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP)) return -EINVAL; @@ -2297,52 +2302,44 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, } ht_dbg(sdata, - "SMSP %d requested in AP mode, sending Action frame to %d stations\n", + "SMPS %d requested in AP mode, sending Action frame to %d stations\n", smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta)); mutex_lock(&sdata->local->sta_mtx); - for (i = 0; i < STA_HASH_SIZE; i++) { - for (sta = rcu_dereference_protected(sdata->local->sta_hash[i], - lockdep_is_held(&sdata->local->sta_mtx)); - sta; - sta = rcu_dereference_protected(sta->hnext, - lockdep_is_held(&sdata->local->sta_mtx))) { - /* - * Only stations associated to our AP and - * associated VLANs - */ - if (sta->sdata->bss != &sdata->u.ap) - continue; + list_for_each_entry(sta, &sdata->local->sta_list, list) { + /* + * Only stations associated to our AP and + * associated VLANs + */ + if (sta->sdata->bss != &sdata->u.ap) + continue; - /* This station doesn't support MIMO - skip it */ - if (sta_info_tx_streams(sta) == 1) - continue; + /* This station doesn't support MIMO - skip it */ + if (sta_info_tx_streams(sta) == 1) + continue; - /* - * Don't wake up a STA just to send the action frame - * unless we are getting more restrictive. - */ - if (test_sta_flag(sta, WLAN_STA_PS_STA) && - !ieee80211_smps_is_restrictive(sta->known_smps_mode, - smps_mode)) { - ht_dbg(sdata, - "Won't send SMPS to sleeping STA %pM\n", - sta->sta.addr); - continue; - } + /* + * Don't wake up a STA just to send the action frame + * unless we are getting more restrictive. + */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) && + !ieee80211_smps_is_restrictive(sta->known_smps_mode, + smps_mode)) { + ht_dbg(sdata, "Won't send SMPS to sleeping STA %pM\n", + sta->sta.addr); + continue; + } - /* - * If the STA is not authorized, wait until it gets - * authorized and the action frame will be sent then. - */ - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - continue; + /* + * If the STA is not authorized, wait until it gets + * authorized and the action frame will be sent then. + */ + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + continue; - ht_dbg(sdata, "Sending SMPS to %pM\n", sta->sta.addr); - ieee80211_send_smps_action(sdata, smps_mode, - sta->sta.addr, - sdata->vif.bss_conf.bssid); - } + ht_dbg(sdata, "Sending SMPS to %pM\n", sta->sta.addr); + ieee80211_send_smps_action(sdata, smps_mode, sta->sta.addr, + sdata->vif.bss_conf.bssid); } mutex_unlock(&sdata->local->sta_mtx); @@ -3581,7 +3578,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, nullfunc->qos_ctrl = cpu_to_le16(7); local_bh_disable(); - ieee80211_xmit(sdata, skb); + ieee80211_xmit(sdata, sta, skb); local_bh_enable(); rcu_read_unlock(); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index eeb0bbd69d98..23813ebb349c 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -18,172 +18,6 @@ #define DEBUGFS_FORMAT_BUFFER_SIZE 100 -#define TX_LATENCY_BIN_DELIMTER_C ',' -#define TX_LATENCY_BIN_DELIMTER_S "," -#define TX_LATENCY_BINS_DISABLED "enable(bins disabled)\n" -#define TX_LATENCY_DISABLED "disable\n" - - -/* - * Display if Tx latency statistics & bins are enabled/disabled - */ -static ssize_t sta_tx_latency_stat_read(struct file *file, - char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - struct ieee80211_tx_latency_bin_ranges *tx_latency; - char *buf; - int bufsz, i, ret; - int pos = 0; - - rcu_read_lock(); - - tx_latency = rcu_dereference(local->tx_latency); - - if (tx_latency && tx_latency->n_ranges) { - bufsz = tx_latency->n_ranges * 15; - buf = kzalloc(bufsz, GFP_ATOMIC); - if (!buf) - goto err; - - for (i = 0; i < tx_latency->n_ranges; i++) - pos += scnprintf(buf + pos, bufsz - pos, "%d,", - tx_latency->ranges[i]); - pos += scnprintf(buf + pos, bufsz - pos, "\n"); - } else if (tx_latency) { - bufsz = sizeof(TX_LATENCY_BINS_DISABLED) + 1; - buf = kzalloc(bufsz, GFP_ATOMIC); - if (!buf) - goto err; - - pos += scnprintf(buf + pos, bufsz - pos, "%s\n", - TX_LATENCY_BINS_DISABLED); - } else { - bufsz = sizeof(TX_LATENCY_DISABLED) + 1; - buf = kzalloc(bufsz, GFP_ATOMIC); - if (!buf) - goto err; - - pos += scnprintf(buf + pos, bufsz - pos, "%s\n", - TX_LATENCY_DISABLED); - } - - rcu_read_unlock(); - - ret = simple_read_from_buffer(userbuf, count, ppos, buf, pos); - kfree(buf); - - return ret; -err: - rcu_read_unlock(); - return -ENOMEM; -} - -/* - * Receive input from user regarding Tx latency statistics - * The input should indicate if Tx latency statistics and bins are - * enabled/disabled. - * If bins are enabled input should indicate the amount of different bins and - * their ranges. Each bin will count how many Tx frames transmitted within the - * appropriate latency. - * Legal input is: - * a) "enable(bins disabled)" - to enable only general statistics - * b) "a,b,c,d,...z" - to enable general statistics and bins, where all are - * numbers and a < b < c < d.. < z - * c) "disable" - disable all statistics - * NOTE: must configure Tx latency statistics bins before stations connected. - */ - -static ssize_t sta_tx_latency_stat_write(struct file *file, - const char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - char buf[128] = {}; - char *bins = buf; - char *token; - int buf_size, i, alloc_size; - int prev_bin = 0; - int n_ranges = 0; - int ret = count; - struct ieee80211_tx_latency_bin_ranges *tx_latency; - - if (sizeof(buf) <= count) - return -EINVAL; - buf_size = count; - if (copy_from_user(buf, userbuf, buf_size)) - return -EFAULT; - - mutex_lock(&local->sta_mtx); - - /* cannot change config once we have stations */ - if (local->num_sta) - goto unlock; - - tx_latency = - rcu_dereference_protected(local->tx_latency, - lockdep_is_held(&local->sta_mtx)); - - /* disable Tx statistics */ - if (!strcmp(buf, TX_LATENCY_DISABLED)) { - if (!tx_latency) - goto unlock; - RCU_INIT_POINTER(local->tx_latency, NULL); - synchronize_rcu(); - kfree(tx_latency); - goto unlock; - } - - /* Tx latency already enabled */ - if (tx_latency) - goto unlock; - - if (strcmp(TX_LATENCY_BINS_DISABLED, buf)) { - /* check how many bins and between what ranges user requested */ - token = buf; - while (*token != '\0') { - if (*token == TX_LATENCY_BIN_DELIMTER_C) - n_ranges++; - token++; - } - n_ranges++; - } - - alloc_size = sizeof(struct ieee80211_tx_latency_bin_ranges) + - n_ranges * sizeof(u32); - tx_latency = kzalloc(alloc_size, GFP_ATOMIC); - if (!tx_latency) { - ret = -ENOMEM; - goto unlock; - } - tx_latency->n_ranges = n_ranges; - for (i = 0; i < n_ranges; i++) { /* setting bin ranges */ - token = strsep(&bins, TX_LATENCY_BIN_DELIMTER_S); - sscanf(token, "%d", &tx_latency->ranges[i]); - /* bins values should be in ascending order */ - if (prev_bin >= tx_latency->ranges[i]) { - ret = -EINVAL; - kfree(tx_latency); - goto unlock; - } - prev_bin = tx_latency->ranges[i]; - } - rcu_assign_pointer(local->tx_latency, tx_latency); - -unlock: - mutex_unlock(&local->sta_mtx); - - return ret; -} - -static const struct file_operations stats_tx_latency_ops = { - .write = sta_tx_latency_stat_write, - .read = sta_tx_latency_stat_read, - .open = simple_open, - .llseek = generic_file_llseek, -}; - int mac80211_format_buffer(char __user *userbuf, size_t count, loff_t *ppos, char *fmt, ...) { @@ -440,8 +274,6 @@ void debugfs_hw_add(struct ieee80211_local *local) #ifdef CONFIG_MAC80211_DEBUG_COUNTERS DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop); DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued); - DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted, - local->tx_handlers_drop_unencrypted); DEBUGFS_STATS_ADD(tx_handlers_drop_fragment, local->tx_handlers_drop_fragment); DEBUGFS_STATS_ADD(tx_handlers_drop_wep, @@ -475,6 +307,4 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); - - DEBUGFS_DEVSTATS_ADD(tx_latency); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c68896adfa96..29236e832e44 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -177,7 +177,6 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ IEEE80211_IF_FILE_R(name) /* common attributes */ -IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], HEX); IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], @@ -562,7 +561,6 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, static void add_common_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(drop_unencrypted); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); DEBUGFS_ADD(rc_rateidx_mcs_mask_2ghz); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 94c70091bbd7..252859e90e8a 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -39,13 +39,6 @@ static const struct file_operations sta_ ##name## _ops = { \ .llseek = generic_file_llseek, \ } -#define STA_OPS_W(name) \ -static const struct file_operations sta_ ##name## _ops = { \ - .write = sta_##name##_write, \ - .open = simple_open, \ - .llseek = generic_file_llseek, \ -} - #define STA_OPS_RW(name) \ static const struct file_operations sta_ ##name## _ops = { \ .read = sta_##name##_read, \ @@ -398,131 +391,6 @@ static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf, } STA_OPS(last_rx_rate); -static int -sta_tx_latency_stat_header(struct ieee80211_tx_latency_bin_ranges *tx_latency, - char *buf, int pos, int bufsz) -{ - int i; - int range_count = tx_latency->n_ranges; - u32 *bin_ranges = tx_latency->ranges; - - pos += scnprintf(buf + pos, bufsz - pos, - "Station\t\t\tTID\tMax\tAvg"); - if (range_count) { - pos += scnprintf(buf + pos, bufsz - pos, - "\t<=%d", bin_ranges[0]); - for (i = 0; i < range_count - 1; i++) - pos += scnprintf(buf + pos, bufsz - pos, "\t%d-%d", - bin_ranges[i], bin_ranges[i+1]); - pos += scnprintf(buf + pos, bufsz - pos, - "\t%d<", bin_ranges[range_count - 1]); - } - - pos += scnprintf(buf + pos, bufsz - pos, "\n"); - - return pos; -} - -static int -sta_tx_latency_stat_table(struct ieee80211_tx_latency_bin_ranges *tx_lat_range, - struct ieee80211_tx_latency_stat *tx_lat, - char *buf, int pos, int bufsz, int tid) -{ - u32 avg = 0; - int j; - int bin_count = tx_lat->bin_count; - - pos += scnprintf(buf + pos, bufsz - pos, "\t\t\t%d", tid); - /* make sure you don't divide in 0 */ - if (tx_lat->counter) - avg = tx_lat->sum / tx_lat->counter; - - pos += scnprintf(buf + pos, bufsz - pos, "\t%d\t%d", - tx_lat->max, avg); - - if (tx_lat_range->n_ranges && tx_lat->bins) - for (j = 0; j < bin_count; j++) - pos += scnprintf(buf + pos, bufsz - pos, - "\t%d", tx_lat->bins[j]); - pos += scnprintf(buf + pos, bufsz - pos, "\n"); - - return pos; -} - -/* - * Output Tx latency statistics station && restart all statistics information - */ -static ssize_t sta_tx_latency_stat_read(struct file *file, - char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - struct ieee80211_local *local = sta->local; - struct ieee80211_tx_latency_bin_ranges *tx_latency; - char *buf; - int bufsz, ret, i; - int pos = 0; - - bufsz = 20 * IEEE80211_NUM_TIDS * - sizeof(struct ieee80211_tx_latency_stat); - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - rcu_read_lock(); - - tx_latency = rcu_dereference(local->tx_latency); - - if (!sta->tx_lat) { - pos += scnprintf(buf + pos, bufsz - pos, - "Tx latency statistics are not enabled\n"); - goto unlock; - } - - pos = sta_tx_latency_stat_header(tx_latency, buf, pos, bufsz); - - pos += scnprintf(buf + pos, bufsz - pos, "%pM\n", sta->sta.addr); - for (i = 0; i < IEEE80211_NUM_TIDS; i++) - pos = sta_tx_latency_stat_table(tx_latency, &sta->tx_lat[i], - buf, pos, bufsz, i); -unlock: - rcu_read_unlock(); - - ret = simple_read_from_buffer(userbuf, count, ppos, buf, pos); - kfree(buf); - - return ret; -} -STA_OPS(tx_latency_stat); - -static ssize_t sta_tx_latency_stat_reset_write(struct file *file, - const char __user *userbuf, - size_t count, loff_t *ppos) -{ - u32 *bins; - int bin_count; - struct sta_info *sta = file->private_data; - int i; - - if (!sta->tx_lat) - return -EINVAL; - - for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - bins = sta->tx_lat[i].bins; - bin_count = sta->tx_lat[i].bin_count; - - sta->tx_lat[i].max = 0; - sta->tx_lat[i].sum = 0; - sta->tx_lat[i].counter = 0; - - if (bin_count) - memset(bins, 0, bin_count * sizeof(u32)); - } - - return count; -} -STA_OPS_W(tx_latency_stat_reset); - #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); @@ -576,8 +444,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(last_ack_signal); DEBUGFS_ADD(current_tx_rate); DEBUGFS_ADD(last_rx_rate); - DEBUGFS_ADD(tx_latency_stat); - DEBUGFS_ADD(tx_latency_stat_reset); DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index fdeda17b8dd2..26e1ca8a474a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -941,13 +941,13 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline void drv_rssi_callback(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - const enum ieee80211_rssi_event event) +static inline void drv_event_callback(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct ieee80211_event *event) { - trace_drv_rssi_callback(local, sdata, event); - if (local->ops->rssi_callback) - local->ops->rssi_callback(&local->hw, &sdata->vif, event); + trace_drv_event_callback(local, sdata, event); + if (local->ops->event_callback) + local->ops->event_callback(&local->hw, &sdata->vif, event); trace_drv_return_void(local); } @@ -1367,4 +1367,16 @@ drv_tdls_recv_channel_switch(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_wake_tx_queue(struct ieee80211_local *local, + struct txq_info *txq) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_wake_tx_queue(local, sdata, txq); + local->ops->wake_tx_queue(&local->hw, &txq->txq); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index ff630be2ca75..7a76ce639d58 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -252,8 +252,6 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, break; } - if (bw != sta->sta.bandwidth) - changed = true; sta->sta.bandwidth = bw; sta->cur_max_bandwidth = diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b606b53a49a7..bfef1b215050 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -188,6 +188,16 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, */ pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap, chandef, 0); + + /* add VHT capability and information IEs */ + if (chandef->width != NL80211_CHAN_WIDTH_20 && + chandef->width != NL80211_CHAN_WIDTH_40 && + sband->vht_cap.vht_supported) { + pos = ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, + sband->vht_cap.cap); + pos = ieee80211_ie_build_vht_oper(pos, &sband->vht_cap, + chandef); + } } if (local->hw.queues >= IEEE80211_NUM_ACS) @@ -249,8 +259,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, if (presp) kfree_rcu(presp, rcu_head); - sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - /* make a copy of the chandef, it could be modified below. */ chandef = *req_chandef; chan = chandef.chan; @@ -417,6 +425,11 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, NL80211_CHAN_WIDTH_20_NOHT); chandef.width = sdata->u.ibss.chandef.width; break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_160: + chandef = sdata->u.ibss.chandef; + chandef.chan = cbss->channel; + break; default: /* fall back to 20 MHz for unsupported modes */ cfg80211_chandef_create(&chandef, cbss->channel, @@ -470,22 +483,19 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp, *old_presp; struct cfg80211_bss *cbss; const struct cfg80211_bss_ies *ies; - u16 capability; + u16 capability = 0; u64 tsf; int ret = 0; sdata_assert_lock(sdata); - capability = WLAN_CAPABILITY_IBSS; - if (ifibss->privacy) - capability |= WLAN_CAPABILITY_PRIVACY; + capability = WLAN_CAPABILITY_PRIVACY; cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, - ifibss->ssid_len, WLAN_CAPABILITY_IBSS | - WLAN_CAPABILITY_PRIVACY, - capability); + ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS, + IEEE80211_PRIVACY(ifibss->privacy)); if (WARN_ON(!cbss)) { ret = -EINVAL; @@ -525,23 +535,17 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct cfg80211_bss *cbss; int err, changed = 0; - u16 capability; sdata_assert_lock(sdata); /* update cfg80211 bss information with the new channel */ if (!is_zero_ether_addr(ifibss->bssid)) { - capability = WLAN_CAPABILITY_IBSS; - - if (ifibss->privacy) - capability |= WLAN_CAPABILITY_PRIVACY; - cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, - ifibss->ssid_len, WLAN_CAPABILITY_IBSS | - WLAN_CAPABILITY_PRIVACY, - capability); + ifibss->ssid_len, + IEEE80211_BSS_TYPE_IBSS, + IEEE80211_PRIVACY(ifibss->privacy)); /* XXX: should not really modify cfg80211 data */ if (cbss) { cbss->channel = sdata->csa_chandef.chan; @@ -682,19 +686,13 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) struct cfg80211_bss *cbss; struct beacon_data *presp; struct sta_info *sta; - u16 capability; if (!is_zero_ether_addr(ifibss->bssid)) { - capability = WLAN_CAPABILITY_IBSS; - - if (ifibss->privacy) - capability |= WLAN_CAPABILITY_PRIVACY; - cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, - ifibss->ssid_len, WLAN_CAPABILITY_IBSS | - WLAN_CAPABILITY_PRIVACY, - capability); + ifibss->ssid_len, + IEEE80211_BSS_TYPE_IBSS, + IEEE80211_PRIVACY(ifibss->privacy)); if (cbss) { cfg80211_unlink_bss(local->hw.wiphy, cbss); @@ -980,110 +978,140 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0, 0); } -static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems) +static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status, + struct ieee802_11_elems *elems, + struct ieee80211_channel *channel) { - struct ieee80211_local *local = sdata->local; - struct cfg80211_bss *cbss; - struct ieee80211_bss *bss; struct sta_info *sta; - struct ieee80211_channel *channel; - u64 beacon_timestamp, rx_timestamp; - u32 supp_rates = 0; enum ieee80211_band band = rx_status->band; enum nl80211_bss_scan_width scan_width; + struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; bool rates_updated = false; + u32 supp_rates = 0; - channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); - if (!channel) + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) return; - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - ether_addr_equal(mgmt->bssid, sdata->u.ibss.bssid)) { + if (!ether_addr_equal(mgmt->bssid, sdata->u.ibss.bssid)) + return; - rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->sa); - - if (elems->supp_rates) { - supp_rates = ieee80211_sta_get_rates(sdata, elems, - band, NULL); - if (sta) { - u32 prev_rates; - - prev_rates = sta->sta.supp_rates[band]; - /* make sure mandatory rates are always added */ - scan_width = NL80211_BSS_CHAN_WIDTH_20; - if (rx_status->flag & RX_FLAG_5MHZ) - scan_width = NL80211_BSS_CHAN_WIDTH_5; - if (rx_status->flag & RX_FLAG_10MHZ) - scan_width = NL80211_BSS_CHAN_WIDTH_10; - - sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(sband, - scan_width); - if (sta->sta.supp_rates[band] != prev_rates) { - ibss_dbg(sdata, - "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", - sta->sta.addr, prev_rates, - sta->sta.supp_rates[band]); - rates_updated = true; - } - } else { - rcu_read_unlock(); - sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, - mgmt->sa, supp_rates); + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + + if (elems->supp_rates) { + supp_rates = ieee80211_sta_get_rates(sdata, elems, + band, NULL); + if (sta) { + u32 prev_rates; + + prev_rates = sta->sta.supp_rates[band]; + /* make sure mandatory rates are always added */ + scan_width = NL80211_BSS_CHAN_WIDTH_20; + if (rx_status->flag & RX_FLAG_5MHZ) + scan_width = NL80211_BSS_CHAN_WIDTH_5; + if (rx_status->flag & RX_FLAG_10MHZ) + scan_width = NL80211_BSS_CHAN_WIDTH_10; + + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(sband, scan_width); + if (sta->sta.supp_rates[band] != prev_rates) { + ibss_dbg(sdata, + "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", + sta->sta.addr, prev_rates, + sta->sta.supp_rates[band]); + rates_updated = true; } + } else { + rcu_read_unlock(); + sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, + mgmt->sa, supp_rates); } + } - if (sta && elems->wmm_info) - sta->sta.wme = true; - - if (sta && elems->ht_operation && elems->ht_cap_elem && - sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && - sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_5 && - sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_10) { - /* we both use HT */ - struct ieee80211_ht_cap htcap_ie; - struct cfg80211_chan_def chandef; - - ieee80211_ht_oper_to_chandef(channel, - elems->ht_operation, - &chandef); - - memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); - - /* - * fall back to HT20 if we don't use or use - * the other extension channel - */ - if (chandef.center_freq1 != - sdata->u.ibss.chandef.center_freq1) - htcap_ie.cap_info &= - cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); - - rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap( - sdata, sband, &htcap_ie, sta); + if (sta && elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS) + sta->sta.wme = true; + + if (sta && elems->ht_operation && elems->ht_cap_elem && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_5 && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_10) { + /* we both use HT */ + struct ieee80211_ht_cap htcap_ie; + struct cfg80211_chan_def chandef; + enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; + + ieee80211_ht_oper_to_chandef(channel, + elems->ht_operation, + &chandef); + + memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); + rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + &htcap_ie, + sta); + + if (elems->vht_operation && elems->vht_cap_elem && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20 && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_40) { + /* we both use VHT */ + struct ieee80211_vht_cap cap_ie; + struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap; + + ieee80211_vht_oper_to_chandef(channel, + elems->vht_operation, + &chandef); + memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &cap_ie, sta); + if (memcmp(&cap, &sta->sta.vht_cap, sizeof(cap))) + rates_updated |= true; } - if (sta && rates_updated) { - u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; - u8 rx_nss = sta->sta.rx_nss; + if (bw != sta->sta.bandwidth) + rates_updated |= true; - /* Force rx_nss recalculation */ - sta->sta.rx_nss = 0; - rate_control_rate_init(sta); - if (sta->sta.rx_nss != rx_nss) - changed |= IEEE80211_RC_NSS_CHANGED; + if (!cfg80211_chandef_compatible(&sdata->u.ibss.chandef, + &chandef)) + WARN_ON_ONCE(1); + } - drv_sta_rc_update(local, sdata, &sta->sta, changed); - } + if (sta && rates_updated) { + u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; + u8 rx_nss = sta->sta.rx_nss; - rcu_read_unlock(); + /* Force rx_nss recalculation */ + sta->sta.rx_nss = 0; + rate_control_rate_init(sta); + if (sta->sta.rx_nss != rx_nss) + changed |= IEEE80211_RC_NSS_CHANGED; + + drv_sta_rc_update(local, sdata, &sta->sta, changed); } + rcu_read_unlock(); +} + +static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status, + struct ieee802_11_elems *elems) +{ + struct ieee80211_local *local = sdata->local; + struct cfg80211_bss *cbss; + struct ieee80211_bss *bss; + struct ieee80211_channel *channel; + u64 beacon_timestamp, rx_timestamp; + u32 supp_rates = 0; + enum ieee80211_band band = rx_status->band; + + channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); + if (!channel) + return; + + ieee80211_update_sta_info(sdata, mgmt, len, rx_status, elems, channel); + bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, channel); if (!bss) @@ -1273,7 +1301,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, - NULL, scan_width); + NULL, 0, scan_width); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) @@ -1304,14 +1332,82 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; - else - sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, &ifibss->chandef, ifibss->basic_rates, capability, 0, true); } +static unsigned ibss_setup_channels(struct wiphy *wiphy, + struct ieee80211_channel **channels, + unsigned int channels_max, + u32 center_freq, u32 width) +{ + struct ieee80211_channel *chan = NULL; + unsigned int n_chan = 0; + u32 start_freq, end_freq, freq; + + if (width <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - width / 2 + 10; + end_freq = center_freq + width / 2 - 10; + } + + for (freq = start_freq; freq <= end_freq; freq += 20) { + chan = ieee80211_get_channel(wiphy, freq); + if (!chan) + continue; + if (n_chan >= channels_max) + return n_chan; + + channels[n_chan] = chan; + n_chan++; + } + + return n_chan; +} + +static unsigned int +ieee80211_ibss_setup_scan_channels(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + struct ieee80211_channel **channels, + unsigned int channels_max) +{ + unsigned int n_chan = 0; + u32 width, cf1, cf2 = 0; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80P80: + cf2 = chandef->center_freq2; + /* fall through */ + case NL80211_CHAN_WIDTH_80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + width = 20; + break; + } + + cf1 = chandef->center_freq1; + + n_chan = ibss_setup_channels(wiphy, channels, channels_max, cf1, width); + + if (cf2) + n_chan += ibss_setup_channels(wiphy, &channels[n_chan], + channels_max - n_chan, cf2, + width); + + return n_chan; +} + /* * This function is called with state == IEEE80211_IBSS_MLME_SEARCH */ @@ -1325,7 +1421,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) const u8 *bssid = NULL; enum nl80211_bss_scan_width scan_width; int active_ibss; - u16 capability; sdata_assert_lock(sdata); @@ -1335,9 +1430,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) if (active_ibss) return; - capability = WLAN_CAPABILITY_IBSS; - if (ifibss->privacy) - capability |= WLAN_CAPABILITY_PRIVACY; if (ifibss->fixed_bssid) bssid = ifibss->bssid; if (ifibss->fixed_channel) @@ -1346,8 +1438,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) bssid = ifibss->bssid; cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid, ifibss->ssid, ifibss->ssid_len, - WLAN_CAPABILITY_IBSS | WLAN_CAPABILITY_PRIVACY, - capability); + IEEE80211_BSS_TYPE_IBSS, + IEEE80211_PRIVACY(ifibss->privacy)); if (cbss) { struct ieee80211_bss *bss; @@ -1381,11 +1473,18 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) /* Selected IBSS not found in current scan results - try to scan */ if (time_after(jiffies, ifibss->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { + struct ieee80211_channel *channels[8]; + unsigned int num; + sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); + num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, + &ifibss->chandef, + channels, + ARRAY_SIZE(channels)); scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); ieee80211_request_ibss_scan(sdata, ifibss->ssid, - ifibss->ssid_len, chan, + ifibss->ssid_len, channels, num, scan_width); } else { int interval = IEEE80211_SCAN_INTERVAL; @@ -1742,7 +1841,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) ieee80211_ibss_disconnect(sdata); ifibss->ssid_len = 0; - memset(ifibss->bssid, 0, ETH_ALEN); + eth_zero_addr(ifibss->bssid); /* remove beacon */ kfree(sdata->u.ibss.ie); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8d53d65bd2ab..ab46ab4a7249 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -26,6 +26,7 @@ #include <linux/etherdevice.h> #include <linux/leds.h> #include <linux/idr.h> +#include <linux/rhashtable.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> #include <net/mac80211.h> @@ -810,6 +811,19 @@ struct mac80211_qos_map { struct rcu_head rcu_head; }; +enum txq_info_flags { + IEEE80211_TXQ_STOP, + IEEE80211_TXQ_AMPDU, +}; + +struct txq_info { + struct sk_buff_head queue; + unsigned long flags; + + /* keep last! */ + struct ieee80211_txq txq; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -830,8 +844,6 @@ struct ieee80211_sub_if_data { unsigned long state; - int drop_unencrypted; - char name[IFNAMSIZ]; /* Fragment table for host-based reassembly */ @@ -854,6 +866,7 @@ struct ieee80211_sub_if_data { bool control_port_no_encrypt; int encrypt_headroom; + atomic_t txqs_len[IEEE80211_NUM_ACS]; struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; struct mac80211_qos_map __rcu *qos_map; @@ -1042,24 +1055,6 @@ struct tpt_led_trigger { }; #endif -/* - * struct ieee80211_tx_latency_bin_ranges - Tx latency statistics bins ranges - * - * Measuring Tx latency statistics. Counts how many Tx frames transmitted in a - * certain latency range (in Milliseconds). Each station that uses these - * ranges will have bins to count the amount of frames received in that range. - * The user can configure the ranges via debugfs. - * If ranges is NULL then Tx latency statistics bins are disabled for all - * stations. - * - * @n_ranges: number of ranges that are taken in account - * @ranges: the ranges that the user requested or NULL if disabled. - */ -struct ieee80211_tx_latency_bin_ranges { - int n_ranges; - u32 ranges[]; -}; - /** * mac80211 scan flags - currently active scan mode * @@ -1207,16 +1202,10 @@ struct ieee80211_local { spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; - struct sta_info __rcu *sta_hash[STA_HASH_SIZE]; + struct rhashtable sta_hash; struct timer_list sta_cleanup; int sta_generation; - /* - * Tx latency statistics parameters for all stations. - * Can enable via debugfs (NULL when disabled). - */ - struct ieee80211_tx_latency_bin_ranges __rcu *tx_latency; - struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; @@ -1298,7 +1287,6 @@ struct ieee80211_local { /* TX/RX handler statistics */ unsigned int tx_handlers_drop; unsigned int tx_handlers_queued; - unsigned int tx_handlers_drop_unencrypted; unsigned int tx_handlers_drop_fragment; unsigned int tx_handlers_drop_wep; unsigned int tx_handlers_drop_not_assoc; @@ -1476,6 +1464,10 @@ static inline struct ieee80211_local *hw_to_local( return container_of(hw, struct ieee80211_local, hw); } +static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq) +{ + return container_of(txq, struct txq_info, txq); +} static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { @@ -1568,7 +1560,8 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata); void ieee80211_scan_work(struct work_struct *work); int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, - struct ieee80211_channel *chan, + struct ieee80211_channel **channels, + unsigned int n_channels, enum nl80211_bss_scan_width scan_width); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); @@ -1617,6 +1610,7 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, int ieee80211_iface_init(void); void ieee80211_iface_exit(void); int ieee80211_if_add(struct ieee80211_local *local, const char *name, + unsigned char name_assign_type, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params); int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, @@ -1784,7 +1778,8 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke gfp_t gfp); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, bool bss_notify); -void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, @@ -1929,6 +1924,9 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) return true; } +void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct txq_info *txq, int tid); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *bssid, @@ -1967,10 +1965,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata); -size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, - const u8 *after_ric, int n_after_ric, - size_t offset); size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset); u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 cap); @@ -1979,6 +1973,8 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 prot_mode); u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); +u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, + const struct cfg80211_chan_def *chandef); int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); @@ -1994,6 +1990,9 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); +void ieee80211_vht_oper_to_chandef(struct ieee80211_channel *control_chan, + const struct ieee80211_vht_operation *oper, + struct cfg80211_chan_def *chandef); u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c); int __must_check diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 81a27516813e..b4ac596a7cb7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -969,6 +969,13 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + if (sdata->vif.txq) { + struct txq_info *txqi = to_txq_info(sdata->vif.txq); + + ieee80211_purge_tx_queue(&local->hw, &txqi->queue); + atomic_set(&sdata->txqs_len[txqi->txq.ac], 0); + } + if (local->open_count == 0) ieee80211_clear_tx_pending(local); @@ -1508,7 +1515,6 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, } /* reset some values that shouldn't be kept across type changes */ - sdata->drop_unencrypted = 0; if (type == NL80211_IFTYPE_STATION) sdata->u.mgd.use_4addr = false; @@ -1649,11 +1655,13 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } int ieee80211_if_add(struct ieee80211_local *local, const char *name, + unsigned char name_assign_type, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) { struct net_device *ndev = NULL; struct ieee80211_sub_if_data *sdata = NULL; + struct txq_info *txqi; int ret, i; int txqs = 1; @@ -1673,11 +1681,19 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_assign_perm_addr(local, wdev->address, type); memcpy(sdata->vif.addr, wdev->address, ETH_ALEN); } else { + int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size, + sizeof(void *)); + int txq_size = 0; + + if (local->ops->wake_tx_queue) + txq_size += sizeof(struct txq_info) + + local->hw.txq_data_size; + if (local->hw.queues >= IEEE80211_NUM_ACS) txqs = IEEE80211_NUM_ACS; - ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size, - name, NET_NAME_UNKNOWN, + ndev = alloc_netdev_mqs(size + txq_size, + name, name_assign_type, ieee80211_if_setup, txqs, 1); if (!ndev) return -ENOMEM; @@ -1711,6 +1727,11 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); memcpy(sdata->name, ndev->name, IFNAMSIZ); + if (txq_size) { + txqi = netdev_priv(ndev) + size; + ieee80211_init_tx_queue(sdata, NULL, txqi, 0); + } + sdata->dev = ndev; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0825d76edcfc..2291cd730091 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -492,6 +492,7 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, for (j = 0; j < len; j++) key->u.gen.rx_pn[i][j] = seq[len - j - 1]; + key->flags |= KEY_FLAG_CIPHER_SCHEME; } } memcpy(key->conf.key, key_data, key_len); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index d57a9915494f..c5a31835be0e 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -30,10 +30,12 @@ struct sta_info; * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present * in the hardware for TX crypto hardware acceleration. * @KEY_FLAG_TAINTED: Key is tainted and packets should be dropped. + * @KEY_FLAG_CIPHER_SCHEME: This key is for a hardware cipher scheme */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), KEY_FLAG_TAINTED = BIT(1), + KEY_FLAG_CIPHER_SCHEME = BIT(2), }; enum ieee80211_internal_tkip_state { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5e09d354c5a5..df3051d96aff 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -557,6 +557,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, local = wiphy_priv(wiphy); + if (sta_info_init(local)) + goto err_free; + local->hw.wiphy = wiphy; local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); @@ -629,8 +632,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); - sta_info_init(local); - for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_head_init(&local->pending[i]); atomic_set(&local->agg_queue_stop[i], 0); @@ -650,6 +651,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_roc_setup(local); return &local->hw; + err_free: + wiphy_free(wiphy); + return NULL; } EXPORT_SYMBOL(ieee80211_alloc_hw_nm); @@ -1035,6 +1039,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->dynamic_ps_forced_timeout = -1; + if (!local->hw.txq_ac_max_pending) + local->hw.txq_ac_max_pending = 64; + result = ieee80211_wep_init(local); if (result < 0) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", @@ -1057,7 +1064,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) && !(hw->flags & IEEE80211_HW_NO_AUTO_VIF)) { - result = ieee80211_if_add(local, "wlan%d", NULL, + result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL, NL80211_IFTYPE_STATION, NULL); if (result) wiphy_warn(local->hw.wiphy, @@ -1173,7 +1180,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); - sta_info_stop(local); ieee80211_wep_free(local); ieee80211_led_exit(local); kfree(local->int_scan_req); @@ -1201,8 +1207,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) ieee80211_free_ack_frame, NULL); idr_destroy(&local->ack_status_frames); - kfree(rcu_access_pointer(local->tx_latency)); - sta_info_stop(local); wiphy_free(local->hw.wiphy); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 0c8b2a77d312..d4684242e78b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -520,7 +520,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, } else { *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ - memset(hdr->addr1, 0, ETH_ALEN); /* RA is resolved later */ + eth_zero_addr(hdr->addr1); /* RA is resolved later */ memcpy(hdr->addr2, meshsa, ETH_ALEN); memcpy(hdr->addr3, meshda, ETH_ALEN); memcpy(hdr->addr4, meshsa, ETH_ALEN); @@ -574,7 +574,8 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; - ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); + if (ifmsh->mshcfg.plink_timeout > 0) + ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index b488e1859b18..60d737f144e3 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -17,7 +17,7 @@ #define PLINK_GET_PLID(p) (p + 4) #define mod_plink_timer(s, t) (mod_timer(&s->plink_timer, \ - jiffies + HZ * t / 1000)) + jiffies + msecs_to_jiffies(t))) enum plink_event { PLINK_UNDEFINED, @@ -382,6 +382,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, enum ieee80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u32 rates, basic_rates = 0, changed = 0; + enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; sband = local->hw.wiphy->bands[band]; rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); @@ -401,6 +402,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, elems->ht_cap_elem, sta)) changed |= IEEE80211_RC_BW_CHANGED; + if (bw != sta->sta.bandwidth) + changed |= IEEE80211_RC_BW_CHANGED; + /* HT peer is operating 20MHz-only */ if (elems->ht_operation && !(elems->ht_operation->ht_param & @@ -621,9 +625,9 @@ static void mesh_plink_timer(unsigned long data) sta->llid, sta->plid, reason); } -static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) +static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout) { - sta->plink_timer.expires = jiffies + (HZ * timeout / 1000); + sta->plink_timer.expires = jiffies + msecs_to_jiffies(timeout); sta->plink_timer.data = (unsigned long) sta; sta->plink_timer.function = mesh_plink_timer; sta->plink_timeout = timeout; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 142f66aece18..26053bf2faa8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1168,11 +1168,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (!conf) { sdata_info(sdata, "no channel context assigned to vif?, disconnecting\n"); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->chanctx_mtx); - mutex_unlock(&local->mtx); - return; + goto drop_connection; } chanctx = container_of(conf, struct ieee80211_chanctx, conf); @@ -1181,11 +1177,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, !(local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA)) { sdata_info(sdata, "driver doesn't support chan-switch with channel contexts\n"); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->chanctx_mtx); - mutex_unlock(&local->mtx); - return; + goto drop_connection; } ch_switch.timestamp = timestamp; @@ -1197,11 +1189,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (drv_pre_channel_switch(sdata, &ch_switch)) { sdata_info(sdata, "preparing for channel switch failed, disconnecting\n"); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->chanctx_mtx); - mutex_unlock(&local->mtx); - return; + goto drop_connection; } res = ieee80211_vif_reserve_chanctx(sdata, &csa_ie.chandef, @@ -1210,11 +1198,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "failed to reserve channel context for channel switch, disconnecting (err=%d)\n", res); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->chanctx_mtx); - mutex_unlock(&local->mtx); - return; + goto drop_connection; } mutex_unlock(&local->chanctx_mtx); @@ -1244,6 +1228,11 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, mod_timer(&ifmgd->chswitch_timer, TU_TO_EXP_TIME((csa_ie.count - 1) * cbss->beacon_interval)); + return; + drop_connection: + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); + mutex_unlock(&local->chanctx_mtx); + mutex_unlock(&local->mtx); } static bool @@ -1359,15 +1348,15 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, */ if (has_80211h_pwr && (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) { - sdata_info(sdata, - "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", - pwr_level_80211h, chan_pwr, pwr_reduction_80211h, - sdata->u.mgd.bssid); + sdata_dbg(sdata, + "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", + pwr_level_80211h, chan_pwr, pwr_reduction_80211h, + sdata->u.mgd.bssid); new_ap_level = pwr_level_80211h; } else { /* has_cisco_pwr is always true here. */ - sdata_info(sdata, - "Limiting TX power to %d dBm as advertised by %pM\n", - pwr_level_cisco, sdata->u.mgd.bssid); + sdata_dbg(sdata, + "Limiting TX power to %d dBm as advertised by %pM\n", + pwr_level_cisco, sdata->u.mgd.bssid); new_ap_level = pwr_level_cisco; } @@ -1633,9 +1622,6 @@ void ieee80211_dynamic_ps_timer(unsigned long data) { struct ieee80211_local *local = (void *) data; - if (local->quiescing || local->suspended) - return; - ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); } @@ -2045,7 +2031,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_flush_queues(local, sdata, false); /* clear bssid only after building the needed mgmt frames */ - memset(ifmgd->bssid, 0, ETH_ALEN); + eth_zero_addr(ifmgd->bssid); /* remove AP and TDLS peers */ sta_info_flush(sdata); @@ -2260,7 +2246,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) else ssid_len = ssid[1]; - ieee80211_send_probe_req(sdata, sdata->vif.addr, NULL, + ieee80211_send_probe_req(sdata, sdata->vif.addr, dst, ssid + 2, ssid_len, NULL, 0, (u32) -1, true, 0, ifmgd->associated->channel, false); @@ -2372,6 +2358,24 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_ap_probereq_get); +static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata, + const u8 *buf, size_t len, bool tx, + u16 reason) +{ + struct ieee80211_event event = { + .type = MLME_EVENT, + .u.mlme.data = tx ? DEAUTH_TX_EVENT : DEAUTH_RX_EVENT, + .u.mlme.reason = reason, + }; + + if (tx) + cfg80211_tx_mlme_mgmt(sdata->dev, buf, len); + else + cfg80211_rx_mlme_mgmt(sdata->dev, buf, len); + + drv_event_callback(sdata->local, sdata, &event); +} + static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -2397,8 +2401,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) } mutex_unlock(&local->mtx); - cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + sdata_unlock(sdata); } @@ -2477,7 +2482,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, auth_data->bss->bssid); - memset(sdata->u.mgd.bssid, 0, ETH_ALEN); + eth_zero_addr(sdata->u.mgd.bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; mutex_lock(&sdata->local->mtx); @@ -2522,6 +2527,10 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, u8 bssid[ETH_ALEN]; u16 auth_alg, auth_transaction, status_code; struct sta_info *sta; + struct ieee80211_event event = { + .type = MLME_EVENT, + .u.mlme.data = AUTH_EVENT, + }; sdata_assert_lock(sdata); @@ -2554,6 +2563,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); + event.u.mlme.status = MLME_DENIED; + event.u.mlme.reason = status_code; + drv_event_callback(sdata->local, sdata, &event); return; } @@ -2576,6 +2588,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, return; } + event.u.mlme.status = MLME_SUCCESS; + drv_event_callback(sdata->local, sdata, &event); sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; @@ -2694,7 +2708,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); + ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code); } @@ -2720,7 +2734,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); + ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -2790,7 +2804,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, assoc_data->bss->bssid); - memset(sdata->u.mgd.bssid, 0, ETH_ALEN); + eth_zero_addr(sdata->u.mgd.bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; mutex_lock(&sdata->local->mtx); @@ -2982,10 +2996,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); - if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) + if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) { set_sta_flag(sta, WLAN_STA_MFP); + sta->sta.mfp = true; + } else { + sta->sta.mfp = false; + } - sta->sta.wme = elems.wmm_param; + sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS; err = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) @@ -3055,6 +3073,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, u8 *pos; bool reassoc; struct cfg80211_bss *bss; + struct ieee80211_event event = { + .type = MLME_EVENT, + .u.mlme.data = ASSOC_EVENT, + }; sdata_assert_lock(sdata); @@ -3106,6 +3128,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); + event.u.mlme.status = MLME_DENIED; + event.u.mlme.reason = status_code; + drv_event_callback(sdata->local, sdata, &event); } else { if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ @@ -3113,6 +3138,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, cfg80211_assoc_timeout(sdata->dev, bss); return; } + event.u.mlme.status = MLME_SUCCESS; + drv_event_callback(sdata->local, sdata, &event); sdata_info(sdata, "associated\n"); /* @@ -3315,6 +3342,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) { int sig = ifmgd->ave_beacon_signal; int last_sig = ifmgd->last_ave_beacon_signal; + struct ieee80211_event event = { + .type = RSSI_EVENT, + }; /* * if signal crosses either of the boundaries, invoke callback @@ -3323,12 +3353,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (sig > ifmgd->rssi_max_thold && (last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, sdata, RSSI_EVENT_HIGH); + event.u.rssi.data = RSSI_EVENT_HIGH; + drv_event_callback(local, sdata, &event); } else if (sig < ifmgd->rssi_min_thold && (last_sig >= ifmgd->rssi_max_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, sdata, RSSI_EVENT_LOW); + event.u.rssi.data = RSSI_EVENT_LOW; + drv_event_callback(local, sdata, &event); } } @@ -3433,6 +3465,26 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->csa_waiting_bcn) ieee80211_chswitch_post_beacon(sdata); + /* + * Update beacon timing and dtim count on every beacon appearance. This + * will allow the driver to use the most updated values. Do it before + * comparing this one with last received beacon. + * IMPORTANT: These parameters would possibly be out of sync by the time + * the driver will use them. The synchronized view is currently + * guaranteed only in certain callbacks. + */ + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } + if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) return; ifmgd->beacon_crc = ncrc; @@ -3460,18 +3512,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, else bss_conf->dtim_period = 1; - if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { - sdata->vif.bss_conf.sync_tsf = - le64_to_cpu(mgmt->u.beacon.timestamp); - sdata->vif.bss_conf.sync_device_ts = - rx_status->device_timestamp; - if (elems.tim) - sdata->vif.bss_conf.sync_dtim_count = - elems.tim->dtim_count; - else - sdata->vif.bss_conf.sync_dtim_count = 0; - } - changed |= BSS_CHANGED_BEACON_INFO; ifmgd->have_beacon = true; @@ -3502,8 +3542,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); - cfg80211_tx_mlme_mgmt(sdata->dev, deauth_buf, - sizeof(deauth_buf)); + ieee80211_report_disconnect(sdata, deauth_buf, + sizeof(deauth_buf), true, + WLAN_REASON_DEAUTH_LEAVING); return; } @@ -3621,8 +3662,8 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, tx, frame_buf); - cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, + reason); } static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) @@ -3816,12 +3857,18 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_auth_data(sdata, false); } else if (ieee80211_probe_auth(sdata)) { u8 bssid[ETH_ALEN]; + struct ieee80211_event event = { + .type = MLME_EVENT, + .u.mlme.data = AUTH_EVENT, + .u.mlme.status = MLME_TIMEOUT, + }; memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN); ieee80211_destroy_auth_data(sdata, false); cfg80211_auth_timeout(sdata->dev, bssid); + drv_event_callback(sdata->local, sdata, &event); } } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) run_again(sdata, ifmgd->auth_data->timeout); @@ -3831,9 +3878,15 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) || ieee80211_do_assoc(sdata)) { struct cfg80211_bss *bss = ifmgd->assoc_data->bss; + struct ieee80211_event event = { + .type = MLME_EVENT, + .u.mlme.data = ASSOC_EVENT, + .u.mlme.status = MLME_TIMEOUT, + }; ieee80211_destroy_assoc_data(sdata, false); cfg80211_assoc_timeout(sdata->dev, bss); + drv_event_callback(sdata->local, sdata, &event); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(sdata, ifmgd->assoc_data->timeout); @@ -3905,12 +3958,8 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (local->quiescing) - return; - if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) return; @@ -3926,9 +3975,6 @@ static void ieee80211_sta_conn_mon_timer(unsigned long data) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - if (local->quiescing) - return; - if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) return; @@ -3991,6 +4037,34 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) IEEE80211_DEAUTH_FRAME_LEN); } + /* This is a bit of a hack - we should find a better and more generic + * solution to this. Normally when suspending, cfg80211 will in fact + * deauthenticate. However, it doesn't (and cannot) stop an ongoing + * auth (not so important) or assoc (this is the problem) process. + * + * As a consequence, it can happen that we are in the process of both + * associating and suspending, and receive an association response + * after cfg80211 has checked if it needs to disconnect, but before + * we actually set the flag to drop incoming frames. This will then + * cause the workqueue flush to process the association response in + * the suspend, resulting in a successful association just before it + * tries to remove the interface from the driver, which now though + * has a channel context assigned ... this results in issues. + * + * To work around this (for now) simply deauth here again if we're + * now connected. + */ + if (ifmgd->associated && !sdata->local->wowlan) { + u8 bssid[ETH_ALEN]; + struct cfg80211_deauth_request req = { + .reason_code = WLAN_REASON_DEAUTH_LEAVING, + .bssid = bssid, + }; + + memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); + ieee80211_mgd_deauth(sdata, &req); + } + sdata_unlock(sdata); } @@ -4379,6 +4453,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } else WARN_ON_ONCE(!ether_addr_equal(ifmgd->bssid, cbss->bssid)); + /* Cancel scan to ensure that nothing interferes with connection */ + if (local->scanning) + ieee80211_scan_cancel(local); + return 0; } @@ -4467,8 +4545,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - sizeof(frame_buf)); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + WLAN_REASON_UNSPECIFIED); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4488,7 +4567,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return 0; err_clear: - memset(ifmgd->bssid, 0, ETH_ALEN); + eth_zero_addr(ifmgd->bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->auth_data = NULL; err_free: @@ -4568,8 +4647,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - sizeof(frame_buf)); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + WLAN_REASON_UNSPECIFIED); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -4831,7 +4911,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, return 0; err_clear: - memset(ifmgd->bssid, 0, ETH_ALEN); + eth_zero_addr(ifmgd->bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: @@ -4859,8 +4939,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); ieee80211_destroy_auth_data(sdata, false); - cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + req->reason_code); return 0; } @@ -4874,8 +4955,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + req->reason_code); return 0; } @@ -4907,8 +4989,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, req->reason_code, !req->local_state_change, frame_buf); - cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, + req->reason_code); return 0; } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index ca405b6b686d..ac6ad6238e3a 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -59,9 +59,26 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) cancel_work_sync(&local->dynamic_ps_enable_work); del_timer_sync(&local->dynamic_ps_timer); - local->wowlan = wowlan && local->open_count; + local->wowlan = wowlan; if (local->wowlan) { - int err = drv_suspend(local, wowlan); + int err; + + /* Drivers don't expect to suspend while some operations like + * authenticating or associating are in progress. It doesn't + * make sense anyway to accept that, since the authentication + * or association would never finish since the driver can't do + * that on its own. + * Thus, clean up in-progress auth/assoc first. + */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + continue; + ieee80211_mgd_quiesce(sdata); + } + + err = drv_suspend(local, wowlan); if (err < 0) { local->quiescing = false; local->wowlan = false; @@ -80,6 +97,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) return err; } else if (err > 0) { WARN_ON(err != 1); + /* cfg80211 will call back into mac80211 to disconnect + * all interfaces, allow that to proceed properly + */ + ieee80211_wake_queues_by_reason(hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND, + false); return err; } else { goto suspend; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ef6e8a6c4253..247552a7f6c2 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -69,14 +69,39 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) return i; } +/* return current EMWA throughput */ +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) +{ + int usecs; + + usecs = mr->perfect_tx_time; + if (!usecs) + usecs = 1000000; + + /* reset thr. below 10% success */ + if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100)) + return 0; + + if (prob_ewma > MINSTREL_FRAC(90, 100)) + return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs)); + else + return MINSTREL_TRUNC(100000 * (prob_ewma / usecs)); +} + /* find & sort topmost throughput rates */ static inline void minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) { int j = MAX_THR_RATES; + struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats; + struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats; - while (j > 0 && mi->r[i].stats.cur_tp > mi->r[tp_list[j - 1]].stats.cur_tp) + while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) > + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) { j--; + tmp_mrs = &mi->r[tp_list[j - 1]].stats; + } + if (j < MAX_THR_RATES - 1) memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1)); if (j < MAX_THR_RATES) @@ -127,13 +152,47 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi) rate_control_set_rates(mp->hw, mi->sta, ratetbl); } +/* +* Recalculate statistics and counters of a given rate +*/ +void +minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs) +{ + if (unlikely(mrs->attempts > 0)) { + mrs->sample_skipped = 0; + mrs->cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); + if (unlikely(!mrs->att_hist)) { + mrs->prob_ewma = mrs->cur_prob; + } else { + /* update exponential weighted moving variance */ + mrs->prob_ewmsd = minstrel_ewmsd(mrs->prob_ewmsd, + mrs->cur_prob, + mrs->prob_ewma, + EWMA_LEVEL); + + /*update exponential weighted moving avarage */ + mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma, + mrs->cur_prob, + EWMA_LEVEL); + } + mrs->att_hist += mrs->attempts; + mrs->succ_hist += mrs->success; + } else { + mrs->sample_skipped++; + } + + mrs->last_success = mrs->success; + mrs->last_attempts = mrs->attempts; + mrs->success = 0; + mrs->attempts = 0; +} + static void minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) { u8 tmp_tp_rate[MAX_THR_RATES]; u8 tmp_prob_rate = 0; - u32 usecs; - int i; + int i, tmp_cur_tp, tmp_prob_tp; for (i = 0; i < MAX_THR_RATES; i++) tmp_tp_rate[i] = 0; @@ -141,38 +200,15 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; struct minstrel_rate_stats *mrs = &mi->r[i].stats; + struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats; - usecs = mr->perfect_tx_time; - if (!usecs) - usecs = 1000000; - - if (unlikely(mrs->attempts > 0)) { - mrs->sample_skipped = 0; - mrs->cur_prob = MINSTREL_FRAC(mrs->success, - mrs->attempts); - mrs->succ_hist += mrs->success; - mrs->att_hist += mrs->attempts; - mrs->probability = minstrel_ewma(mrs->probability, - mrs->cur_prob, - EWMA_LEVEL); - } else - mrs->sample_skipped++; - - mrs->last_success = mrs->success; - mrs->last_attempts = mrs->attempts; - mrs->success = 0; - mrs->attempts = 0; - - /* Update throughput per rate, reset thr. below 10% success */ - if (mrs->probability < MINSTREL_FRAC(10, 100)) - mrs->cur_tp = 0; - else - mrs->cur_tp = mrs->probability * (1000000 / usecs); + /* Update statistics of success probability per rate */ + minstrel_calc_rate_stats(mrs); /* Sample less often below the 10% chance of success. * Sample less often above the 95% chance of success. */ - if (mrs->probability > MINSTREL_FRAC(95, 100) || - mrs->probability < MINSTREL_FRAC(10, 100)) { + if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) || + mrs->prob_ewma < MINSTREL_FRAC(10, 100)) { mr->adjusted_retry_count = mrs->retry_count >> 1; if (mr->adjusted_retry_count > 2) mr->adjusted_retry_count = 2; @@ -192,11 +228,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * choose the maximum throughput rate as max_prob_rate * (2) if all success probabilities < 95%, the rate with * highest success probability is chosen as max_prob_rate */ - if (mrs->probability >= MINSTREL_FRAC(95, 100)) { - if (mrs->cur_tp >= mi->r[tmp_prob_rate].stats.cur_tp) + if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) { + tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma); + tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate], + tmp_mrs->prob_ewma); + if (tmp_cur_tp >= tmp_prob_tp) tmp_prob_rate = i; } else { - if (mrs->probability >= mi->r[tmp_prob_rate].stats.probability) + if (mrs->prob_ewma >= tmp_mrs->prob_ewma) tmp_prob_rate = i; } } @@ -215,7 +254,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) #endif /* Reset update timer */ - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; minstrel_update_rates(mp, mi); } @@ -253,7 +292,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, if (mi->sample_deferred > 0) mi->sample_deferred--; - if (time_after(jiffies, mi->stats_update + + if (time_after(jiffies, mi->last_stats_update + (mp->update_interval * HZ) / 1000)) minstrel_update_stats(mp, mi); } @@ -385,7 +424,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * has a probability of >95%, we shouldn't be attempting * to use it, as this only wastes precious airtime */ if (!mrr_capable && - (mi->r[ndx].stats.probability > MINSTREL_FRAC(95, 100))) + (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100))) return; mi->prev_sample = true; @@ -519,7 +558,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, } mi->n_rates = n; - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; init_sample_table(mi); minstrel_update_rates(mp, mi); @@ -553,7 +592,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) if (!mi->sample_table) goto error1; - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; return mi; error1: @@ -663,12 +702,18 @@ minstrel_free(void *priv) static u32 minstrel_get_expected_throughput(void *priv_sta) { struct minstrel_sta_info *mi = priv_sta; + struct minstrel_rate_stats *tmp_mrs; int idx = mi->max_tp_rate[0]; + int tmp_cur_tp; /* convert pkt per sec in kbps (1200 is the average pkt size used for * computing cur_tp */ - return MINSTREL_TRUNC(mi->r[idx].stats.cur_tp) * 1200 * 8 / 1024; + tmp_mrs = &mi->r[idx].stats; + tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma); + tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024; + + return tmp_cur_tp; } const struct rate_control_ops mac80211_minstrel = { diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 410efe620c57..c230bbe93262 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -13,7 +13,6 @@ #define EWMA_DIV 128 #define SAMPLE_COLUMNS 10 /* number of columns in sample table */ - /* scaled fraction values */ #define MINSTREL_SCALE 16 #define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) @@ -24,11 +23,34 @@ /* * Perform EWMA (Exponentially Weighted Moving Average) calculation - */ + */ static inline int minstrel_ewma(int old, int new, int weight) { - return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV; + int diff, incr; + + diff = new - old; + incr = (EWMA_DIV - weight) * diff / EWMA_DIV; + + return old + incr; +} + +/* + * Perform EWMSD (Exponentially Weighted Moving Standard Deviation) calculation + */ +static inline int +minstrel_ewmsd(int old_ewmsd, int cur_prob, int prob_ewma, int weight) +{ + int diff, incr, tmp_var; + + /* calculate exponential weighted moving variance */ + diff = MINSTREL_TRUNC((cur_prob - prob_ewma) * 1000000); + incr = (EWMA_DIV - weight) * diff / EWMA_DIV; + tmp_var = old_ewmsd * old_ewmsd; + tmp_var = weight * (tmp_var + diff * incr / 1000000) / EWMA_DIV; + + /* return standard deviation */ + return (u16) int_sqrt(tmp_var); } struct minstrel_rate_stats { @@ -39,11 +61,13 @@ struct minstrel_rate_stats { /* total attempts/success counters */ u64 att_hist, succ_hist; - /* current throughput */ - unsigned int cur_tp; - - /* packet delivery probabilities */ - unsigned int cur_prob, probability; + /* statistis of packet delivery probability + * cur_prob - current prob within last update intervall + * prob_ewma - exponential weighted moving average of prob + * prob_ewmsd - exp. weighted moving standard deviation of prob */ + unsigned int cur_prob; + unsigned int prob_ewma; + u16 prob_ewmsd; /* maximum retry counts */ u8 retry_count; @@ -71,7 +95,7 @@ struct minstrel_rate { struct minstrel_sta_info { struct ieee80211_sta *sta; - unsigned long stats_update; + unsigned long last_stats_update; unsigned int sp_ack_dur; unsigned int rate_avg; @@ -95,6 +119,7 @@ struct minstrel_sta_info { #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *dbg_stats; + struct dentry *dbg_stats_csv; #endif }; @@ -121,7 +146,6 @@ struct minstrel_priv { u32 fixed_rate_idx; struct dentry *dbg_fixed_rate; #endif - }; struct minstrel_debugfs_info { @@ -133,8 +157,13 @@ extern const struct rate_control_ops mac80211_minstrel; void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); +/* Recalculate success probabilities and counters for a given rate using EWMA */ +void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs); +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma); + /* debugfs */ int minstrel_stats_open(struct inode *inode, struct file *file); +int minstrel_stats_csv_open(struct inode *inode, struct file *file); ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); int minstrel_stats_release(struct inode *inode, struct file *file); diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index 2acab1bcaa4b..1db5f7c3318a 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -54,12 +54,28 @@ #include <net/mac80211.h> #include "rc80211_minstrel.h" +ssize_t +minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) +{ + struct minstrel_debugfs_info *ms; + + ms = file->private_data; + return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len); +} + +int +minstrel_stats_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + int minstrel_stats_open(struct inode *inode, struct file *file) { struct minstrel_sta_info *mi = inode->i_private; struct minstrel_debugfs_info *ms; - unsigned int i, tp, prob, eprob; + unsigned int i, tp_max, tp_avg, prob, eprob; char *p; ms = kmalloc(2048, GFP_KERNEL); @@ -68,8 +84,14 @@ minstrel_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, "rate tpt eprob *prob" - " *ok(*cum) ok( cum)\n"); + p += sprintf(p, "\n"); + p += sprintf(p, "best __________rate_________ ______" + "statistics______ ________last_______ " + "______sum-of________\n"); + p += sprintf(p, "rate [name idx airtime max_tp] [ ø(tp) ø(prob) " + "sd(prob)] [prob.|retry|suc|att] " + "[#success | #attempts]\n"); + for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; struct minstrel_rate_stats *mrs = &mi->r[i].stats; @@ -79,18 +101,26 @@ minstrel_stats_open(struct inode *inode, struct file *file) *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' '; *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' '; *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; - p += sprintf(p, "%3u%s", mr->bitrate / 2, + + p += sprintf(p, " %3u%s ", mr->bitrate / 2, (mr->bitrate & 1 ? ".5" : " ")); + p += sprintf(p, "%3u ", i); + p += sprintf(p, "%6u ", mr->perfect_tx_time); - tp = MINSTREL_TRUNC(mrs->cur_tp / 10); + tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); - eprob = MINSTREL_TRUNC(mrs->probability * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); - p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u" - " %4u(%4u) %9llu(%9llu)\n", - tp / 10, tp % 10, + p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" + " %3u.%1u %3u %3u %-3u " + "%9llu %-9llu\n", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, prob / 10, prob % 10, + mrs->retry_count, mrs->last_success, mrs->last_attempts, (unsigned long long)mrs->succ_hist, @@ -107,25 +137,75 @@ minstrel_stats_open(struct inode *inode, struct file *file) return 0; } -ssize_t -minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) +static const struct file_operations minstrel_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, + .llseek = default_llseek, +}; + +int +minstrel_stats_csv_open(struct inode *inode, struct file *file) { + struct minstrel_sta_info *mi = inode->i_private; struct minstrel_debugfs_info *ms; + unsigned int i, tp_max, tp_avg, prob, eprob; + char *p; - ms = file->private_data; - return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len); -} + ms = kmalloc(2048, GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + struct minstrel_rate_stats *mrs = &mi->r[i].stats; + + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[0]) ? "A" : "")); + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[1]) ? "B" : "")); + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[2]) ? "C" : "")); + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[3]) ? "D" : "")); + p += sprintf(p, "%s" ,((i == mi->max_prob_rate) ? "P" : "")); + + p += sprintf(p, ",%u%s", mr->bitrate / 2, + (mr->bitrate & 1 ? ".5," : ",")); + p += sprintf(p, "%u,", i); + p += sprintf(p, "%u,",mr->perfect_tx_time); + + tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); + prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + + p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,%u," + "%llu,%llu,%d,%d\n", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, + eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, + prob / 10, prob % 10, + mrs->retry_count, + mrs->last_success, + mrs->last_attempts, + (unsigned long long)mrs->succ_hist, + (unsigned long long)mrs->att_hist, + mi->total_packets - mi->sample_packets, + mi->sample_packets); + + } + ms->len = p - ms->buf; + + WARN_ON(ms->len + sizeof(*ms) > 2048); -int -minstrel_stats_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); return 0; } -static const struct file_operations minstrel_stat_fops = { +static const struct file_operations minstrel_stat_csv_fops = { .owner = THIS_MODULE, - .open = minstrel_stats_open, + .open = minstrel_stats_csv_open, .read = minstrel_stats_read, .release = minstrel_stats_release, .llseek = default_llseek, @@ -138,6 +218,9 @@ minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi, &minstrel_stat_fops); + + mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, dir, + mi, &minstrel_stat_csv_fops); } void @@ -146,4 +229,6 @@ minstrel_remove_sta_debugfs(void *priv, void *priv_sta) struct minstrel_sta_info *mi = priv_sta; debugfs_remove(mi->dbg_stats); + + debugfs_remove(mi->dbg_stats_csv); } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 80452cfd2dc5..7430a1df2ab1 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -17,10 +17,11 @@ #include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" +#define AVG_AMPDU_SIZE 16 #define AVG_PKT_SIZE 1200 /* Number of bits for an average sized packet */ -#define MCS_NBITS (AVG_PKT_SIZE << 3) +#define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3) /* Number of symbols for a packet with (bps) bits per symbol */ #define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps)) @@ -33,7 +34,8 @@ ) /* Transmit duration for the raw data part of an average sized packet */ -#define MCS_DURATION(streams, sgi, bps) MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) +#define MCS_DURATION(streams, sgi, bps) \ + (MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) / AVG_AMPDU_SIZE) #define BW_20 0 #define BW_40 1 @@ -311,67 +313,35 @@ minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; } - /* - * Recalculate success probabilities and counters for a rate using EWMA + * Return current throughput based on the average A-MPDU length, taking into + * account the expected number of retransmissions and their expected length */ -static void -minstrel_calc_rate_ewma(struct minstrel_rate_stats *mr) +int +minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, + int prob_ewma) { - if (unlikely(mr->attempts > 0)) { - mr->sample_skipped = 0; - mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); - if (!mr->att_hist) - mr->probability = mr->cur_prob; - else - mr->probability = minstrel_ewma(mr->probability, - mr->cur_prob, EWMA_LEVEL); - mr->att_hist += mr->attempts; - mr->succ_hist += mr->success; - } else { - mr->sample_skipped++; - } - mr->last_success = mr->success; - mr->last_attempts = mr->attempts; - mr->success = 0; - mr->attempts = 0; -} - -/* - * Calculate throughput based on the average A-MPDU length, taking into account - * the expected number of retransmissions and their expected length - */ -static void -minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) -{ - struct minstrel_rate_stats *mr; unsigned int nsecs = 0; - unsigned int tp; - unsigned int prob; - mr = &mi->groups[group].rates[rate]; - prob = mr->probability; - - if (prob < MINSTREL_FRAC(1, 10)) { - mr->cur_tp = 0; - return; - } - - /* - * For the throughput calculation, limit the probability value to 90% to - * account for collision related packet error rate fluctuation - */ - if (prob > MINSTREL_FRAC(9, 10)) - prob = MINSTREL_FRAC(9, 10); + /* do not account throughput if sucess prob is below 10% */ + if (prob_ewma < MINSTREL_FRAC(10, 100)) + return 0; if (group != MINSTREL_CCK_GROUP) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - /* prob is scaled - see MINSTREL_FRAC above */ - tp = 1000000 * ((prob * 1000) / nsecs); - mr->cur_tp = MINSTREL_TRUNC(tp); + /* + * For the throughput calculation, limit the probability value to 90% to + * account for collision related packet error rate fluctuation + * (prob is scaled - see MINSTREL_FRAC above) + */ + if (prob_ewma > MINSTREL_FRAC(90, 100)) + return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000) + / nsecs)); + else + return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs)); } /* @@ -385,22 +355,23 @@ static void minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index, u16 *tp_list) { - int cur_group, cur_idx, cur_thr, cur_prob; - int tmp_group, tmp_idx, tmp_thr, tmp_prob; + int cur_group, cur_idx, cur_tp_avg, cur_prob; + int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; int j = MAX_THR_RATES; cur_group = index / MCS_GROUP_RATES; cur_idx = index % MCS_GROUP_RATES; - cur_thr = mi->groups[cur_group].rates[cur_idx].cur_tp; - cur_prob = mi->groups[cur_group].rates[cur_idx].probability; + cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma; + cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob); do { tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; - tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; - if (cur_thr < tmp_thr || - (cur_thr == tmp_thr && cur_prob <= tmp_prob)) + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, + tmp_prob); + if (cur_tp_avg < tmp_tp_avg || + (cur_tp_avg == tmp_tp_avg && cur_prob <= tmp_prob)) break; j--; } while (j > 0); @@ -420,16 +391,21 @@ static void minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) { struct minstrel_mcs_group_data *mg; - struct minstrel_rate_stats *mr; - int tmp_group, tmp_idx, tmp_tp, tmp_prob, max_tp_group; + struct minstrel_rate_stats *mrs; + int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; + int max_tp_group, cur_tp_avg, cur_group, cur_idx; + int max_gpr_group, max_gpr_idx; + int max_gpr_tp_avg, max_gpr_prob; + cur_group = index / MCS_GROUP_RATES; + cur_idx = index % MCS_GROUP_RATES; mg = &mi->groups[index / MCS_GROUP_RATES]; - mr = &mg->rates[index % MCS_GROUP_RATES]; + mrs = &mg->rates[index % MCS_GROUP_RATES]; tmp_group = mi->max_prob_rate / MCS_GROUP_RATES; tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES; - tmp_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from * MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */ @@ -438,15 +414,24 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) (max_tp_group != MINSTREL_CCK_GROUP)) return; - if (mr->probability > MINSTREL_FRAC(75, 100)) { - if (mr->cur_tp > tmp_tp) + if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) { + cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, + mrs->prob_ewma); + if (cur_tp_avg > tmp_tp_avg) mi->max_prob_rate = index; - if (mr->cur_tp > mg->rates[mg->max_group_prob_rate].cur_tp) + + max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES; + max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; + max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma; + max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group, + max_gpr_idx, + max_gpr_prob); + if (cur_tp_avg > max_gpr_tp_avg) mg->max_group_prob_rate = index; } else { - if (mr->probability > tmp_prob) + if (mrs->prob_ewma > tmp_prob) mi->max_prob_rate = index; - if (mr->probability > mg->rates[mg->max_group_prob_rate].probability) + if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma) mg->max_group_prob_rate = index; } } @@ -463,16 +448,18 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, u16 tmp_mcs_tp_rate[MAX_THR_RATES], u16 tmp_cck_tp_rate[MAX_THR_RATES]) { - unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp; + unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob; int i; tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES; - tmp_cck_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES; - tmp_mcs_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { @@ -491,8 +478,7 @@ static inline void minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; - struct minstrel_rate_stats *mr; - int tmp_max_streams, group; + int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; tmp_max_streams = minstrel_mcs_groups[mi->max_tp_rate[0] / @@ -501,11 +487,16 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) mg = &mi->groups[group]; if (!mg->supported || group == MINSTREL_CCK_GROUP) continue; - mr = minstrel_get_ratestats(mi, mg->max_group_prob_rate); - if (tmp_tp < mr->cur_tp && + + tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; + tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma; + + if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) && (minstrel_mcs_groups[group].streams < tmp_max_streams)) { mi->max_prob_rate = mg->max_group_prob_rate; - tmp_tp = mr->cur_tp; + tmp_tp = minstrel_ht_get_tp_avg(mi, group, + tmp_idx, + tmp_prob); } } } @@ -523,8 +514,8 @@ static void minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; - struct minstrel_rate_stats *mr; - int group, i, j; + struct minstrel_rate_stats *mrs; + int group, i, j, cur_prob; u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_cck_tp_rate[MAX_THR_RATES], index; @@ -563,12 +554,12 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) index = MCS_GROUP_RATES * group + i; - mr = &mg->rates[i]; - mr->retry_updated = false; - minstrel_calc_rate_ewma(mr); - minstrel_ht_calc_tp(mi, group, i); + mrs = &mg->rates[i]; + mrs->retry_updated = false; + minstrel_calc_rate_stats(mrs); + cur_prob = mrs->prob_ewma; - if (!mr->cur_tp) + if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) continue; /* Find max throughput rate set */ @@ -612,7 +603,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) #endif /* Reset update timer */ - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; } static bool @@ -635,7 +626,7 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rat } static void -minstrel_next_sample_idx(struct minstrel_ht_sta *mi) +minstrel_set_next_sample_idx(struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; @@ -776,7 +767,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, update = true; } - if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { + if (time_after(jiffies, mi->last_stats_update + + (mp->update_interval / 2 * HZ) / 1000)) { update = true; minstrel_ht_update_stats(mp, mi); } @@ -789,7 +781,7 @@ static void minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, int index) { - struct minstrel_rate_stats *mr; + struct minstrel_rate_stats *mrs; const struct mcs_group *group; unsigned int tx_time, tx_time_rtscts, tx_time_data; unsigned int cw = mp->cw_min; @@ -798,16 +790,16 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); unsigned int overhead = 0, overhead_rtscts = 0; - mr = minstrel_get_ratestats(mi, index); - if (mr->probability < MINSTREL_FRAC(1, 10)) { - mr->retry_count = 1; - mr->retry_count_rtscts = 1; + mrs = minstrel_get_ratestats(mi, index); + if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) { + mrs->retry_count = 1; + mrs->retry_count_rtscts = 1; return; } - mr->retry_count = 2; - mr->retry_count_rtscts = 2; - mr->retry_updated = true; + mrs->retry_count = 2; + mrs->retry_count_rtscts = 2; + mrs->retry_updated = true; group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000; @@ -838,9 +830,9 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, tx_time_rtscts += ctime + overhead_rtscts + tx_time_data; if (tx_time_rtscts < mp->segment_size) - mr->retry_count_rtscts++; + mrs->retry_count_rtscts++; } while ((tx_time < mp->segment_size) && - (++mr->retry_count < mp->max_retry)); + (++mrs->retry_count < mp->max_retry)); } @@ -849,22 +841,22 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_sta_rates *ratetbl, int offset, int index) { const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; - struct minstrel_rate_stats *mr; + struct minstrel_rate_stats *mrs; u8 idx; u16 flags = group->flags; - mr = minstrel_get_ratestats(mi, index); - if (!mr->retry_updated) + mrs = minstrel_get_ratestats(mi, index); + if (!mrs->retry_updated) minstrel_calc_retransmit(mp, mi, index); - if (mr->probability < MINSTREL_FRAC(20, 100) || !mr->retry_count) { + if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { ratetbl->rate[offset].count = 2; ratetbl->rate[offset].count_rts = 2; ratetbl->rate[offset].count_cts = 2; } else { - ratetbl->rate[offset].count = mr->retry_count; - ratetbl->rate[offset].count_cts = mr->retry_count; - ratetbl->rate[offset].count_rts = mr->retry_count_rtscts; + ratetbl->rate[offset].count = mrs->retry_count; + ratetbl->rate[offset].count_cts = mrs->retry_count; + ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts; } if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) @@ -922,7 +914,7 @@ minstrel_get_duration(int index) static int minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { - struct minstrel_rate_stats *mr; + struct minstrel_rate_stats *mrs; struct minstrel_mcs_group_data *mg; unsigned int sample_dur, sample_group, cur_max_tp_streams; int sample_idx = 0; @@ -938,12 +930,12 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) sample_group = mi->sample_group; mg = &mi->groups[sample_group]; sample_idx = sample_table[mg->column][mg->index]; - minstrel_next_sample_idx(mi); + minstrel_set_next_sample_idx(mi); if (!(mg->supported & BIT(sample_idx))) return -1; - mr = &mg->rates[sample_idx]; + mrs = &mg->rates[sample_idx]; sample_idx += sample_group * MCS_GROUP_RATES; /* @@ -960,7 +952,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) * Do not sample if the probability is already higher than 95% * to avoid wasting airtime. */ - if (mr->probability > MINSTREL_FRAC(95, 100)) + if (mrs->prob_ewma > MINSTREL_FRAC(95, 100)) return -1; /* @@ -975,7 +967,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) (cur_max_tp_streams - 1 < minstrel_mcs_groups[sample_group].streams || sample_dur >= minstrel_get_duration(mi->max_prob_rate))) { - if (mr->sample_skipped < 20) + if (mrs->sample_skipped < 20) return -1; if (mi->sample_slow++ > 2) @@ -1129,7 +1121,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, memset(mi, 0, sizeof(*mi)); mi->sta = sta; - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0); mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0); @@ -1326,16 +1318,19 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) { struct minstrel_ht_sta_priv *msp = priv_sta; struct minstrel_ht_sta *mi = &msp->ht; - int i, j; + int i, j, prob, tp_avg; if (!msp->is_ht) return mac80211_minstrel.get_expected_throughput(priv_sta); i = mi->max_tp_rate[0] / MCS_GROUP_RATES; j = mi->max_tp_rate[0] % MCS_GROUP_RATES; + prob = mi->groups[i].rates[j].prob_ewma; + + /* convert tp_avg from pkt per second in kbps */ + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024; - /* convert cur_tp from pkt per second in kbps */ - return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024; + return tp_avg; } static const struct rate_control_ops mac80211_minstrel_ht = { diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index f2217d6aa0c2..e8b52a94d24b 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -78,7 +78,7 @@ struct minstrel_ht_sta { u16 max_prob_rate; /* time of last status update */ - unsigned long stats_update; + unsigned long last_stats_update; /* overhead time in usec for each frame */ unsigned int overhead; @@ -112,6 +112,7 @@ struct minstrel_ht_sta_priv { }; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *dbg_stats; + struct dentry *dbg_stats_csv; #endif void *ratelist; void *sample_table; @@ -120,5 +121,7 @@ struct minstrel_ht_sta_priv { void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta); +int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, + int prob_ewma); #endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 20c676b8e5b6..6822ce0f95e5 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -19,7 +19,7 @@ static char * minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) { const struct mcs_group *mg; - unsigned int j, tp, prob, eprob; + unsigned int j, tp_max, tp_avg, prob, eprob, tx_time; char htmode = '2'; char gimode = 'L'; u32 gflags; @@ -38,19 +38,26 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) gimode = 'S'; for (j = 0; j < MCS_GROUP_RATES; j++) { - struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; static const int bitrates[4] = { 10, 20, 55, 110 }; int idx = i * MCS_GROUP_RATES + j; if (!(mi->groups[i].supported & BIT(j))) continue; - if (gflags & IEEE80211_TX_RC_MCS) - p += sprintf(p, " HT%c0/%cGI ", htmode, gimode); - else if (gflags & IEEE80211_TX_RC_VHT_MCS) - p += sprintf(p, "VHT%c0/%cGI ", htmode, gimode); - else - p += sprintf(p, " CCK/%cP ", j < 4 ? 'L' : 'S'); + if (gflags & IEEE80211_TX_RC_MCS) { + p += sprintf(p, "HT%c0 ", htmode); + p += sprintf(p, "%cGI ", gimode); + p += sprintf(p, "%d ", mg->streams); + } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { + p += sprintf(p, "VHT%c0 ", htmode); + p += sprintf(p, "%cGI ", gimode); + p += sprintf(p, "%d ", mg->streams); + } else { + p += sprintf(p, "CCK "); + p += sprintf(p, "%cP ", j < 4 ? 'L' : 'S'); + p += sprintf(p, "1 "); + } *(p++) = (idx == mi->max_tp_rate[0]) ? 'A' : ' '; *(p++) = (idx == mi->max_tp_rate[1]) ? 'B' : ' '; @@ -59,29 +66,39 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; if (gflags & IEEE80211_TX_RC_MCS) { - p += sprintf(p, " MCS%-2u ", (mg->streams - 1) * 8 + j); + p += sprintf(p, " MCS%-2u", (mg->streams - 1) * 8 + j); } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { - p += sprintf(p, " MCS%-1u/%1u", j, mg->streams); + p += sprintf(p, " MCS%-1u/%1u", j, mg->streams); } else { int r = bitrates[j % 4]; - p += sprintf(p, " %2u.%1uM ", r / 10, r % 10); + p += sprintf(p, " %2u.%1uM", r / 10, r % 10); } - tp = mr->cur_tp / 10; - prob = MINSTREL_TRUNC(mr->cur_prob * 1000); - eprob = MINSTREL_TRUNC(mr->probability * 1000); + p += sprintf(p, " %3u ", idx); - p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u " - "%3u %4u(%4u) %9llu(%9llu)\n", - tp / 10, tp % 10, + /* tx_time[rate(i)] in usec */ + tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000); + p += sprintf(p, "%6u ", tx_time); + + tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); + prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + + p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" + " %3u.%1u %3u %3u %-3u " + "%9llu %-9llu\n", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, prob / 10, prob % 10, - mr->retry_count, - mr->last_success, - mr->last_attempts, - (unsigned long long)mr->succ_hist, - (unsigned long long)mr->att_hist); + mrs->retry_count, + mrs->last_success, + mrs->last_attempts, + (unsigned long long)mrs->succ_hist, + (unsigned long long)mrs->att_hist); } return p; @@ -94,8 +111,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_debugfs_info *ms; unsigned int i; - char *p; int ret; + char *p; if (!msp->is_ht) { inode->i_private = &msp->legacy; @@ -110,8 +127,14 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, " type rate tpt eprob *prob " - "ret *ok(*cum) ok( cum)\n"); + + p += sprintf(p, "\n"); + p += sprintf(p, " best ____________rate__________ " + "______statistics______ ________last_______ " + "______sum-of________\n"); + p += sprintf(p, "mode guard # rate [name idx airtime max_tp] " + "[ ø(tp) ø(prob) sd(prob)] [prob.|retry|suc|att] [#success | " + "#attempts]\n"); p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p); for (i = 0; i < MINSTREL_CCK_GROUP; i++) @@ -123,11 +146,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) "lookaround %d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), mi->sample_packets); - p += sprintf(p, "Average A-MPDU length: %d.%d\n", + p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n", MINSTREL_TRUNC(mi->avg_ampdu_len), MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); ms->len = p - ms->buf; - WARN_ON(ms->len + sizeof(*ms) > 32768); return nonseekable_open(inode, file); @@ -141,6 +163,143 @@ static const struct file_operations minstrel_ht_stat_fops = { .llseek = no_llseek, }; +static char * +minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) +{ + const struct mcs_group *mg; + unsigned int j, tp_max, tp_avg, prob, eprob, tx_time; + char htmode = '2'; + char gimode = 'L'; + u32 gflags; + + if (!mi->groups[i].supported) + return p; + + mg = &minstrel_mcs_groups[i]; + gflags = mg->flags; + + if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) + htmode = '8'; + if (gflags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; + static const int bitrates[4] = { 10, 20, 55, 110 }; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + if (gflags & IEEE80211_TX_RC_MCS) { + p += sprintf(p, "HT%c0,", htmode); + p += sprintf(p, "%cGI,", gimode); + p += sprintf(p, "%d,", mg->streams); + } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { + p += sprintf(p, "VHT%c0,", htmode); + p += sprintf(p, "%cGI,", gimode); + p += sprintf(p, "%d,", mg->streams); + } else { + p += sprintf(p, "CCK,"); + p += sprintf(p, "%cP,", j < 4 ? 'L' : 'S'); + p += sprintf(p, "1,"); + } + + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[0]) ? "A" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[1]) ? "B" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[2]) ? "C" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[3]) ? "D" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_prob_rate) ? "P" : "")); + + if (gflags & IEEE80211_TX_RC_MCS) { + p += sprintf(p, ",MCS%-2u,", (mg->streams - 1) * 8 + j); + } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { + p += sprintf(p, ",MCS%-1u/%1u,", j, mg->streams); + } else { + int r = bitrates[j % 4]; + p += sprintf(p, ",%2u.%1uM,", r / 10, r % 10); + } + + p += sprintf(p, "%u,", idx); + tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000); + p += sprintf(p, "%u,", tx_time); + + tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); + prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + + p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u.%u,%u,%u," + "%u,%llu,%llu,", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, + eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, + prob / 10, prob % 10, + mrs->retry_count, + mrs->last_success, + mrs->last_attempts, + (unsigned long long)mrs->succ_hist, + (unsigned long long)mrs->att_hist); + p += sprintf(p, "%d,%d,%d.%d\n", + max(0, (int) mi->total_packets - + (int) mi->sample_packets), + mi->sample_packets, + MINSTREL_TRUNC(mi->avg_ampdu_len), + MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); + } + + return p; +} + +static int +minstrel_ht_stats_csv_open(struct inode *inode, struct file *file) +{ + struct minstrel_ht_sta_priv *msp = inode->i_private; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_debugfs_info *ms; + unsigned int i; + int ret; + char *p; + + if (!msp->is_ht) { + inode->i_private = &msp->legacy; + ret = minstrel_stats_csv_open(inode, file); + inode->i_private = msp; + return ret; + } + + ms = kmalloc(32768, GFP_KERNEL); + + if (!ms) + return -ENOMEM; + + file->private_data = ms; + + p = ms->buf; + + p = minstrel_ht_stats_csv_dump(mi, MINSTREL_CCK_GROUP, p); + for (i = 0; i < MINSTREL_CCK_GROUP; i++) + p = minstrel_ht_stats_csv_dump(mi, i, p); + for (i++; i < ARRAY_SIZE(mi->groups); i++) + p = minstrel_ht_stats_csv_dump(mi, i, p); + + ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 32768); + + return nonseekable_open(inode, file); +} + +static const struct file_operations minstrel_ht_stat_csv_fops = { + .owner = THIS_MODULE, + .open = minstrel_ht_stats_csv_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, + .llseek = no_llseek, +}; + void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) { @@ -148,6 +307,8 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp, &minstrel_ht_stat_fops); + msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, + dir, msp, &minstrel_ht_stat_csv_fops); } void @@ -156,4 +317,5 @@ minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta) struct minstrel_ht_sta_priv *msp = priv_sta; debugfs_remove(msp->dbg_stats); + debugfs_remove(msp->dbg_stats_csv); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1eb730bf8752..260eed45b6d2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1185,6 +1185,7 @@ static void sta_ps_start(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ps_data *ps; + int tid; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -1198,6 +1199,18 @@ static void sta_ps_start(struct sta_info *sta) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", sta->sta.addr, sta->sta.aid); + + if (!sta->sta.txq[0]) + return; + + for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); + + if (!skb_queue_len(&txqi->queue)) + set_bit(tid, &sta->txq_buffered_tids); + else + clear_bit(tid, &sta->txq_buffered_tids); + } } static void sta_ps_end(struct sta_info *sta) @@ -1913,8 +1926,7 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) /* Drop unencrypted frames if key is set. */ if (unlikely(!ieee80211_has_protected(fc) && !ieee80211_is_nullfunc(fc) && - ieee80211_is_data(fc) && - (rx->key || rx->sdata->drop_unencrypted))) + ieee80211_is_data(fc) && rx->key)) return -EACCES; return 0; @@ -2044,6 +2056,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) struct sta_info *dsta; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + dev->stats.rx_packets++; + dev->stats.rx_bytes += rx->skb->len; + skb = rx->skb; xmit_skb = NULL; @@ -2174,8 +2189,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) dev_kfree_skb(rx->skb); continue; } - dev->stats.rx_packets++; - dev->stats.rx_bytes += rx->skb->len; ieee80211_deliver_skb(rx); } @@ -2401,9 +2414,6 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) rx->skb->dev = dev; - dev->stats.rx_packets++; - dev->stats.rx_bytes += rx->skb->len; - if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && !is_multicast_ether_addr( ((struct ethhdr *)rx->skb->data)->h_dest) && @@ -3129,6 +3139,12 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, goto rxh_next; \ } while (0); + /* Lock here to avoid hitting all of the data used in the RX + * path (e.g. key data, station data, ...) concurrently when + * a frame is released from the reorder buffer due to timeout + * from the timer, potentially concurrently with RX from the + * driver. + */ spin_lock_bh(&rx->local->rx_path_lock); while ((skb = __skb_dequeue(frames))) { @@ -3421,7 +3437,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct sta_info *sta, *tmp, *prev_sta; + struct sta_info *sta, *prev_sta; + struct rhash_head *tmp; int err = 0; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; @@ -3456,9 +3473,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_scan_rx(local, skb); if (ieee80211_is_data(fc)) { + const struct bucket_table *tbl; + prev_sta = NULL; - for_each_sta_info(local, hdr->addr2, sta, tmp) { + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); + + for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 05f0d711b6d8..7bb6a9383f58 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -928,11 +928,12 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, - struct ieee80211_channel *chan, + struct ieee80211_channel **channels, + unsigned int n_channels, enum nl80211_bss_scan_width scan_width) { struct ieee80211_local *local = sdata->local; - int ret = -EBUSY; + int ret = -EBUSY, i, n_ch = 0; enum ieee80211_band band; mutex_lock(&local->mtx); @@ -942,9 +943,8 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, goto unlock; /* fill internal scan request */ - if (!chan) { - int i, max_n; - int n_ch = 0; + if (!channels) { + int max_n; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!local->hw.wiphy->bands[band]) @@ -969,12 +969,19 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, local->int_scan_req->n_channels = n_ch; } else { - if (WARN_ON_ONCE(chan->flags & (IEEE80211_CHAN_NO_IR | - IEEE80211_CHAN_DISABLED))) + for (i = 0; i < n_channels; i++) { + if (channels[i]->flags & (IEEE80211_CHAN_NO_IR | + IEEE80211_CHAN_DISABLED)) + continue; + + local->int_scan_req->channels[n_ch] = channels[i]; + n_ch++; + } + + if (WARN_ON_ONCE(n_ch == 0)) goto unlock; - local->int_scan_req->channels[0] = chan; - local->int_scan_req->n_channels = 1; + local->int_scan_req->n_channels = n_ch; } local->int_scan_req->ssids = &local->scan_ssid; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 00ca8dcc2bcf..12971b71d0fa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -64,32 +64,20 @@ * freed before they are done using it. */ +static const struct rhashtable_params sta_rht_params = { + .nelem_hint = 3, /* start small */ + .head_offset = offsetof(struct sta_info, hash_node), + .key_offset = offsetof(struct sta_info, sta.addr), + .key_len = ETH_ALEN, + .hashfn = sta_addr_hash, +}; + /* Caller must hold local->sta_mtx */ static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { - struct sta_info *s; - - s = rcu_dereference_protected(local->sta_hash[STA_HASH(sta->sta.addr)], - lockdep_is_held(&local->sta_mtx)); - if (!s) - return -ENOENT; - if (s == sta) { - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], - s->hnext); - return 0; - } - - while (rcu_access_pointer(s->hnext) && - rcu_access_pointer(s->hnext) != sta) - s = rcu_dereference_protected(s->hnext, - lockdep_is_held(&local->sta_mtx)); - if (rcu_access_pointer(s->hnext)) { - rcu_assign_pointer(s->hnext, sta->hnext); - return 0; - } - - return -ENOENT; + return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void __cleanup_single_sta(struct sta_info *sta) @@ -118,6 +106,16 @@ static void __cleanup_single_sta(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); } + if (sta->sta.txq[0]) { + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); + int n = skb_queue_len(&txqi->queue); + + ieee80211_purge_tx_queue(&local->hw, &txqi->queue); + atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]); + } + } + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]); @@ -159,18 +157,8 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; - struct sta_info *sta; - sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - lockdep_is_held(&local->sta_mtx)); - while (sta) { - if (sta->sdata == sdata && - ether_addr_equal(sta->sta.addr, addr)) - break; - sta = rcu_dereference_check(sta->hnext, - lockdep_is_held(&local->sta_mtx)); - } - return sta; + return rhashtable_lookup_fast(&local->sta_hash, addr, sta_rht_params); } /* @@ -182,18 +170,24 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta; + struct rhash_head *tmp; + const struct bucket_table *tbl; - sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - lockdep_is_held(&local->sta_mtx)); - while (sta) { - if ((sta->sdata == sdata || - (sta->sdata->bss && sta->sdata->bss == sdata->bss)) && - ether_addr_equal(sta->sta.addr, addr)) - break; - sta = rcu_dereference_check(sta->hnext, - lockdep_is_held(&local->sta_mtx)); + rcu_read_lock(); + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); + + for_each_sta_info(local, tbl, addr, sta, tmp) { + if (sta->sdata == sdata || + (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { + rcu_read_unlock(); + /* this is safe as the caller must already hold + * another rcu read section or the mutex + */ + return sta; + } } - return sta; + rcu_read_unlock(); + return NULL; } struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, @@ -229,19 +223,13 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { - int i; - if (sta->rate_ctrl) rate_control_free_sta(sta); - if (sta->tx_lat) { - for (i = 0; i < IEEE80211_NUM_TIDS; i++) - kfree(sta->tx_lat[i].bins); - kfree(sta->tx_lat); - } - sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); + if (sta->sta.txq[0]) + kfree(to_txq_info(sta->sta.txq[0])); kfree(rcu_dereference_raw(sta->sta.rates)); kfree(sta); } @@ -250,9 +238,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - lockdep_assert_held(&local->sta_mtx); - sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); + rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -293,44 +280,15 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, gfp_t gfp) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; struct timespec uptime; - struct ieee80211_tx_latency_bin_ranges *tx_latency; int i; - sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); + sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); if (!sta) return NULL; - rcu_read_lock(); - tx_latency = rcu_dereference(local->tx_latency); - /* init stations Tx latency statistics && TID bins */ - if (tx_latency) { - sta->tx_lat = kzalloc(IEEE80211_NUM_TIDS * - sizeof(struct ieee80211_tx_latency_stat), - GFP_ATOMIC); - if (!sta->tx_lat) { - rcu_read_unlock(); - goto free; - } - - if (tx_latency->n_ranges) { - for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - /* size of bins is size of the ranges +1 */ - sta->tx_lat[i].bin_count = - tx_latency->n_ranges + 1; - sta->tx_lat[i].bins = - kcalloc(sta->tx_lat[i].bin_count, - sizeof(u32), GFP_ATOMIC); - if (!sta->tx_lat[i].bins) { - rcu_read_unlock(); - goto free; - } - } - } - } - rcu_read_unlock(); - spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); @@ -359,8 +317,24 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) ewma_init(&sta->chain_signal_avg[i], 1024, 8); + if (local->ops->wake_tx_queue) { + void *txq_data; + int size = sizeof(struct txq_info) + + ALIGN(hw->txq_data_size, sizeof(void *)); + + txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp); + if (!txq_data) + goto free; + + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txq = txq_data + i * size; + + ieee80211_init_tx_queue(sdata, sta, txq, i); + } + } + if (sta_prepare_rate_control(local, sta, gfp)) - goto free; + goto free_txq; for (i = 0; i < IEEE80211_NUM_TIDS; i++) { /* @@ -382,7 +356,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { struct ieee80211_supported_band *sband = - local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)]; + hw->wiphy->bands[ieee80211_get_sdata_band(sdata)]; u8 smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> IEEE80211_HT_CAP_SM_PS_SHIFT; /* @@ -405,14 +379,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); + return sta; +free_txq: + if (sta->sta.txq[0]) + kfree(to_txq_info(sta->sta.txq[0])); free: - if (sta->tx_lat) { - for (i = 0; i < IEEE80211_NUM_TIDS; i++) - kfree(sta->tx_lat[i].bins); - kfree(sta->tx_lat); - } kfree(sta); return NULL; } @@ -684,6 +657,8 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) indicate_tim |= sta->driver_buffered_tids & tids; + indicate_tim |= + sta->txq_buffered_tids & tids; } done: @@ -992,19 +967,32 @@ static void sta_info_cleanup(unsigned long data) round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } -void sta_info_init(struct ieee80211_local *local) +u32 sta_addr_hash(const void *key, u32 length, u32 seed) +{ + return jhash(key, ETH_ALEN, seed); +} + +int sta_info_init(struct ieee80211_local *local) { + int err; + + err = rhashtable_init(&local->sta_hash, &sta_rht_params); + if (err) + return err; + spin_lock_init(&local->tim_lock); mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); setup_timer(&local->sta_cleanup, sta_info_cleanup, (unsigned long)local); + return 0; } void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); + rhashtable_destroy(&local->sta_hash); } @@ -1068,16 +1056,21 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, } struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, - const u8 *addr, - const u8 *localaddr) + const u8 *addr, + const u8 *localaddr) { - struct sta_info *sta, *nxt; + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + struct rhash_head *tmp; + const struct bucket_table *tbl; + + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); /* * Just return a random station if localaddr is NULL * ... first in list. */ - for_each_sta_info(hw_to_local(hw), addr, sta, nxt) { + for_each_sta_info(local, tbl, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; @@ -1115,7 +1108,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff_head pending; - int filtered = 0, buffered = 0, ac; + int filtered = 0, buffered = 0, ac, i; unsigned long flags; struct ps_data *ps; @@ -1134,10 +1127,22 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); sta->driver_buffered_tids = 0; + sta->txq_buffered_tids = 0; if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); + if (sta->sta.txq[0]) { + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); + + if (!skb_queue_len(&txqi->queue)) + continue; + + drv_wake_tx_queue(local, txqi); + } + } + skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ @@ -1275,7 +1280,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, } info->band = chanctx_conf->def.chan->band; - ieee80211_xmit(sdata, skb); + ieee80211_xmit(sdata, sta, skb); rcu_read_unlock(); } @@ -1319,8 +1324,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* if we already have frames from software, then we can't also * release from hardware queues */ - if (skb_queue_empty(&frames)) + if (skb_queue_empty(&frames)) { driver_release_tids |= sta->driver_buffered_tids & tids; + driver_release_tids |= sta->txq_buffered_tids & tids; + } if (driver_release_tids) { /* If the driver has data on more than one TID then @@ -1491,6 +1498,9 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, sta_info_recalc_tim(sta); } else { + unsigned long tids = sta->txq_buffered_tids & driver_release_tids; + int tid; + /* * We need to release a frame that is buffered somewhere in the * driver ... it'll have to handle that. @@ -1510,8 +1520,22 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * that the TID(s) became empty before returning here from the * release function. * Either way, however, when the driver tells us that the TID(s) - * became empty we'll do the TIM recalculation. + * became empty or we find that a txq became empty, we'll do the + * TIM recalculation. */ + + if (!sta->sta.txq[0]) + return; + + for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); + + if (!(tids & BIT(tid)) || skb_queue_len(&txqi->queue)) + continue; + + sta_info_recalc_tim(sta); + break; + } } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index fb0fc1302a58..5c164fb3f6c5 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -16,6 +16,7 @@ #include <linux/workqueue.h> #include <linux/average.h> #include <linux/etherdevice.h> +#include <linux/rhashtable.h> #include "key.h" /** @@ -236,25 +237,6 @@ struct sta_ampdu_mlme { u8 dialog_token_allocator; }; -/* - * struct ieee80211_tx_latency_stat - Tx latency statistics - * - * Measures TX latency and jitter for a station per TID. - * - * @max: worst case latency - * @sum: sum of all latencies - * @counter: amount of Tx frames sent from interface - * @bins: each bin counts how many frames transmitted within a certain - * latency range. when disabled it is NULL. - * @bin_count: amount of bins. - */ -struct ieee80211_tx_latency_stat { - u32 max; - u32 sum; - u32 counter; - u32 *bins; - u32 bin_count; -}; /* Value to indicate no TID reservation */ #define IEEE80211_TID_UNRESERVED 0xff @@ -267,7 +249,7 @@ struct ieee80211_tx_latency_stat { * * @list: global linked list entry * @free_list: list entry for keeping track of stations to free - * @hnext: hash table linked list pointer + * @hash_node: hash node for rhashtable * @local: pointer to the global information * @sdata: virtual interface this station belongs to * @ptk: peer keys negotiated with this station, if any @@ -295,6 +277,7 @@ struct ieee80211_tx_latency_stat { * entered power saving state, these are also delivered to * the station when it leaves powersave or polls for frames * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on + * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA * @last_rx: time (in jiffies) when last frame was received from this STA @@ -316,7 +299,6 @@ struct ieee80211_tx_latency_stat { * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers - * @tx_lat: Tx latency statistics * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state @@ -361,7 +343,7 @@ struct sta_info { /* General information, mostly static */ struct list_head list, free_list; struct rcu_head rcu_head; - struct sta_info __rcu *hnext; + struct rhash_head hash_node; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; @@ -390,6 +372,7 @@ struct sta_info { struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; unsigned long driver_buffered_tids; + unsigned long txq_buffered_tids; /* Updated from RX path only, no locking requirements */ unsigned long rx_packets; @@ -437,8 +420,6 @@ struct sta_info { struct sta_ampdu_mlme ampdu_mlme; u8 timer_to_tid[IEEE80211_NUM_TIDS]; - struct ieee80211_tx_latency_stat *tx_lat; - #ifdef CONFIG_MAC80211_MESH /* * Mesh peer link attributes @@ -559,10 +540,6 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) lockdep_is_held(&sta->ampdu_mlme.mtx)); } -#define STA_HASH_SIZE 256 -#define STA_HASH(sta) (sta[5]) - - /* Maximum number of frames to buffer per power saving station per AC */ #define STA_MAX_TX_BUFFER 64 @@ -583,26 +560,15 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -static inline -void for_each_sta_info_type_check(struct ieee80211_local *local, - const u8 *addr, - struct sta_info *sta, - struct sta_info *nxt) -{ -} +u32 sta_addr_hash(const void *key, u32 length, u32 seed); + +#define _sta_bucket_idx(_tbl, _a) \ + rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd)) -#define for_each_sta_info(local, _addr, _sta, nxt) \ - for ( /* initialise loop */ \ - _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ - nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \ - /* typecheck */ \ - for_each_sta_info_type_check(local, (_addr), _sta, nxt),\ - /* continue condition */ \ - _sta; \ - /* advance loop */ \ - _sta = nxt, \ - nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ - ) \ +#define for_each_sta_info(local, tbl, _addr, _sta, _tmp) \ + rht_for_each_entry_rcu(_sta, _tmp, tbl, \ + _sta_bucket_idx(tbl, _addr), \ + hash_node) \ /* compare address and run code only if it matches */ \ if (ether_addr_equal(_sta->sta.addr, (_addr))) @@ -639,7 +605,7 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, void sta_info_recalc_tim(struct sta_info *sta); -void sta_info_init(struct ieee80211_local *local); +int sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); /** diff --git a/net/mac80211/status.c b/net/mac80211/status.c index e679b7c9b160..005fdbe39a8b 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -12,7 +12,6 @@ #include <linux/export.h> #include <linux/etherdevice.h> -#include <linux/time.h> #include <net/mac80211.h> #include <asm/unaligned.h> #include "ieee80211_i.h" @@ -515,73 +514,6 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } /* - * Measure Tx frame completion and removal time for Tx latency statistics - * calculation. A single Tx frame latency should be measured from when it - * is entering the Kernel until we receive Tx complete confirmation indication - * and remove the skb. - */ -static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, - struct sk_buff *skb, - struct sta_info *sta, - struct ieee80211_hdr *hdr) -{ - u32 msrmnt; - u16 tid; - u8 *qc; - int i, bin_range_count; - u32 *bin_ranges; - __le16 fc; - struct ieee80211_tx_latency_stat *tx_lat; - struct ieee80211_tx_latency_bin_ranges *tx_latency; - ktime_t skb_arv = skb->tstamp; - - tx_latency = rcu_dereference(local->tx_latency); - - /* assert Tx latency stats are enabled & frame arrived when enabled */ - if (!tx_latency || !ktime_to_ns(skb_arv)) - return; - - fc = hdr->frame_control; - - if (!ieee80211_is_data(fc)) /* make sure it is a data frame */ - return; - - /* get frame tid */ - if (ieee80211_is_data_qos(hdr->frame_control)) { - qc = ieee80211_get_qos_ctl(hdr); - tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK; - } else { - tid = 0; - } - - tx_lat = &sta->tx_lat[tid]; - - /* Calculate the latency */ - msrmnt = ktime_to_ms(ktime_sub(ktime_get(), skb_arv)); - - if (tx_lat->max < msrmnt) /* update stats */ - tx_lat->max = msrmnt; - tx_lat->counter++; - tx_lat->sum += msrmnt; - - if (!tx_lat->bins) /* bins not activated */ - return; - - /* count how many Tx frames transmitted with the appropriate latency */ - bin_range_count = tx_latency->n_ranges; - bin_ranges = tx_latency->ranges; - - for (i = 0; i < bin_range_count; i++) { - if (msrmnt <= bin_ranges[i]) { - tx_lat->bins[i]++; - break; - } - } - if (i == bin_range_count) /* msrmnt is bigger than the biggest range */ - tx_lat->bins[i]++; -} - -/* * Use a static threshold for now, best value to be determined * by testing ... * Should it depend on: @@ -722,7 +654,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; - struct sta_info *sta, *tmp; + struct sta_info *sta; + struct rhash_head *tmp; int retry_count; int rates_idx; bool send_to_cooked; @@ -731,6 +664,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) int rtap_len; int shift = 0; int tid = IEEE80211_NUM_TIDS; + const struct bucket_table *tbl; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -739,7 +673,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sband = local->hw.wiphy->bands[info->band]; fc = hdr->frame_control; - for_each_sta_info(local, hdr->addr1, sta, tmp) { + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); + + for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) continue; @@ -853,12 +789,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (acked) sta->last_ack_signal = info->status.ack_signal; - - /* - * Measure frame removal for tx latency - * statistics calculation - */ - ieee80211_tx_latency_end_msrmnt(local, skb, sta, hdr); } rcu_read_unlock(); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index c9f9752217ac..fff0d864adfa 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -136,6 +136,24 @@ ieee80211_tdls_add_supp_channels(struct ieee80211_sub_if_data *sdata, *pos = 2 * subband_cnt; } +static void ieee80211_tdls_add_oper_classes(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + u8 *pos; + u8 op_class; + + if (!ieee80211_chandef_to_operating_class(&sdata->vif.bss_conf.chandef, + &op_class)) + return; + + pos = skb_put(skb, 4); + *pos++ = WLAN_EID_SUPPORTED_REGULATORY_CLASSES; + *pos++ = 2; /* len */ + + *pos++ = op_class; + *pos++ = op_class; /* give current operating class as alternate too */ +} + static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb) { u8 *pos = (void *)skb_put(skb, 3); @@ -193,6 +211,17 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, memcpy(lnkid->resp_sta, rsp_addr, ETH_ALEN); } +static void +ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + u8 *pos = (void *)skb_put(skb, 4); + + *pos++ = WLAN_EID_AID; + *pos++ = 2; /* len */ + put_unaligned_le16(ifmgd->aid, pos); +} + /* translate numbering in the WMM parameter IE to the mac80211 notation */ static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac) { @@ -271,21 +300,11 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_sta_vht_cap vht_cap; struct sta_info *sta = NULL; size_t offset = 0, noffset; u8 *pos; - rcu_read_lock(); - - /* we should have the peer STA if we're already responding */ - if (action_code == WLAN_TDLS_SETUP_RESPONSE) { - sta = sta_info_get(sdata, peer); - if (WARN_ON_ONCE(!sta)) { - rcu_read_unlock(); - return; - } - } - ieee80211_add_srates_ie(sdata, skb, false, band); ieee80211_add_ext_srates_ie(sdata, skb, false, band); ieee80211_tdls_add_supp_channels(sdata, skb); @@ -338,6 +357,19 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } + rcu_read_lock(); + + /* we should have the peer STA if we're already responding */ + if (action_code == WLAN_TDLS_SETUP_RESPONSE) { + sta = sta_info_get(sdata, peer); + if (WARN_ON_ONCE(!sta)) { + rcu_read_unlock(); + return; + } + } + + ieee80211_tdls_add_oper_classes(sdata, skb); + /* * with TDLS we can switch channels, and HT-caps are not necessarily * the same on all bands. The specification limits the setup to a @@ -346,7 +378,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[band]; memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); - if (action_code == WLAN_TDLS_SETUP_REQUEST && ht_cap.ht_supported) { + if ((action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) && + ht_cap.ht_supported) { ieee80211_apply_htcap_overrides(sdata, &ht_cap); /* disable SMPS in TDLS initiator */ @@ -368,12 +402,63 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); } - rcu_read_unlock(); - if (ht_cap.ht_supported && (ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) ieee80211_tdls_add_bss_coex_ie(skb); + ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + + /* add any custom IEs that go before VHT capabilities */ + if (extra_ies_len) { + static const u8 before_vht_cap[] = { + WLAN_EID_SUPP_RATES, + WLAN_EID_COUNTRY, + WLAN_EID_EXT_SUPP_RATES, + WLAN_EID_SUPPORTED_CHANNELS, + WLAN_EID_RSN, + WLAN_EID_EXT_CAPABILITY, + WLAN_EID_QOS_CAPA, + WLAN_EID_FAST_BSS_TRANSITION, + WLAN_EID_TIMEOUT_INTERVAL, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + WLAN_EID_MULTI_BAND, + }; + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_vht_cap, + ARRAY_SIZE(before_vht_cap), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + offset = noffset; + } + + /* build the VHT-cap similarly to the HT-cap */ + memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); + if ((action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) && + vht_cap.vht_supported) { + ieee80211_apply_vhtcap_overrides(sdata, &vht_cap); + + /* the AID is present only when VHT is implemented */ + if (action_code == WLAN_TDLS_SETUP_REQUEST) + ieee80211_tdls_add_aid(sdata, skb); + + pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); + ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); + } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && + vht_cap.vht_supported && sta->sta.vht_cap.vht_supported) { + /* the peer caps are already intersected with our own */ + memcpy(&vht_cap, &sta->sta.vht_cap, sizeof(vht_cap)); + + /* the AID is present only when VHT is implemented */ + ieee80211_tdls_add_aid(sdata, skb); + + pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); + ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); + } + + rcu_read_unlock(); + /* add any remaining IEs */ if (extra_ies_len) { noffset = extra_ies_len; @@ -381,7 +466,6 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, memcpy(pos, extra_ies + offset, noffset - offset); } - ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); } static void @@ -394,6 +478,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t offset = 0, noffset; struct sta_info *sta, *ap_sta; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); u8 *pos; rcu_read_lock(); @@ -453,6 +538,21 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, } } + ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + + /* only include VHT-operation if not on the 2.4GHz band */ + if (band != IEEE80211_BAND_2GHZ && !ap_sta->sta.vht_cap.vht_supported && + sta->sta.vht_cap.vht_supported) { + struct ieee80211_chanctx_conf *chanctx_conf = + rcu_dereference(sdata->vif.chanctx_conf); + if (!WARN_ON(!chanctx_conf)) { + pos = skb_put(skb, 2 + + sizeof(struct ieee80211_vht_operation)); + ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap, + &chanctx_conf->def); + } + } + rcu_read_unlock(); /* add any remaining IEs */ @@ -461,8 +561,6 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, noffset - offset); memcpy(pos, extra_ies + offset, noffset - offset); } - - ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); } static void @@ -708,8 +806,12 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, 26 + /* max(WMM-info, WMM-param) */ 2 + max(sizeof(struct ieee80211_ht_cap), sizeof(struct ieee80211_ht_operation)) + + 2 + max(sizeof(struct ieee80211_vht_cap), + sizeof(struct ieee80211_vht_operation)) + 50 + /* supported channels */ 3 + /* 40/20 BSS coex */ + 4 + /* AID */ + 4 + /* oper classes */ extra_ies_len + sizeof(struct ieee80211_tdls_lnkie)); if (!skb) @@ -907,7 +1009,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer) && !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { ret = -EBUSY; - goto exit; + goto out_unlock; } /* @@ -922,27 +1024,34 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, if (!sta_info_get(sdata, peer)) { rcu_read_unlock(); ret = -ENOLINK; - goto exit; + goto out_unlock; } rcu_read_unlock(); } ieee80211_flush_queues(local, sdata, false); + memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN); + mutex_unlock(&local->mtx); + /* we cannot take the mutex while preparing the setup packet */ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len, 0, NULL); - if (ret < 0) - goto exit; + if (ret < 0) { + mutex_lock(&local->mtx); + eth_zero_addr(sdata->u.mgd.tdls_peer); + mutex_unlock(&local->mtx); + return ret; + } - memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN); ieee80211_queue_delayed_work(&sdata->local->hw, &sdata->u.mgd.tdls_peer_del_work, TDLS_PEER_SETUP_TIMEOUT); + return 0; -exit: +out_unlock: mutex_unlock(&local->mtx); return ret; } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 755a5388dbca..4c2e7690226a 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1256,28 +1256,28 @@ TRACE_EVENT(drv_set_rekey_data, LOCAL_PR_ARG, VIF_PR_ARG) ); -TRACE_EVENT(drv_rssi_callback, +TRACE_EVENT(drv_event_callback, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - enum ieee80211_rssi_event rssi_event), + const struct ieee80211_event *_event), - TP_ARGS(local, sdata, rssi_event), + TP_ARGS(local, sdata, _event), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY - __field(u32, rssi_event) + __field(u32, type) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - __entry->rssi_event = rssi_event; + __entry->type = _event->type; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " rssi_event:%d", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->rssi_event + LOCAL_PR_FMT VIF_PR_FMT " event:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->type ) ); @@ -2312,6 +2312,37 @@ TRACE_EVENT(drv_tdls_recv_channel_switch, ) ); +TRACE_EVENT(drv_wake_tx_queue, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct txq_info *txq), + + TP_ARGS(local, sdata, txq), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + __field(u8, ac) + __field(u8, tid) + ), + + TP_fast_assign( + struct ieee80211_sta *sta = txq->txq.sta; + + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + __entry->ac = txq->txq.ac; + __entry->tid = txq->txq.tid; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ac:%d tid:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ac, __entry->tid + ) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 07bd8db00af8..667111ee6a20 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -20,7 +20,6 @@ #include <linux/bitmap.h> #include <linux/rcupdate.h> #include <linux/export.h> -#include <linux/time.h> #include <net/net_namespace.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> @@ -595,23 +594,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) else if (!is_multicast_ether_addr(hdr->addr1) && (key = rcu_dereference(tx->sdata->default_unicast_key))) tx->key = key; - else if (info->flags & IEEE80211_TX_CTL_INJECTED) - tx->key = NULL; - else if (!tx->sdata->drop_unencrypted) - tx->key = NULL; - else if (tx->skb->protocol == tx->sdata->control_port_protocol) - tx->key = NULL; - else if (ieee80211_is_robust_mgmt_frame(tx->skb) && - !(ieee80211_is_action(hdr->frame_control) && - tx->sta && test_sta_flag(tx->sta, WLAN_STA_MFP))) - tx->key = NULL; - else if (ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_robust_mgmt_frame(tx->skb)) + else tx->key = NULL; - else { - I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); - return TX_DROP; - } if (tx->key) { bool skip_hw = false; @@ -783,12 +767,22 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) +{ + u16 *seq = &sta->tid_seq[tid]; + __le16 ret = cpu_to_le16(*seq); + + /* Increase the sequence number. */ + *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ; + + return ret; +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - u16 *seq; u8 *qc; int tid; @@ -839,13 +833,10 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - seq = &tx->sta->tid_seq[tid]; tx->sta->tx_msdu[tid]++; - hdr->seq_ctrl = cpu_to_le16(*seq); - - /* Increase the sequence number. */ - *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ; + if (!tx->sta->sta.txq[0]) + hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; } @@ -1086,7 +1077,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, * nothing -- this aggregation session is being started * but that might still fail with the driver */ - } else { + } else if (!tx->sta->sta.txq[tid]) { spin_lock(&tx->sta->lock); /* * Need to re-check now, because we may get here @@ -1137,11 +1128,13 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, /* * initialises @tx + * pass %NULL for the station if unknown, a valid pointer if known + * or an ERR_PTR() if the station is known not to exist */ static ieee80211_tx_result ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_data *tx, - struct sk_buff *skb) + struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; @@ -1164,17 +1157,22 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, hdr = (struct ieee80211_hdr *) skb->data; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { - tx->sta = rcu_dereference(sdata->u.vlan.sta); - if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) - return TX_DROP; - } else if (info->flags & (IEEE80211_TX_CTL_INJECTED | - IEEE80211_TX_INTFL_NL80211_FRAME_TX) || - tx->sdata->control_port_protocol == tx->skb->protocol) { - tx->sta = sta_info_get_bss(sdata, hdr->addr1); + if (likely(sta)) { + if (!IS_ERR(sta)) + tx->sta = sta; + } else { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + tx->sta = rcu_dereference(sdata->u.vlan.sta); + if (!tx->sta && sdata->wdev.use_4addr) + return TX_DROP; + } else if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_INJECTED) || + tx->sdata->control_port_protocol == tx->skb->protocol) { + tx->sta = sta_info_get_bss(sdata, hdr->addr1); + } + if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) + tx->sta = sta_info_get(sdata, hdr->addr1); } - if (!tx->sta) - tx->sta = sta_info_get(sdata, hdr->addr1); if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && !ieee80211_is_qos_nullfunc(hdr->frame_control) && @@ -1220,13 +1218,102 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } +static void ieee80211_drv_tx(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_control control = { + .sta = pubsta, + }; + struct ieee80211_txq *txq = NULL; + struct txq_info *txqi; + u8 ac; + + if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE) + goto tx_normal; + + if (!ieee80211_is_data(hdr->frame_control)) + goto tx_normal; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + goto tx_normal; + + ac = txq->ac; + txqi = to_txq_info(txq); + atomic_inc(&sdata->txqs_len[ac]); + if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending) + netif_stop_subqueue(sdata->dev, ac); + + skb_queue_tail(&txqi->queue, skb); + drv_wake_tx_queue(local, txqi); + + return; + +tx_normal: + drv_tx(local, &control, skb); +} + +struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif); + struct txq_info *txqi = container_of(txq, struct txq_info, txq); + struct ieee80211_hdr *hdr; + struct sk_buff *skb = NULL; + u8 ac = txq->ac; + + spin_lock_bh(&txqi->queue.lock); + + if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) + goto out; + + skb = __skb_dequeue(&txqi->queue); + if (!skb) + goto out; + + atomic_dec(&sdata->txqs_len[ac]); + if (__netif_subqueue_stopped(sdata->dev, ac)) + ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]); + + hdr = (struct ieee80211_hdr *)skb->data; + if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { + struct sta_info *sta = container_of(txq->sta, struct sta_info, + sta); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); + if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) + info->flags |= IEEE80211_TX_CTL_AMPDU; + else + info->flags &= ~IEEE80211_TX_CTL_AMPDU; + } + +out: + spin_unlock_bh(&txqi->queue.lock); + + return skb; +} +EXPORT_SYMBOL(ieee80211_tx_dequeue); + static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct sk_buff_head *skbs, bool txpending) { - struct ieee80211_tx_control control; struct sk_buff *skb, *tmp; unsigned long flags; @@ -1284,10 +1371,9 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); info->control.vif = vif; - control.sta = sta; __skb_unlink(skb, skbs); - drv_tx(local, &control, skb); + ieee80211_drv_tx(local, vif, sta, skb); } return true; @@ -1422,8 +1508,9 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_tx_data tx; + struct sk_buff *skb2; - if (ieee80211_tx_prepare(sdata, &tx, skb) == TX_DROP) + if (ieee80211_tx_prepare(sdata, &tx, NULL, skb) == TX_DROP) return false; info->band = band; @@ -1440,6 +1527,14 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, *sta = NULL; } + /* this function isn't suitable for fragmented data frames */ + skb2 = __skb_dequeue(&tx.skbs); + if (WARN_ON(skb2 != skb || !skb_queue_empty(&tx.skbs))) { + ieee80211_free_txskb(hw, skb2); + ieee80211_purge_tx_queue(hw, &tx.skbs); + return false; + } + return true; } EXPORT_SYMBOL(ieee80211_tx_prepare_skb); @@ -1448,7 +1543,8 @@ EXPORT_SYMBOL(ieee80211_tx_prepare_skb); * Returns false if the frame couldn't be transmitted but was queued instead. */ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool txpending) + struct sta_info *sta, struct sk_buff *skb, + bool txpending) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_data tx; @@ -1464,7 +1560,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, /* initialises tx */ led_len = skb->len; - res_prepare = ieee80211_tx_prepare(sdata, &tx, skb); + res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb); if (unlikely(res_prepare == TX_DROP)) { ieee80211_free_txskb(&local->hw, skb); @@ -1520,7 +1616,8 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, return 0; } -void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) +void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -1555,7 +1652,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) } ieee80211_set_qos_hdr(sdata, skb); - ieee80211_tx(sdata, skb, false); + ieee80211_tx(sdata, sta, skb, false); } static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb) @@ -1776,7 +1873,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, goto fail_rcu; info->band = chandef->chan->band; - ieee80211_xmit(sdata, skb); + ieee80211_xmit(sdata, NULL, skb); rcu_read_unlock(); return NETDEV_TX_OK; @@ -1788,21 +1885,89 @@ fail: return NETDEV_TX_OK; /* meaning, we dealt with the skb */ } -/* - * Measure Tx frame arrival time for Tx latency statistics calculation - * A single Tx frame latency should be measured from when it is entering the - * Kernel until we receive Tx complete confirmation indication and the skb is - * freed. - */ -static void ieee80211_tx_latency_start_msrmnt(struct ieee80211_local *local, - struct sk_buff *skb) +static inline bool ieee80211_is_tdls_setup(struct sk_buff *skb) { - struct ieee80211_tx_latency_bin_ranges *tx_latency; + u16 ethertype = (skb->data[12] << 8) | skb->data[13]; - tx_latency = rcu_dereference(local->tx_latency); - if (!tx_latency) - return; - skb->tstamp = ktime_get(); + return ethertype == ETH_P_TDLS && + skb->len > 14 && + skb->data[14] == WLAN_TDLS_SNAP_RFTYPE; +} + +static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct sta_info **sta_out) +{ + struct sta_info *sta; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + sta = rcu_dereference(sdata->u.vlan.sta); + if (sta) { + *sta_out = sta; + return 0; + } else if (sdata->wdev.use_4addr) { + return -ENOLINK; + } + /* fall through */ + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_ADHOC: + if (is_multicast_ether_addr(skb->data)) { + *sta_out = ERR_PTR(-ENOENT); + return 0; + } + sta = sta_info_get_bss(sdata, skb->data); + break; + case NL80211_IFTYPE_WDS: + sta = sta_info_get(sdata, sdata->u.wds.remote_addr); + break; +#ifdef CONFIG_MAC80211_MESH + case NL80211_IFTYPE_MESH_POINT: + /* determined much later */ + *sta_out = NULL; + return 0; +#endif + case NL80211_IFTYPE_STATION: + if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { + sta = sta_info_get(sdata, skb->data); + if (sta) { + bool tdls_peer, tdls_auth; + + tdls_peer = test_sta_flag(sta, + WLAN_STA_TDLS_PEER); + tdls_auth = test_sta_flag(sta, + WLAN_STA_TDLS_PEER_AUTH); + + if (tdls_peer && tdls_auth) { + *sta_out = sta; + return 0; + } + + /* + * TDLS link during setup - throw out frames to + * peer. Allow TDLS-setup frames to unauthorized + * peers for the special case of a link teardown + * after a TDLS sta is removed due to being + * unreachable. + */ + if (tdls_peer && !tdls_auth && + !ieee80211_is_tdls_setup(skb)) + return -EINVAL; + } + + } + + sta = sta_info_get(sdata, sdata->u.mgd.bssid); + if (!sta) + return -ENOLINK; + break; + default: + return -EINVAL; + } + + *sta_out = sta ?: ERR_PTR(-ENOENT); + return 0; } /** @@ -1824,7 +1989,8 @@ static void ieee80211_tx_latency_start_msrmnt(struct ieee80211_local *local, * Returns: the (possibly reallocated) skb or an ERR_PTR() code */ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, u32 info_flags) + struct sk_buff *skb, u32 info_flags, + struct sta_info *sta) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info; @@ -1837,9 +2003,8 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, const u8 *encaps_data; int encaps_len, skip_header_bytes; int nh_pos, h_pos; - struct sta_info *sta = NULL; - bool wme_sta = false, authorized = false, tdls_auth = false; - bool tdls_peer = false, tdls_setup_frame = false; + bool wme_sta = false, authorized = false; + bool tdls_peer; bool multicast; u16 info_id = 0; struct ieee80211_chanctx_conf *chanctx_conf; @@ -1847,6 +2012,9 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, enum ieee80211_band band; int ret; + if (IS_ERR(sta)) + sta = NULL; + /* convert Ethernet header to proper 802.11 header (based on * operation mode) */ ethertype = (skb->data[12] << 8) | skb->data[13]; @@ -1854,8 +2022,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: - sta = rcu_dereference(sdata->u.vlan.sta); - if (sta) { + if (sdata->wdev.use_4addr) { fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr1, sta->sta.addr, ETH_ALEN); @@ -1874,7 +2041,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, goto free; } band = chanctx_conf->def.chan->band; - if (sta) + if (sdata->wdev.use_4addr) break; /* fall through */ case NL80211_IFTYPE_AP: @@ -1978,38 +2145,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, break; #endif case NL80211_IFTYPE_STATION: - if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { - sta = sta_info_get(sdata, skb->data); - if (sta) { - authorized = test_sta_flag(sta, - WLAN_STA_AUTHORIZED); - wme_sta = sta->sta.wme; - tdls_peer = test_sta_flag(sta, - WLAN_STA_TDLS_PEER); - tdls_auth = test_sta_flag(sta, - WLAN_STA_TDLS_PEER_AUTH); - } - - if (tdls_peer) - tdls_setup_frame = - ethertype == ETH_P_TDLS && - skb->len > 14 && - skb->data[14] == WLAN_TDLS_SNAP_RFTYPE; - } - - /* - * TDLS link during setup - throw out frames to peer. We allow - * TDLS-setup frames to unauthorized peers for the special case - * of a link teardown after a TDLS sta is removed due to being - * unreachable. - */ - if (tdls_peer && !tdls_auth && !tdls_setup_frame) { - ret = -EINVAL; - goto free; - } + /* we already did checks when looking up the RA STA */ + tdls_peer = test_sta_flag(sta, WLAN_STA_TDLS_PEER); - /* send direct packets to authorized TDLS peers */ - if (tdls_peer && tdls_auth) { + if (tdls_peer) { /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); @@ -2071,26 +2210,19 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, goto free; } - /* - * There's no need to try to look up the destination - * if it is a multicast address (which can only happen - * in AP mode) - */ multicast = is_multicast_ether_addr(hdr.addr1); - if (!multicast) { - sta = sta_info_get(sdata, hdr.addr1); - if (sta) { - authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); - wme_sta = sta->sta.wme; - } - } - /* For mesh, the use of the QoS header is mandatory */ - if (ieee80211_vif_is_mesh(&sdata->vif)) + /* sta is always NULL for mesh */ + if (sta) { + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); + wme_sta = sta->sta.wme; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* For mesh, the use of the QoS header is mandatory */ wme_sta = true; + } - /* receiver and we are QoS enabled, use a QoS type frame */ - if (wme_sta && local->hw.queues >= IEEE80211_NUM_ACS) { + /* receiver does QoS (which also means we do) use it */ + if (wme_sta) { fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); hdrlen += 2; } @@ -2260,7 +2392,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, u32 info_flags) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; + struct sta_info *sta; if (unlikely(skb->len < ETH_HLEN)) { kfree_skb(skb); @@ -2269,10 +2401,12 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, rcu_read_lock(); - /* Measure frame arrival for Tx latency statistics calculation */ - ieee80211_tx_latency_start_msrmnt(local, skb); + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) { + kfree_skb(skb); + goto out; + } - skb = ieee80211_build_hdr(sdata, skb, info_flags); + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); if (IS_ERR(skb)) goto out; @@ -2280,7 +2414,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, dev->stats.tx_bytes += skb->len; dev->trans_start = jiffies; - ieee80211_xmit(sdata, skb); + ieee80211_xmit(sdata, sta, skb); out: rcu_read_unlock(); } @@ -2308,10 +2442,17 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, .local = sdata->local, .sdata = sdata, }; + struct sta_info *sta; rcu_read_lock(); - skb = ieee80211_build_hdr(sdata, skb, info_flags); + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) { + kfree_skb(skb); + skb = ERR_PTR(-EINVAL); + goto out; + } + + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); if (IS_ERR(skb)) goto out; @@ -2369,7 +2510,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, return true; } info->band = chanctx_conf->def.chan->band; - result = ieee80211_tx(sdata, skb, true); + result = ieee80211_tx(sdata, NULL, skb, true); } else { struct sk_buff_head skbs; @@ -3107,7 +3248,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, if (sdata->vif.type == NL80211_IFTYPE_AP) sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); - if (!ieee80211_tx_prepare(sdata, &tx, skb)) + if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb)) break; dev_kfree_skb_any(skb); } @@ -3239,6 +3380,6 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, */ local_bh_disable(); IEEE80211_SKB_CB(skb)->band = band; - ieee80211_xmit(sdata, skb); + ieee80211_xmit(sdata, NULL, skb); local_bh_enable(); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 747bdcf72e92..79412f16b61d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -308,6 +308,11 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue) for (ac = 0; ac < n_acs; ac++) { int ac_queue = sdata->vif.hw_queue[ac]; + if (local->ops->wake_tx_queue && + (atomic_read(&sdata->txqs_len[ac]) > + local->hw.txq_ac_max_pending)) + continue; + if (ac_queue == queue || (sdata->vif.cab_queue == queue && local->queue_stop_reasons[ac_queue] == 0 && @@ -625,13 +630,14 @@ void ieee80211_wake_vif_queues(struct ieee80211_local *local, reason, true); } -static void __iterate_active_interfaces(struct ieee80211_local *local, - u32 iter_flags, - void (*iterator)(void *data, u8 *mac, - struct ieee80211_vif *vif), - void *data) +static void __iterate_interfaces(struct ieee80211_local *local, + u32 iter_flags, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) { struct ieee80211_sub_if_data *sdata; + bool active_only = iter_flags & IEEE80211_IFACE_ITER_ACTIVE; list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { @@ -645,9 +651,9 @@ static void __iterate_active_interfaces(struct ieee80211_local *local, break; } if (!(iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL) && - !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) + active_only && !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) continue; - if (ieee80211_sdata_running(sdata)) + if (ieee80211_sdata_running(sdata) || !active_only) iterator(data, sdata->vif.addr, &sdata->vif); } @@ -656,12 +662,12 @@ static void __iterate_active_interfaces(struct ieee80211_local *local, lockdep_is_held(&local->iflist_mtx) || lockdep_rtnl_is_held()); if (sdata && - (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || + (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only || sdata->flags & IEEE80211_SDATA_IN_DRIVER)) iterator(data, sdata->vif.addr, &sdata->vif); } -void ieee80211_iterate_active_interfaces( +void ieee80211_iterate_interfaces( struct ieee80211_hw *hw, u32 iter_flags, void (*iterator)(void *data, u8 *mac, struct ieee80211_vif *vif), @@ -670,10 +676,10 @@ void ieee80211_iterate_active_interfaces( struct ieee80211_local *local = hw_to_local(hw); mutex_lock(&local->iflist_mtx); - __iterate_active_interfaces(local, iter_flags, iterator, data); + __iterate_interfaces(local, iter_flags, iterator, data); mutex_unlock(&local->iflist_mtx); } -EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); +EXPORT_SYMBOL_GPL(ieee80211_iterate_interfaces); void ieee80211_iterate_active_interfaces_atomic( struct ieee80211_hw *hw, u32 iter_flags, @@ -684,7 +690,8 @@ void ieee80211_iterate_active_interfaces_atomic( struct ieee80211_local *local = hw_to_local(hw); rcu_read_lock(); - __iterate_active_interfaces(local, iter_flags, iterator, data); + __iterate_interfaces(local, iter_flags | IEEE80211_IFACE_ITER_ACTIVE, + iterator, data); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); @@ -699,7 +706,8 @@ void ieee80211_iterate_active_interfaces_rtnl( ASSERT_RTNL(); - __iterate_active_interfaces(local, iter_flags, iterator, data); + __iterate_interfaces(local, iter_flags | IEEE80211_IFACE_ITER_ACTIVE, + iterator, data); } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl); @@ -742,6 +750,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev) } EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); +struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (!ieee80211_sdata_running(sdata) || + !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) + return NULL; + + return &sdata->wdev; +} +EXPORT_SYMBOL_GPL(ieee80211_vif_to_wdev); + /* * Nothing should have been stuffed into the workqueue during * the suspend->resume cycle. Since we can't check each caller @@ -1811,8 +1831,25 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_MONITOR && - ieee80211_sdata_running(sdata)) + ieee80211_sdata_running(sdata)) { res = drv_add_interface(local, sdata); + if (WARN_ON(res)) + break; + } + } + + /* If adding any of the interfaces failed above, roll back and + * report failure. + */ + if (res) { + list_for_each_entry_continue_reverse(sdata, &local->interfaces, + list) + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_MONITOR && + ieee80211_sdata_running(sdata)) + drv_remove_interface(local, sdata); + ieee80211_handle_reconfig_failure(local); + return res; } /* add channel contexts */ @@ -2157,46 +2194,6 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } -static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) -{ - int i; - - for (i = 0; i < n_ids; i++) - if (ids[i] == id) - return true; - return false; -} - -size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, - const u8 *after_ric, int n_after_ric, - size_t offset) -{ - size_t pos = offset; - - while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { - if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { - pos += 2 + ies[pos + 1]; - - while (pos < ielen && - !ieee80211_id_in_list(after_ric, n_after_ric, - ies[pos])) - pos += 2 + ies[pos + 1]; - } else { - pos += 2 + ies[pos + 1]; - } - } - - return pos; -} - -size_t ieee80211_ie_split(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, size_t offset) -{ - return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); -} -EXPORT_SYMBOL(ieee80211_ie_split); - size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) { size_t pos = offset; @@ -2344,6 +2341,41 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, return pos + sizeof(struct ieee80211_ht_operation); } +u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, + const struct cfg80211_chan_def *chandef) +{ + struct ieee80211_vht_operation *vht_oper; + + *pos++ = WLAN_EID_VHT_OPERATION; + *pos++ = sizeof(struct ieee80211_vht_operation); + vht_oper = (struct ieee80211_vht_operation *)pos; + vht_oper->center_freq_seg1_idx = ieee80211_frequency_to_channel( + chandef->center_freq1); + if (chandef->center_freq2) + vht_oper->center_freq_seg2_idx = + ieee80211_frequency_to_channel(chandef->center_freq2); + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_160: + vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_160MHZ; + break; + case NL80211_CHAN_WIDTH_80P80: + vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80P80MHZ; + break; + case NL80211_CHAN_WIDTH_80: + vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; + break; + default: + vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_USE_HT; + break; + } + + /* don't require special VHT peer rates */ + vht_oper->basic_mcs_set = cpu_to_le16(0xffff); + + return pos + sizeof(struct ieee80211_vht_operation); +} + void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) @@ -2373,6 +2405,39 @@ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, cfg80211_chandef_create(chandef, control_chan, channel_type); } +void ieee80211_vht_oper_to_chandef(struct ieee80211_channel *control_chan, + const struct ieee80211_vht_operation *oper, + struct cfg80211_chan_def *chandef) +{ + if (!oper) + return; + + chandef->chan = control_chan; + + switch (oper->chan_width) { + case IEEE80211_VHT_CHANWIDTH_USE_HT: + break; + case IEEE80211_VHT_CHANWIDTH_80MHZ: + chandef->width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_VHT_CHANWIDTH_160MHZ: + chandef->width = NL80211_CHAN_WIDTH_160; + break; + case IEEE80211_VHT_CHANWIDTH_80P80MHZ: + chandef->width = NL80211_CHAN_WIDTH_80P80; + break; + default: + break; + } + + chandef->center_freq1 = + ieee80211_channel_to_frequency(oper->center_freq_seg1_idx, + control_chan->band); + chandef->center_freq2 = + ieee80211_channel_to_frequency(oper->center_freq_seg2_idx, + control_chan->band); +} + int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates) @@ -3252,3 +3317,20 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo) return buf; } + +void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct txq_info *txqi, int tid) +{ + skb_queue_head_init(&txqi->queue); + txqi->txq.vif = &sdata->vif; + + if (sta) { + txqi->txq.sta = &sta->sta; + sta->sta.txq[tid] = &txqi->txq; + txqi->txq.ac = ieee802_1d_to_ac[tid & 7]; + } else { + sdata->vif.txq = &txqi->txq; + txqi->txq.ac = IEEE80211_AC_BE; + } +} diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 85f9596da07b..80694d55db74 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -129,10 +129,6 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; - /* don't support VHT for TDLS peers for now */ - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) - return; - /* * A VHT STA must support 40 MHz, but if we verify that here * then we break a few things - some APs (e.g. Netgear R6300v2 diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 75de6fac40d1..9d63d93c836e 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -780,9 +780,8 @@ ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - const struct ieee80211_cipher_scheme *cs = key->sta->cipher_scheme; int hdrlen; - u8 *pos; + u8 *pos, iv_len = key->conf.iv_len; if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { @@ -790,14 +789,14 @@ ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, return TX_CONTINUE; } - if (unlikely(skb_headroom(skb) < cs->hdr_len && - pskb_expand_head(skb, cs->hdr_len, 0, GFP_ATOMIC))) + if (unlikely(skb_headroom(skb) < iv_len && + pskb_expand_head(skb, iv_len, 0, GFP_ATOMIC))) return TX_DROP; hdrlen = ieee80211_hdrlen(hdr->frame_control); - pos = skb_push(skb, cs->hdr_len); - memmove(pos, pos + cs->hdr_len, hdrlen); + pos = skb_push(skb, iv_len); + memmove(pos, pos + iv_len, hdrlen); return TX_CONTINUE; } @@ -1217,7 +1216,7 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) if (!info->control.hw_key) return TX_DROP; - if (tx->key->sta->cipher_scheme) { + if (tx->key->flags & KEY_FLAG_CIPHER_SCHEME) { res = ieee80211_crypto_cs_encrypt(tx, skb); if (res != TX_CONTINUE) return res; diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index 98180a9fff4a..a0533357b9ea 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -1,4 +1,4 @@ -#ifndef __MAC802154_DRVIER_OPS +#ifndef __MAC802154_DRIVER_OPS #define __MAC802154_DRIVER_OPS #include <linux/types.h> @@ -220,4 +220,4 @@ drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) return local->ops->set_promiscuous_mode(&local->hw, on); } -#endif /* __MAC802154_DRVIER_OPS */ +#endif /* __MAC802154_DRIVER_OPS */ diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 6fb6bdf9868c..38b56f9d9386 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -174,24 +174,16 @@ ieee802154_check_mac_settings(struct ieee802154_local *local, } if (local->hw.flags & IEEE802154_HW_AFILT) { - if (wpan_dev->pan_id != nwpan_dev->pan_id) - return -EBUSY; - - if (wpan_dev->short_addr != nwpan_dev->short_addr) - return -EBUSY; - - if (wpan_dev->extended_addr != nwpan_dev->extended_addr) + if (wpan_dev->pan_id != nwpan_dev->pan_id || + wpan_dev->short_addr != nwpan_dev->short_addr || + wpan_dev->extended_addr != nwpan_dev->extended_addr) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { - if (wpan_dev->min_be != nwpan_dev->min_be) - return -EBUSY; - - if (wpan_dev->max_be != nwpan_dev->max_be) - return -EBUSY; - - if (wpan_dev->csma_retries != nwpan_dev->csma_retries) + if (wpan_dev->min_be != nwpan_dev->min_be || + wpan_dev->max_be != nwpan_dev->max_be || + wpan_dev->csma_retries != nwpan_dev->csma_retries) return -EBUSY; } diff --git a/net/mac802154/util.c b/net/mac802154/util.c index 5fc979027919..150bf807e572 100644 --- a/net/mac802154/util.c +++ b/net/mac802154/util.c @@ -65,8 +65,19 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, { if (ifs_handling) { struct ieee802154_local *local = hw_to_local(hw); + u8 max_sifs_size; - if (skb->len > 18) + /* If transceiver sets CRC on his own we need to use lifs + * threshold len above 16 otherwise 18, because it's not + * part of skb->len. + */ + if (hw->flags & IEEE802154_HW_TX_OMIT_CKSUM) + max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE - + IEEE802154_FCS_LEN; + else + max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE; + + if (skb->len > max_sifs_size) hrtimer_start(&local->ifs_timer, ktime_set(0, hw->phy->lifs_period * NSEC_PER_USEC), HRTIMER_MODE_REL); diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 37421db88965..17bde799c854 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -1,9 +1,30 @@ # # MPLS configuration # + +menuconfig MPLS + bool "MultiProtocol Label Switching" + default n + ---help--- + MultiProtocol Label Switching routes packets through logical + circuits. Originally conceived as a way of routing packets at + hardware speeds (before hardware was capable of routing ipv4 packets), + MPLS remains a simple way of making tunnels. + + If you have not heard of MPLS you probably want to say N here. + +if MPLS + config NET_MPLS_GSO tristate "MPLS: GSO support" help This is helper module to allow segmentation of non-MPLS GSO packets that have had MPLS stack entries pushed onto them and thus become MPLS GSO packets. + +config MPLS_ROUTING + tristate "MPLS: routing support" + help + Add support for forwarding of mpls packets. + +endif # MPLS diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 6dec088c2d0f..65bbe68c72e6 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -2,3 +2,6 @@ # Makefile for MPLS. # obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o +obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o + +mpls_router-y := af_mpls.o diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c new file mode 100644 index 000000000000..db8a2ea6d4de --- /dev/null +++ b/net/mpls/af_mpls.c @@ -0,0 +1,1023 @@ +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/sysctl.h> +#include <linux/net.h> +#include <linux/module.h> +#include <linux/if_arp.h> +#include <linux/ipv6.h> +#include <linux/mpls.h> +#include <linux/vmalloc.h> +#include <net/ip.h> +#include <net/dst.h> +#include <net/sock.h> +#include <net/arp.h> +#include <net/ip_fib.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include "internal.h" + +#define LABEL_NOT_SPECIFIED (1<<20) +#define MAX_NEW_LABELS 2 + +/* This maximum ha length copied from the definition of struct neighbour */ +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) + +struct mpls_route { /* next hop label forwarding entry */ + struct net_device __rcu *rt_dev; + struct rcu_head rt_rcu; + u32 rt_label[MAX_NEW_LABELS]; + u8 rt_protocol; /* routing protocol that set this entry */ + u8 rt_labels; + u8 rt_via_alen; + u8 rt_via_table; + u8 rt_via[0]; +}; + +static int zero = 0; +static int label_limit = (1 << 20) - 1; + +static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, + struct nlmsghdr *nlh, struct net *net, u32 portid, + unsigned int nlm_flags); + +static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) +{ + struct mpls_route *rt = NULL; + + if (index < net->mpls.platform_labels) { + struct mpls_route __rcu **platform_label = + rcu_dereference(net->mpls.platform_label); + rt = rcu_dereference(platform_label[index]); + } + return rt; +} + +static bool mpls_output_possible(const struct net_device *dev) +{ + return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); +} + +static unsigned int mpls_rt_header_size(const struct mpls_route *rt) +{ + /* The size of the layer 2.5 labels to be added for this route */ + return rt->rt_labels * sizeof(struct mpls_shim_hdr); +} + +static unsigned int mpls_dev_mtu(const struct net_device *dev) +{ + /* The amount of data the layer 2 frame can hold */ + return dev->mtu; +} + +static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + +static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, + struct mpls_entry_decoded dec) +{ + /* RFC4385 and RFC5586 encode other packets in mpls such that + * they don't conflict with the ip version number, making + * decoding by examining the ip version correct in everything + * except for the strangest cases. + * + * The strange cases if we choose to support them will require + * manual configuration. + */ + struct iphdr *hdr4; + bool success = true; + + /* The IPv4 code below accesses through the IPv4 header + * checksum, which is 12 bytes into the packet. + * The IPv6 code below accesses through the IPv6 hop limit + * which is 8 bytes into the packet. + * + * For all supported cases there should always be at least 12 + * bytes of packet data present. The IPv4 header is 20 bytes + * without options and the IPv6 header is always 40 bytes + * long. + */ + if (!pskb_may_pull(skb, 12)) + return false; + + /* Use ip_hdr to find the ip protocol version */ + hdr4 = ip_hdr(skb); + if (hdr4->version == 4) { + skb->protocol = htons(ETH_P_IP); + csum_replace2(&hdr4->check, + htons(hdr4->ttl << 8), + htons(dec.ttl << 8)); + hdr4->ttl = dec.ttl; + } + else if (hdr4->version == 6) { + struct ipv6hdr *hdr6 = ipv6_hdr(skb); + skb->protocol = htons(ETH_P_IPV6); + hdr6->hop_limit = dec.ttl; + } + else + /* version 0 and version 1 are used by pseudo wires */ + success = false; + return success; +} + +static int mpls_forward(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct net *net = dev_net(dev); + struct mpls_shim_hdr *hdr; + struct mpls_route *rt; + struct mpls_entry_decoded dec; + struct net_device *out_dev; + unsigned int hh_len; + unsigned int new_header_size; + unsigned int mtu; + int err; + + /* Careful this entire function runs inside of an rcu critical section */ + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + goto drop; + + if (!pskb_may_pull(skb, sizeof(*hdr))) + goto drop; + + /* Read and decode the label */ + hdr = mpls_hdr(skb); + dec = mpls_entry_decode(hdr); + + /* Pop the label */ + skb_pull(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + + skb_orphan(skb); + + rt = mpls_route_input_rcu(net, dec.label); + if (!rt) + goto drop; + + /* Find the output device */ + out_dev = rcu_dereference(rt->rt_dev); + if (!mpls_output_possible(out_dev)) + goto drop; + + if (skb_warn_if_lro(skb)) + goto drop; + + skb_forward_csum(skb); + + /* Verify ttl is valid */ + if (dec.ttl <= 1) + goto drop; + dec.ttl -= 1; + + /* Verify the destination can hold the packet */ + new_header_size = mpls_rt_header_size(rt); + mtu = mpls_dev_mtu(out_dev); + if (mpls_pkt_too_big(skb, mtu - new_header_size)) + goto drop; + + hh_len = LL_RESERVED_SPACE(out_dev); + if (!out_dev->header_ops) + hh_len = 0; + + /* Ensure there is enough space for the headers in the skb */ + if (skb_cow(skb, hh_len + new_header_size)) + goto drop; + + skb->dev = out_dev; + skb->protocol = htons(ETH_P_MPLS_UC); + + if (unlikely(!new_header_size && dec.bos)) { + /* Penultimate hop popping */ + if (!mpls_egress(rt, skb, dec)) + goto drop; + } else { + bool bos; + int i; + skb_push(skb, new_header_size); + skb_reset_network_header(skb); + /* Push the new labels */ + hdr = mpls_hdr(skb); + bos = dec.bos; + for (i = rt->rt_labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); + bos = false; + } + } + + err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); + if (err) + net_dbg_ratelimited("%s: packet transmission failed: %d\n", + __func__, err); + return 0; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type mpls_packet_type __read_mostly = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .func = mpls_forward, +}; + +static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { + [RTA_DST] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, +}; + +struct mpls_route_config { + u32 rc_protocol; + u32 rc_ifindex; + u16 rc_via_table; + u16 rc_via_alen; + u8 rc_via[MAX_VIA_ALEN]; + u32 rc_label; + u32 rc_output_labels; + u32 rc_output_label[MAX_NEW_LABELS]; + u32 rc_nlflags; + struct nl_info rc_nlinfo; +}; + +static struct mpls_route *mpls_rt_alloc(size_t alen) +{ + struct mpls_route *rt; + + rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); + if (rt) + rt->rt_via_alen = alen; + return rt; +} + +static void mpls_rt_free(struct mpls_route *rt) +{ + if (rt) + kfree_rcu(rt, rt_rcu); +} + +static void mpls_notify_route(struct net *net, unsigned index, + struct mpls_route *old, struct mpls_route *new, + const struct nl_info *info) +{ + struct nlmsghdr *nlh = info ? info->nlh : NULL; + unsigned portid = info ? info->portid : 0; + int event = new ? RTM_NEWROUTE : RTM_DELROUTE; + struct mpls_route *rt = new ? new : old; + unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0; + /* Ignore reserved labels for now */ + if (rt && (index >= 16)) + rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags); +} + +static void mpls_route_update(struct net *net, unsigned index, + struct net_device *dev, struct mpls_route *new, + const struct nl_info *info) +{ + struct mpls_route __rcu **platform_label; + struct mpls_route *rt, *old = NULL; + + ASSERT_RTNL(); + + platform_label = rtnl_dereference(net->mpls.platform_label); + rt = rtnl_dereference(platform_label[index]); + if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { + rcu_assign_pointer(platform_label[index], new); + old = rt; + } + + mpls_notify_route(net, index, old, new, info); + + /* If we removed a route free it now */ + mpls_rt_free(old); +} + +static unsigned find_free_label(struct net *net) +{ + struct mpls_route __rcu **platform_label; + size_t platform_labels; + unsigned index; + + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (index = 16; index < platform_labels; index++) { + if (!rtnl_dereference(platform_label[index])) + return index; + } + return LABEL_NOT_SPECIFIED; +} + +static int mpls_route_add(struct mpls_route_config *cfg) +{ + struct mpls_route __rcu **platform_label; + struct net *net = cfg->rc_nlinfo.nl_net; + struct net_device *dev = NULL; + struct mpls_route *rt, *old; + unsigned index; + int i; + int err = -EINVAL; + + index = cfg->rc_label; + + /* If a label was not specified during insert pick one */ + if ((index == LABEL_NOT_SPECIFIED) && + (cfg->rc_nlflags & NLM_F_CREATE)) { + index = find_free_label(net); + } + + /* The first 16 labels are reserved, and may not be set */ + if (index < 16) + goto errout; + + /* The full 20 bit range may not be supported. */ + if (index >= net->mpls.platform_labels) + goto errout; + + /* Ensure only a supported number of labels are present */ + if (cfg->rc_output_labels > MAX_NEW_LABELS) + goto errout; + + err = -ENODEV; + dev = dev_get_by_index(net, cfg->rc_ifindex); + if (!dev) + goto errout; + + /* For now just support ethernet devices */ + err = -EINVAL; + if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) + goto errout; + + err = -EINVAL; + if ((cfg->rc_via_table == NEIGH_LINK_TABLE) && + (dev->addr_len != cfg->rc_via_alen)) + goto errout; + + /* Append makes no sense with mpls */ + err = -EOPNOTSUPP; + if (cfg->rc_nlflags & NLM_F_APPEND) + goto errout; + + err = -EEXIST; + platform_label = rtnl_dereference(net->mpls.platform_label); + old = rtnl_dereference(platform_label[index]); + if ((cfg->rc_nlflags & NLM_F_EXCL) && old) + goto errout; + + err = -EEXIST; + if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old) + goto errout; + + err = -ENOENT; + if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) + goto errout; + + err = -ENOMEM; + rt = mpls_rt_alloc(cfg->rc_via_alen); + if (!rt) + goto errout; + + rt->rt_labels = cfg->rc_output_labels; + for (i = 0; i < rt->rt_labels; i++) + rt->rt_label[i] = cfg->rc_output_label[i]; + rt->rt_protocol = cfg->rc_protocol; + RCU_INIT_POINTER(rt->rt_dev, dev); + rt->rt_via_table = cfg->rc_via_table; + memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); + + mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); + + dev_put(dev); + return 0; + +errout: + if (dev) + dev_put(dev); + return err; +} + +static int mpls_route_del(struct mpls_route_config *cfg) +{ + struct net *net = cfg->rc_nlinfo.nl_net; + unsigned index; + int err = -EINVAL; + + index = cfg->rc_label; + + /* The first 16 labels are reserved, and may not be removed */ + if (index < 16) + goto errout; + + /* The full 20 bit range may not be supported */ + if (index >= net->mpls.platform_labels) + goto errout; + + mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo); + + err = 0; +errout: + return err; +} + +static void mpls_ifdown(struct net_device *dev) +{ + struct mpls_route __rcu **platform_label; + struct net *net = dev_net(dev); + unsigned index; + + platform_label = rtnl_dereference(net->mpls.platform_label); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + if (!rt) + continue; + if (rtnl_dereference(rt->rt_dev) != dev) + continue; + rt->rt_dev = NULL; + } +} + +static int mpls_dev_notify(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch(event) { + case NETDEV_UNREGISTER: + mpls_ifdown(dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block mpls_dev_notifier = { + .notifier_call = mpls_dev_notify, +}; + +static int nla_put_via(struct sk_buff *skb, + u8 table, const void *addr, int alen) +{ + static const int table_to_family[NEIGH_NR_TABLES + 1] = { + AF_INET, AF_INET6, AF_DECnet, AF_PACKET, + }; + struct nlattr *nla; + struct rtvia *via; + int family = AF_UNSPEC; + + nla = nla_reserve(skb, RTA_VIA, alen + 2); + if (!nla) + return -EMSGSIZE; + + if (table <= NEIGH_NR_TABLES) + family = table_to_family[table]; + + via = nla_data(nla); + via->rtvia_family = family; + memcpy(via->rtvia_addr, addr, alen); + return 0; +} + +int nla_put_labels(struct sk_buff *skb, int attrtype, + u8 labels, const u32 label[]) +{ + struct nlattr *nla; + struct mpls_shim_hdr *nla_label; + bool bos; + int i; + nla = nla_reserve(skb, attrtype, labels*4); + if (!nla) + return -EMSGSIZE; + + nla_label = nla_data(nla); + bos = true; + for (i = labels - 1; i >= 0; i--) { + nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos); + bos = false; + } + + return 0; +} + +int nla_get_labels(const struct nlattr *nla, + u32 max_labels, u32 *labels, u32 label[]) +{ + unsigned len = nla_len(nla); + unsigned nla_labels; + struct mpls_shim_hdr *nla_label; + bool bos; + int i; + + /* len needs to be an even multiple of 4 (the label size) */ + if (len & 3) + return -EINVAL; + + /* Limit the number of new labels allowed */ + nla_labels = len/4; + if (nla_labels > max_labels) + return -EINVAL; + + nla_label = nla_data(nla); + bos = true; + for (i = nla_labels - 1; i >= 0; i--, bos = false) { + struct mpls_entry_decoded dec; + dec = mpls_entry_decode(nla_label + i); + + /* Ensure the bottom of stack flag is properly set + * and ttl and tc are both clear. + */ + if ((dec.bos != bos) || dec.ttl || dec.tc) + return -EINVAL; + + label[i] = dec.label; + } + *labels = nla_labels; + return 0; +} + +static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct mpls_route_config *cfg) +{ + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; + int index; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + rtm = nlmsg_data(nlh); + memset(cfg, 0, sizeof(*cfg)); + + if (rtm->rtm_family != AF_MPLS) + goto errout; + if (rtm->rtm_dst_len != 20) + goto errout; + if (rtm->rtm_src_len != 0) + goto errout; + if (rtm->rtm_tos != 0) + goto errout; + if (rtm->rtm_table != RT_TABLE_MAIN) + goto errout; + /* Any value is acceptable for rtm_protocol */ + + /* As mpls uses destination specific addresses + * (or source specific address in the case of multicast) + * all addresses have universal scope. + */ + if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) + goto errout; + if (rtm->rtm_type != RTN_UNICAST) + goto errout; + if (rtm->rtm_flags != 0) + goto errout; + + cfg->rc_label = LABEL_NOT_SPECIFIED; + cfg->rc_protocol = rtm->rtm_protocol; + cfg->rc_nlflags = nlh->nlmsg_flags; + cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; + cfg->rc_nlinfo.nlh = nlh; + cfg->rc_nlinfo.nl_net = sock_net(skb->sk); + + for (index = 0; index <= RTA_MAX; index++) { + struct nlattr *nla = tb[index]; + if (!nla) + continue; + + switch(index) { + case RTA_OIF: + cfg->rc_ifindex = nla_get_u32(nla); + break; + case RTA_NEWDST: + if (nla_get_labels(nla, MAX_NEW_LABELS, + &cfg->rc_output_labels, + cfg->rc_output_label)) + goto errout; + break; + case RTA_DST: + { + u32 label_count; + if (nla_get_labels(nla, 1, &label_count, + &cfg->rc_label)) + goto errout; + + /* The first 16 labels are reserved, and may not be set */ + if (cfg->rc_label < 16) + goto errout; + + break; + } + case RTA_VIA: + { + struct rtvia *via = nla_data(nla); + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) + goto errout; + cfg->rc_via_alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + if (cfg->rc_via_alen > MAX_VIA_ALEN) + goto errout; + + /* Validate the address family */ + switch(via->rtvia_family) { + case AF_PACKET: + cfg->rc_via_table = NEIGH_LINK_TABLE; + break; + case AF_INET: + cfg->rc_via_table = NEIGH_ARP_TABLE; + if (cfg->rc_via_alen != 4) + goto errout; + break; + case AF_INET6: + cfg->rc_via_table = NEIGH_ND_TABLE; + if (cfg->rc_via_alen != 16) + goto errout; + break; + default: + /* Unsupported address family */ + goto errout; + } + + memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen); + break; + } + default: + /* Unsupported attribute */ + goto errout; + } + } + + err = 0; +errout: + return err; +} + +static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct mpls_route_config cfg; + int err; + + err = rtm_to_route_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return mpls_route_del(&cfg); +} + + +static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct mpls_route_config cfg; + int err; + + err = rtm_to_route_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return mpls_route_add(&cfg); +} + +static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, + u32 label, struct mpls_route *rt, int flags) +{ + struct net_device *dev; + struct nlmsghdr *nlh; + struct rtmsg *rtm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = AF_MPLS; + rtm->rtm_dst_len = 20; + rtm->rtm_src_len = 0; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_MAIN; + rtm->rtm_protocol = rt->rt_protocol; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_type = RTN_UNICAST; + rtm->rtm_flags = 0; + + if (rt->rt_labels && + nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) + goto nla_put_failure; + if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen)) + goto nla_put_failure; + dev = rtnl_dereference(rt->rt_dev); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) + goto nla_put_failure; + if (nla_put_labels(skb, RTA_DST, 1, &label)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mpls_route __rcu **platform_label; + size_t platform_labels; + unsigned int index; + + ASSERT_RTNL(); + + index = cb->args[0]; + if (index < 16) + index = 16; + + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (; index < platform_labels; index++) { + struct mpls_route *rt; + rt = rtnl_dereference(platform_label[index]); + if (!rt) + continue; + + if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + index, rt, NLM_F_MULTI) < 0) + break; + } + cb->args[0] = index; + + return skb->len; +} + +static inline size_t lfib_nlmsg_size(struct mpls_route *rt) +{ + size_t payload = + NLMSG_ALIGN(sizeof(struct rtmsg)) + + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */ + + nla_total_size(4); /* RTA_DST */ + if (rt->rt_labels) /* RTA_NEWDST */ + payload += nla_total_size(rt->rt_labels * 4); + if (rt->rt_dev) /* RTA_OIF */ + payload += nla_total_size(4); + return payload; +} + +static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, + struct nlmsghdr *nlh, struct net *net, u32 portid, + unsigned int nlm_flags) +{ + struct sk_buff *skb; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + + skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags); + if (err < 0) { + /* -EMSGSIZE implies BUG in lfib_nlmsg_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL); + + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); +} + +static int resize_platform_label_table(struct net *net, size_t limit) +{ + size_t size = sizeof(struct mpls_route *) * limit; + size_t old_limit; + size_t cp_size; + struct mpls_route __rcu **labels = NULL, **old; + struct mpls_route *rt0 = NULL, *rt2 = NULL; + unsigned index; + + if (size) { + labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!labels) + labels = vzalloc(size); + + if (!labels) + goto nolabels; + } + + /* In case the predefined labels need to be populated */ + if (limit > LABEL_IPV4_EXPLICIT_NULL) { + struct net_device *lo = net->loopback_dev; + rt0 = mpls_rt_alloc(lo->addr_len); + if (!rt0) + goto nort0; + RCU_INIT_POINTER(rt0->rt_dev, lo); + rt0->rt_protocol = RTPROT_KERNEL; + rt0->rt_via_table = NEIGH_LINK_TABLE; + memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); + } + if (limit > LABEL_IPV6_EXPLICIT_NULL) { + struct net_device *lo = net->loopback_dev; + rt2 = mpls_rt_alloc(lo->addr_len); + if (!rt2) + goto nort2; + RCU_INIT_POINTER(rt2->rt_dev, lo); + rt2->rt_protocol = RTPROT_KERNEL; + rt2->rt_via_table = NEIGH_LINK_TABLE; + memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); + } + + rtnl_lock(); + /* Remember the original table */ + old = rtnl_dereference(net->mpls.platform_label); + old_limit = net->mpls.platform_labels; + + /* Free any labels beyond the new table */ + for (index = limit; index < old_limit; index++) + mpls_route_update(net, index, NULL, NULL, NULL); + + /* Copy over the old labels */ + cp_size = size; + if (old_limit < limit) + cp_size = old_limit * sizeof(struct mpls_route *); + + memcpy(labels, old, cp_size); + + /* If needed set the predefined labels */ + if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) && + (limit > LABEL_IPV6_EXPLICIT_NULL)) { + RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2); + rt2 = NULL; + } + + if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) && + (limit > LABEL_IPV4_EXPLICIT_NULL)) { + RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0); + rt0 = NULL; + } + + /* Update the global pointers */ + net->mpls.platform_labels = limit; + rcu_assign_pointer(net->mpls.platform_label, labels); + + rtnl_unlock(); + + mpls_rt_free(rt2); + mpls_rt_free(rt0); + + if (old) { + synchronize_rcu(); + kvfree(old); + } + return 0; + +nort2: + mpls_rt_free(rt0); +nort0: + kvfree(labels); +nolabels: + return -ENOMEM; +} + +static int mpls_platform_labels(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = table->data; + int platform_labels = net->mpls.platform_labels; + int ret; + struct ctl_table tmp = { + .procname = table->procname, + .data = &platform_labels, + .maxlen = sizeof(int), + .mode = table->mode, + .extra1 = &zero, + .extra2 = &label_limit, + }; + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) + ret = resize_platform_label_table(net, platform_labels); + + return ret; +} + +static struct ctl_table mpls_table[] = { + { + .procname = "platform_labels", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = mpls_platform_labels, + }, + { } +}; + +static int mpls_net_init(struct net *net) +{ + struct ctl_table *table; + + net->mpls.platform_labels = 0; + net->mpls.platform_label = NULL; + + table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); + if (table == NULL) + return -ENOMEM; + + table[0].data = net; + net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); + if (net->mpls.ctl == NULL) + return -ENOMEM; + + return 0; +} + +static void mpls_net_exit(struct net *net) +{ + struct mpls_route __rcu **platform_label; + size_t platform_labels; + struct ctl_table *table; + unsigned int index; + + table = net->mpls.ctl->ctl_table_arg; + unregister_net_sysctl_table(net->mpls.ctl); + kfree(table); + + /* An rcu grace period has passed since there was a device in + * the network namespace (and thus the last in flight packet) + * left this network namespace. This is because + * unregister_netdevice_many and netdev_run_todo has completed + * for each network device that was in this network namespace. + * + * As such no additional rcu synchronization is necessary when + * freeing the platform_label table. + */ + rtnl_lock(); + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (index = 0; index < platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + RCU_INIT_POINTER(platform_label[index], NULL); + mpls_rt_free(rt); + } + rtnl_unlock(); + + kvfree(platform_label); +} + +static struct pernet_operations mpls_net_ops = { + .init = mpls_net_init, + .exit = mpls_net_exit, +}; + +static int __init mpls_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4); + + err = register_pernet_subsys(&mpls_net_ops); + if (err) + goto out; + + err = register_netdevice_notifier(&mpls_dev_notifier); + if (err) + goto out_unregister_pernet; + + dev_add_pack(&mpls_packet_type); + + rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); + err = 0; +out: + return err; + +out_unregister_pernet: + unregister_pernet_subsys(&mpls_net_ops); + goto out; +} +module_init(mpls_init); + +static void __exit mpls_exit(void) +{ + rtnl_unregister_all(PF_MPLS); + dev_remove_pack(&mpls_packet_type); + unregister_netdevice_notifier(&mpls_dev_notifier); + unregister_pernet_subsys(&mpls_net_ops); +} +module_exit(mpls_exit); + +MODULE_DESCRIPTION("MultiProtocol Label Switching"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_NETPROTO(PF_MPLS); diff --git a/net/mpls/internal.h b/net/mpls/internal.h new file mode 100644 index 000000000000..fb6de92052c4 --- /dev/null +++ b/net/mpls/internal.h @@ -0,0 +1,59 @@ +#ifndef MPLS_INTERNAL_H +#define MPLS_INTERNAL_H + +#define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ +#define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */ +#define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ +#define LABEL_IMPLICIT_NULL 3 /* RFC3032 */ +#define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ +#define LABEL_GAL 13 /* RFC5586 */ +#define LABEL_OAM_ALERT 14 /* RFC3429 */ +#define LABEL_EXTENSION 15 /* RFC7274 */ + + +struct mpls_shim_hdr { + __be32 label_stack_entry; +}; + +struct mpls_entry_decoded { + u32 label; + u8 ttl; + u8 tc; + u8 bos; +}; + +struct sk_buff; + +static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) +{ + return (struct mpls_shim_hdr *)skb_network_header(skb); +} + +static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) +{ + struct mpls_shim_hdr result; + result.label_stack_entry = + cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) | + (tc << MPLS_LS_TC_SHIFT) | + (bos ? (1 << MPLS_LS_S_SHIFT) : 0) | + (ttl << MPLS_LS_TTL_SHIFT)); + return result; +} + +static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr) +{ + struct mpls_entry_decoded result; + unsigned entry = be32_to_cpu(hdr->label_stack_entry); + + result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; + result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT; + + return result; +} + +int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); +int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, u32 label[]); + +#endif /* MPLS_INTERNAL_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index b02660fa9eb0..f70e34a68f70 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -438,8 +438,10 @@ config NF_TABLES To compile it as a module, choose M here. +if NF_TABLES + config NF_TABLES_INET - depends on NF_TABLES && IPV6 + depends on IPV6 select NF_TABLES_IPV4 select NF_TABLES_IPV6 tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support" @@ -447,21 +449,18 @@ config NF_TABLES_INET This option enables support for a mixed IPv4/IPv6 "inet" table. config NFT_EXTHDR - depends on NF_TABLES tristate "Netfilter nf_tables IPv6 exthdr module" help This option adds the "exthdr" expression that you can use to match IPv6 extension headers. config NFT_META - depends on NF_TABLES tristate "Netfilter nf_tables meta module" help This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. config NFT_CT - depends on NF_TABLES depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" help @@ -469,42 +468,36 @@ config NFT_CT connection tracking information such as the flow state. config NFT_RBTREE - depends on NF_TABLES tristate "Netfilter nf_tables rbtree set module" help This option adds the "rbtree" set type (Red Black tree) that is used to build interval-based sets. config NFT_HASH - depends on NF_TABLES tristate "Netfilter nf_tables hash set module" help This option adds the "hash" set type that is used to build one-way mappings between matchings and actions. config NFT_COUNTER - depends on NF_TABLES tristate "Netfilter nf_tables counter module" help This option adds the "counter" expression that you can use to include packet and byte counters in a rule. config NFT_LOG - depends on NF_TABLES tristate "Netfilter nf_tables log module" help This option adds the "log" expression that you can use to log packets matching some criteria. config NFT_LIMIT - depends on NF_TABLES tristate "Netfilter nf_tables limit module" help This option adds the "limit" expression that you can use to ratelimit rule matchings. config NFT_MASQ - depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables masquerade support" @@ -513,7 +506,6 @@ config NFT_MASQ to perform NAT in the masquerade flavour. config NFT_REDIR - depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables redirect support" @@ -522,7 +514,6 @@ config NFT_REDIR to perform NAT in the redirect flavour. config NFT_NAT - depends on NF_TABLES depends on NF_CONNTRACK select NF_NAT tristate "Netfilter nf_tables nat module" @@ -531,8 +522,6 @@ config NFT_NAT typical Network Address Translation (NAT) packet transformations. config NFT_QUEUE - depends on NF_TABLES - depends on NETFILTER_XTABLES depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" help @@ -540,7 +529,6 @@ config NFT_QUEUE infrastructure (also known as NFQUEUE) from nftables. config NFT_REJECT - depends on NF_TABLES default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" help @@ -554,7 +542,6 @@ config NFT_REJECT_INET tristate config NFT_COMPAT - depends on NF_TABLES depends on NETFILTER_XTABLES tristate "Netfilter x_tables over nf_tables module" help @@ -562,6 +549,8 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +endif # NF_TABLES + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -951,7 +940,7 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' - depends on NETFILTER_ADVANCED + default m if NETFILTER_ADVANCED=n ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 89f73a9e9874..a87d8b8ec730 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fea9ef566427..e6163017c42d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -120,12 +120,8 @@ EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - unsigned int hook, - const struct net_device *indev, - const struct net_device *outdev, - struct nf_hook_ops **elemp, - int (*okfn)(struct sk_buff *), - int hook_thresh) + struct nf_hook_state *state, + struct nf_hook_ops **elemp) { unsigned int verdict; @@ -134,19 +130,19 @@ unsigned int nf_iterate(struct list_head *head, * function because of risk of continuing from deleted element. */ list_for_each_entry_continue_rcu((*elemp), head, list) { - if (hook_thresh > (*elemp)->priority) + if (state->thresh > (*elemp)->priority) continue; /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(*elemp, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - (*elemp)->hook, hook); + (*elemp)->hook, state->hook); continue; } #endif @@ -161,11 +157,7 @@ repeat: /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - int hook_thresh) +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { struct nf_hook_ops *elem; unsigned int verdict; @@ -174,10 +166,11 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list); + elem = list_entry_rcu(&nf_hooks[state->pf][state->hook], + struct nf_hook_ops, list); next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, - outdev, &elem, okfn, hook_thresh); + verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state, + &elem); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { @@ -186,8 +179,8 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - verdict >> NF_VERDICT_QBITS); + int err = nf_queue(skb, elem, state, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 758b002130d9..380ef5148ea1 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -19,6 +19,7 @@ #include <net/netlink.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> @@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next, #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) #include "ip_set_hash_gen.h" +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +static const char *get_physindev_name(const struct sk_buff *skb) +{ + struct net_device *dev = nf_bridge_get_physindev(skb); + + return dev ? dev->name : NULL; +} + +static const char *get_phyoutdev_name(const struct sk_buff *skb) +{ + struct net_device *dev = nf_bridge_get_physoutdev(skb); + + return dev ? dev->name : NULL; +} +#endif + static int hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, e.ip &= ip_set_netmask(e.cidr); #define IFACE(dir) (par->dir ? par->dir->name : NULL) -#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL) #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; + e.iface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); - if (!nf_bridge) + if (!e.iface) return -EINVAL; - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); e.physdev = 1; #else e.iface = NULL; @@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - if (!nf_bridge) + e.iface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); + if (!e.iface) return -EINVAL; - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + e.physdev = 1; #else e.iface = NULL; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b87ca32efa0b..5d2b806a862e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); } } @@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); } } @@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) struct ip_vs_cpu_stats *s; s = this_cpu_ptr(cp->dest->stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); } @@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb, } } +static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, + int conn_reuse_mode) +{ + /* Controlled (FTP DATA or persistence)? */ + if (cp->control) + return false; + + switch (cp->protocol) { + case IPPROTO_TCP: + return (cp->state == IP_VS_TCP_S_TIME_WAIT) || + ((conn_reuse_mode & 2) && + (cp->state == IP_VS_TCP_S_FIN_WAIT) && + (cp->flags & IP_VS_CONN_F_NOOUTPUT)); + case IPPROTO_SCTP: + return cp->state == IP_VS_SCTP_S_CLOSED; + default: + return false; + } +} + /* Handle response packets: rewrite addresses and send away... */ static unsigned int @@ -1246,8 +1272,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) */ static unsigned int ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET); } @@ -1258,8 +1283,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET); } @@ -1273,8 +1297,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET6); } @@ -1285,8 +1308,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET6); } @@ -1585,6 +1607,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_conn *cp; int ret, pkts; struct netns_ipvs *ipvs; + int conn_reuse_mode; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -1653,10 +1676,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ cp = pp->conn_in_get(af, skb, &iph, 0); - if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest && - unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs && - is_new_conn(skb, &iph)) { - ip_vs_conn_expire_now(cp); + conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); + if (conn_reuse_mode && !iph.fragoffs && + is_new_conn(skb, &iph) && cp && + ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && + unlikely(!atomic_read(&cp->dest->weight))) || + unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) { + if (!atomic_read(&cp->n_control)) + ip_vs_conn_expire_now(cp); __ip_vs_conn_put(cp); cp = NULL; } @@ -1738,9 +1765,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ static unsigned int ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET); } @@ -1751,8 +1776,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET); } @@ -1765,9 +1789,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET6); } @@ -1778,8 +1800,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET6); } @@ -1798,8 +1819,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { int r; struct net *net; @@ -1820,8 +1840,7 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 static unsigned int ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { int r; struct net *net; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ed99448671c3..49532672f66d 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net) } static void -ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) +ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) { -#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c +#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c spin_lock_bh(&src->lock); @@ -747,13 +747,28 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) } static void +ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) +{ + dst->conns = (u32)src->conns; + dst->inpkts = (u32)src->inpkts; + dst->outpkts = (u32)src->outpkts; + dst->inbytes = src->inbytes; + dst->outbytes = src->outbytes; + dst->cps = (u32)src->cps; + dst->inpps = (u32)src->inpps; + dst->outpps = (u32)src->outpps; + dst->inbps = (u32)src->inbps; + dst->outbps = (u32)src->outbps; +} + +static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); /* get current counters as zero point, rates are zeroed */ -#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c +#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c IP_VS_ZERO_STATS_COUNTER(conns); IP_VS_ZERO_STATS_COUNTER(inpkts); @@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "conn_reuse_mode", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = { static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats_user show; + struct ip_vs_kstats show; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) " Conns Packets Packets Bytes Bytes\n"); ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, - show.inpkts, show.outpkts, - (unsigned long long) show.inbytes, - (unsigned long long) show.outbytes); - -/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", + (unsigned long long)show.conns, + (unsigned long long)show.inpkts, + (unsigned long long)show.outpkts, + (unsigned long long)show.inbytes, + (unsigned long long)show.outbytes); + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/ seq_puts(seq, - " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq, "%8X %8X %8X %16X %16X\n", - show.cps, show.inpps, show.outpps, - show.inbps, show.outbps); + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n", + (unsigned long long)show.cps, + (unsigned long long)show.inpps, + (unsigned long long)show.outpps, + (unsigned long long)show.inbps, + (unsigned long long)show.outbps); return 0; } @@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) struct net *net = seq_file_single_net(seq); struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; - struct ip_vs_stats_user rates; + struct ip_vs_kstats kstats; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) for_each_possible_cpu(i) { struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); unsigned int start; - __u64 inbytes, outbytes; + u64 conns, inpkts, outpkts, inbytes, outbytes; do { start = u64_stats_fetch_begin_irq(&u->syncp); - inbytes = u->ustats.inbytes; - outbytes = u->ustats.outbytes; + conns = u->cnt.conns; + inpkts = u->cnt.inpkts; + outpkts = u->cnt.outpkts; + inbytes = u->cnt.inbytes; + outbytes = u->cnt.outbytes; } while (u64_stats_fetch_retry_irq(&u->syncp, start)); - seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", - i, u->ustats.conns, u->ustats.inpkts, - u->ustats.outpkts, (__u64)inbytes, - (__u64)outbytes); + seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", + i, (u64)conns, (u64)inpkts, + (u64)outpkts, (u64)inbytes, + (u64)outbytes); } - spin_lock_bh(&tot_stats->lock); - - seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", - tot_stats->ustats.conns, tot_stats->ustats.inpkts, - tot_stats->ustats.outpkts, - (unsigned long long) tot_stats->ustats.inbytes, - (unsigned long long) tot_stats->ustats.outbytes); - - ip_vs_read_estimator(&rates, tot_stats); + ip_vs_copy_stats(&kstats, tot_stats); - spin_unlock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n", + (unsigned long long)kstats.conns, + (unsigned long long)kstats.inpkts, + (unsigned long long)kstats.outpkts, + (unsigned long long)kstats.inbytes, + (unsigned long long)kstats.outbytes); -/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ +/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, - " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq, " %8X %8X %8X %16X %16X\n", - rates.cps, - rates.inpps, - rates.outpps, - rates.inbps, - rates.outbps); + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n", + kstats.cps, + kstats.inpps, + kstats.outpps, + kstats.inbps, + kstats.outbps); return 0; } @@ -2400,6 +2426,7 @@ static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; + struct ip_vs_kstats kstats; sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; @@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; dst->num_dests = src->num_dests; - ip_vs_copy_stats(&dst->stats, &src->stats); + ip_vs_copy_stats(&kstats, &src->stats); + ip_vs_export_stats_user(&dst->stats, &kstats); } static inline int @@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, int count = 0; struct ip_vs_dest *dest; struct ip_vs_dest_entry entry; + struct ip_vs_kstats kstats; memset(&entry, 0, sizeof(entry)); list_for_each_entry(dest, &svc->destinations, n_list) { @@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, entry.activeconns = atomic_read(&dest->activeconns); entry.inactconns = atomic_read(&dest->inactconns); entry.persistconns = atomic_read(&dest->persistconns); - ip_vs_copy_stats(&entry.stats, &dest->stats); + ip_vs_copy_stats(&kstats, &dest->stats); + ip_vs_export_stats_user(&entry.stats, &kstats); if (copy_to_user(&uptr->entrytable[count], &entry, sizeof(entry))) { ret = -EFAULT; @@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, - struct ip_vs_stats *stats) + struct ip_vs_kstats *kstats) { - struct ip_vs_stats_user ustats; struct nlattr *nl_stats = nla_nest_start(skb, container_type); + if (!nl_stats) return -EMSGSIZE; - ip_vs_copy_stats(&ustats, stats); - - if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) || - nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) || - nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) || - nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) || - nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps)) + if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) + goto nla_put_failure; + nla_nest_end(skb, nl_stats); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_stats); + return -EMSGSIZE; +} + +static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, + struct ip_vs_kstats *kstats) +{ + struct nlattr *nl_stats = nla_nest_start(skb, container_type); + + if (!nl_stats) + return -EMSGSIZE; + + if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) || + nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) || + nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps)) goto nla_put_failure; nla_nest_end(skb, nl_stats); @@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; + struct ip_vs_kstats kstats; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) goto nla_put_failure; - if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) + ip_vs_copy_stats(&kstats, &svc->stats); + if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats)) + goto nla_put_failure; + if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats)) goto nla_put_failure; nla_nest_end(skb, nl_service); @@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) { struct nlattr *nl_dest; + struct ip_vs_kstats kstats; nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); if (!nl_dest) @@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) atomic_read(&dest->persistconns)) || nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) goto nla_put_failure; - if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) + ip_vs_copy_stats(&kstats, &dest->stats); + if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats)) + goto nla_put_failure; + if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats)) goto nla_put_failure; nla_nest_end(skb, nl_dest); @@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; tbl[idx++].data = &ipvs->sysctl_backup_only; + ipvs->sysctl_conn_reuse_mode = 1; + tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 1425e9a924c4..ef0eb0a8d552 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -45,17 +45,19 @@ NOTES. - * The stored value for average bps is scaled by 2^5, so that maximal - rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10. + * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10. - * A lot code is taken from net/sched/estimator.c + * Netlink users can see 64-bit values but sockopt users are restricted + to 32-bit values for conns, packets, bps, cps and pps. + + * A lot of code is taken from net/core/gen_estimator.c */ /* * Make a summary from each cpu */ -static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, +static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, struct ip_vs_cpu_stats __percpu *stats) { int i; @@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, for_each_possible_cpu(i) { struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); unsigned int start; - __u64 inbytes, outbytes; + u64 conns, inpkts, outpkts, inbytes, outbytes; + if (add) { - sum->conns += s->ustats.conns; - sum->inpkts += s->ustats.inpkts; - sum->outpkts += s->ustats.outpkts; do { start = u64_stats_fetch_begin(&s->syncp); - inbytes = s->ustats.inbytes; - outbytes = s->ustats.outbytes; + conns = s->cnt.conns; + inpkts = s->cnt.inpkts; + outpkts = s->cnt.outpkts; + inbytes = s->cnt.inbytes; + outbytes = s->cnt.outbytes; } while (u64_stats_fetch_retry(&s->syncp, start)); + sum->conns += conns; + sum->inpkts += inpkts; + sum->outpkts += outpkts; sum->inbytes += inbytes; sum->outbytes += outbytes; } else { add = true; - sum->conns = s->ustats.conns; - sum->inpkts = s->ustats.inpkts; - sum->outpkts = s->ustats.outpkts; do { start = u64_stats_fetch_begin(&s->syncp); - sum->inbytes = s->ustats.inbytes; - sum->outbytes = s->ustats.outbytes; + sum->conns = s->cnt.conns; + sum->inpkts = s->cnt.inpkts; + sum->outpkts = s->cnt.outpkts; + sum->inbytes = s->cnt.inbytes; + sum->outbytes = s->cnt.outbytes; } while (u64_stats_fetch_retry(&s->syncp, start)); } } @@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg) { struct ip_vs_estimator *e; struct ip_vs_stats *s; - u32 n_conns; - u32 n_inpkts, n_outpkts; - u64 n_inbytes, n_outbytes; - u32 rate; + u64 rate; struct net *net = (struct net *)arg; struct netns_ipvs *ipvs; @@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg) s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); - ip_vs_read_cpu_stats(&s->ustats, s->cpustats); - n_conns = s->ustats.conns; - n_inpkts = s->ustats.inpkts; - n_outpkts = s->ustats.outpkts; - n_inbytes = s->ustats.inbytes; - n_outbytes = s->ustats.outbytes; + ip_vs_read_cpu_stats(&s->kstats, s->cpustats); /* scaled by 2^10, but divided 2 seconds */ - rate = (n_conns - e->last_conns) << 9; - e->last_conns = n_conns; - e->cps += ((long)rate - (long)e->cps) >> 2; - - rate = (n_inpkts - e->last_inpkts) << 9; - e->last_inpkts = n_inpkts; - e->inpps += ((long)rate - (long)e->inpps) >> 2; - - rate = (n_outpkts - e->last_outpkts) << 9; - e->last_outpkts = n_outpkts; - e->outpps += ((long)rate - (long)e->outpps) >> 2; - - rate = (n_inbytes - e->last_inbytes) << 4; - e->last_inbytes = n_inbytes; - e->inbps += ((long)rate - (long)e->inbps) >> 2; - - rate = (n_outbytes - e->last_outbytes) << 4; - e->last_outbytes = n_outbytes; - e->outbps += ((long)rate - (long)e->outbps) >> 2; + rate = (s->kstats.conns - e->last_conns) << 9; + e->last_conns = s->kstats.conns; + e->cps += ((s64)rate - (s64)e->cps) >> 2; + + rate = (s->kstats.inpkts - e->last_inpkts) << 9; + e->last_inpkts = s->kstats.inpkts; + e->inpps += ((s64)rate - (s64)e->inpps) >> 2; + + rate = (s->kstats.outpkts - e->last_outpkts) << 9; + e->last_outpkts = s->kstats.outpkts; + e->outpps += ((s64)rate - (s64)e->outpps) >> 2; + + /* scaled by 2^5, but divided 2 seconds */ + rate = (s->kstats.inbytes - e->last_inbytes) << 4; + e->last_inbytes = s->kstats.inbytes; + e->inbps += ((s64)rate - (s64)e->inbps) >> 2; + + rate = (s->kstats.outbytes - e->last_outbytes) << 4; + e->last_outbytes = s->kstats.outbytes; + e->outbps += ((s64)rate - (s64)e->outbps) >> 2; spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); @@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) void ip_vs_zero_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; - struct ip_vs_stats_user *u = &stats->ustats; + struct ip_vs_kstats *k = &stats->kstats; /* reset counters, caller must hold the stats->lock lock */ - est->last_inbytes = u->inbytes; - est->last_outbytes = u->outbytes; - est->last_conns = u->conns; - est->last_inpkts = u->inpkts; - est->last_outpkts = u->outpkts; + est->last_inbytes = k->inbytes; + est->last_outbytes = k->outbytes; + est->last_conns = k->conns; + est->last_inpkts = k->inpkts; + est->last_outpkts = k->outpkts; est->cps = 0; est->inpps = 0; est->outpps = 0; @@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) } /* Get decoded rates */ -void ip_vs_read_estimator(struct ip_vs_stats_user *dst, - struct ip_vs_stats *stats) +void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) { struct ip_vs_estimator *e = &stats->est; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d93ceeb3ef04..19b9cce6c210 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, struct ip_vs_conn *cp; struct netns_ipvs *ipvs = net_ipvs(net); - if (!(flags & IP_VS_CONN_F_TEMPLATE)) + if (!(flags & IP_VS_CONN_F_TEMPLATE)) { cp = ip_vs_conn_in_get(param); - else + if (cp && ((cp->dport != dport) || + !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) { + if (!(flags & IP_VS_CONN_F_INACTIVE)) { + ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + cp = NULL; + } else { + /* This is the expiration message for the + * connection that was already replaced, so we + * just ignore it. + */ + __ip_vs_conn_put(cp); + kfree(param->pe_data); + return; + } + } + } else { cp = ip_vs_ct_in_get(param); + } if (cp) { /* Free pe_data */ @@ -1388,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) mreq.imr_ifindex = dev->ifindex; + rtnl_lock(); lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); + rtnl_unlock(); return ret; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3aedbda7658a..19986ec5f21a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) struct sock *sk = skb->sk; struct rtable *ort = skb_rtable(skb); - if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + if (!skb->dev && sk && sk_fullsock(sk)) ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } @@ -536,8 +536,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_update_conntrack(skb, cp, 1); if (!local) { skb_forward_csum(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; return ret; @@ -554,8 +554,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); if (!local) { skb_forward_csum(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; return ret; @@ -924,7 +924,8 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct net *net = skb_net(skb); + struct netns_ipvs *ipvs = net_ipvs(net); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -991,7 +992,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = ttl; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index a4b5e2a435ac..45da11afa785 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -47,9 +47,11 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) return 0; counter = acct->counter; - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&counter[dir].packets), - (unsigned long long)atomic64_read(&counter[dir].bytes)); + seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); + + return 0; }; EXPORT_SYMBOL_GPL(seq_print_acct); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index b8b95f4027ca..57a26cc90c9f 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -88,7 +88,6 @@ static int amanda_help(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct ts_state ts; struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; unsigned int dataoff, start, stop, off, i; @@ -113,23 +112,20 @@ static int amanda_help(struct sk_buff *skb, return NF_ACCEPT; } - memset(&ts, 0, sizeof(ts)); start = skb_find_text(skb, dataoff, skb->len, - search[SEARCH_CONNECT].ts, &ts); + search[SEARCH_CONNECT].ts); if (start == UINT_MAX) goto out; start += dataoff + search[SEARCH_CONNECT].len; - memset(&ts, 0, sizeof(ts)); stop = skb_find_text(skb, start, skb->len, - search[SEARCH_NEWLINE].ts, &ts); + search[SEARCH_NEWLINE].ts); if (stop == UINT_MAX) goto out; stop += start; for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { - memset(&ts, 0, sizeof(ts)); - off = skb_find_text(skb, start, stop, search[i].ts, &ts); + off = skb_find_text(skb, start, stop, search[i].ts); if (off == UINT_MAX) continue; off += start + search[i].len; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 91a1837acd0e..7a17070c5dab 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -561,7 +561,9 @@ static int exp_seq_show(struct seq_file *s, void *v) helper->expect_policy[expect->class].name); } - return seq_putc(s, '\n'); + seq_putc(s, '\n'); + + return 0; } static const struct seq_operations exp_seq_ops = { diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 61a3c927e63c..ea7f36784b3d 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -14,16 +14,11 @@ /* core.c */ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - unsigned int hook, const struct net_device *indev, - const struct net_device *outdev, - struct nf_hook_ops **elemp, - int (*okfn)(struct sk_buff *), int hook_thresh); + struct nf_hook_state *state, struct nf_hook_ops **elemp); /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, - unsigned int hook, struct net_device *indev, - struct net_device *outdev, int (*okfn)(struct sk_buff *), - unsigned int queuenum); +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, + struct nf_hook_state *state, unsigned int queuenum); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index a2233e77cf39..a5aa5967b8e1 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -17,6 +17,7 @@ #include <net/route.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> @@ -133,7 +134,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header); void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk) { - if (!sk || sk->sk_state == TCP_TIME_WAIT) + if (!sk || !sk_fullsock(sk)) return; read_lock_bh(&sk->sk_callback_lock); @@ -163,10 +164,10 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *physindev; const struct net_device *physoutdev; - physindev = skb->nf_bridge->physindev; + physindev = nf_bridge_get_physindev(skb); if (physindev && in != physindev) nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = skb->nf_bridge->physoutdev; + physoutdev = nf_bridge_get_physoutdev(skb); if (physoutdev && out != physoutdev) nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4c8b68e5fa16..2e88032cd5ad 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -10,6 +10,7 @@ #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/protocol.h> @@ -47,19 +48,25 @@ EXPORT_SYMBOL(nf_unregister_queue_handler); void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { + struct nf_hook_state *state = &entry->state; + /* Release those devices we held, or Alexey will kill me. */ - if (entry->indev) - dev_put(entry->indev); - if (entry->outdev) - dev_put(entry->outdev); + if (state->in) + dev_put(state->in); + if (state->out) + dev_put(state->out); + if (state->sk) + sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { - struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; + struct net_device *physdev; - if (nf_bridge->physindev) - dev_put(nf_bridge->physindev); - if (nf_bridge->physoutdev) - dev_put(nf_bridge->physoutdev); + physdev = nf_bridge_get_physindev(entry->skb); + if (physdev) + dev_put(physdev); + physdev = nf_bridge_get_physoutdev(entry->skb); + if (physdev) + dev_put(physdev); } #endif /* Drop reference to owner of hook which queued us. */ @@ -70,22 +77,25 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); /* Bump dev refs so they don't vanish while packet is out */ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { + struct nf_hook_state *state = &entry->state; + if (!try_module_get(entry->elem->owner)) return false; - if (entry->indev) - dev_hold(entry->indev); - if (entry->outdev) - dev_hold(entry->outdev); + if (state->in) + dev_hold(state->in); + if (state->out) + dev_hold(state->out); + if (state->sk) + sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { - struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; struct net_device *physdev; - physdev = nf_bridge->physindev; + physdev = nf_bridge_get_physindev(entry->skb); if (physdev) dev_hold(physdev); - physdev = nf_bridge->physoutdev; + physdev = nf_bridge_get_physoutdev(entry->skb); if (physdev) dev_hold(physdev); } @@ -100,12 +110,9 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); * through nf_reinject(). */ int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) + struct nf_hook_ops *elem, + struct nf_hook_state *state, + unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -121,7 +128,7 @@ int nf_queue(struct sk_buff *skb, goto err_unlock; } - afinfo = nf_get_afinfo(pf); + afinfo = nf_get_afinfo(state->pf); if (!afinfo) goto err_unlock; @@ -134,11 +141,7 @@ int nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, .elem = elem, - .pf = pf, - .hook = hook, - .indev = indev, - .outdev = outdev, - .okfn = okfn, + .state = *state, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -184,30 +187,29 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) } if (verdict == NF_ACCEPT) { - afinfo = nf_get_afinfo(entry->pf); + afinfo = nf_get_afinfo(entry->state.pf); if (!afinfo || afinfo->reroute(skb, entry) < 0) verdict = NF_DROP; } + entry->state.thresh = INT_MIN; + if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook], - skb, entry->hook, - entry->indev, entry->outdev, &elem, - entry->okfn, INT_MIN); + verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook], + skb, &entry->state, &elem); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: local_bh_disable(); - entry->okfn(skb); + entry->state.okfn(entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, elem, entry->pf, entry->hook, - entry->indev, entry->outdev, entry->okfn, - verdict >> NF_VERDICT_QBITS); + err = nf_queue(skb, elem, &entry->state, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ac1a9528dbf2..78af83bc9c8e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx) static inline bool nft_rule_is_active(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << net->nft.gencursor)) == 0; -} - -static inline int gencursor_next(struct net *net) -{ - return net->nft.gencursor+1 == 1 ? 1 : 0; + return (rule->genmask & nft_genmask_cur(net)) == 0; } static inline int nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << gencursor_next(net))) == 0; + return (rule->genmask & nft_genmask_next(net)) == 0; } static inline void nft_rule_activate_next(struct net *net, struct nft_rule *rule) { /* Now inactive, will be active in the future */ - rule->genmask = (1 << net->nft.gencursor); + rule->genmask = nft_genmask_cur(net); } static inline void nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) { - rule->genmask = (1 << gencursor_next(net)); + rule->genmask = nft_genmask_next(net); } static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) { - rule->genmask &= ~(1 << gencursor_next(net)); + rule->genmask &= ~nft_genmask_next(net); } static int @@ -401,7 +396,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi, } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { - [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_NAME] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; @@ -686,26 +682,28 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, if (!try_module_get(afi->owner)) return -EAFNOSUPPORT; - table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); - if (table == NULL) { - module_put(afi->owner); - return -ENOMEM; - } + err = -ENOMEM; + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (table == NULL) + goto err1; - nla_strlcpy(table->name, name, nla_len(name)); + nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); - if (err < 0) { - kfree(table); - module_put(afi->owner); - return err; - } + if (err < 0) + goto err2; + list_add_tail_rcu(&table->list, &afi->tables); return 0; +err2: + kfree(table); +err1: + module_put(afi->owner); + return err; } static int nft_flush_table(struct nft_ctx *ctx) @@ -1351,6 +1349,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, rcu_assign_pointer(basechain->stats, stats); } + write_pnet(&basechain->pnet, net); basechain->type = type; chain = &basechain->chain; @@ -1378,7 +1377,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); - chain->net = net; chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); @@ -1547,6 +1545,23 @@ nla_put_failure: return -1; }; +int nft_expr_dump(struct sk_buff *skb, unsigned int attr, + const struct nft_expr *expr) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, attr); + if (!nest) + goto nla_put_failure; + if (nf_tables_fill_expr_info(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + struct nft_expr_info { const struct nft_expr_ops *ops; struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; @@ -1624,6 +1639,39 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx, module_put(expr->ops->type->owner); } +struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, + const struct nlattr *nla) +{ + struct nft_expr_info info; + struct nft_expr *expr; + int err; + + err = nf_tables_expr_parse(ctx, nla, &info); + if (err < 0) + goto err1; + + err = -ENOMEM; + expr = kzalloc(info.ops->size, GFP_KERNEL); + if (expr == NULL) + goto err2; + + err = nf_tables_newexpr(ctx, &info, expr); + if (err < 0) + goto err2; + + return expr; +err2: + module_put(info.ops->type->owner); +err1: + return ERR_PTR(err); +} + +void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) +{ + nf_tables_expr_destroy(ctx, expr); + kfree(expr); +} + /* * Rules */ @@ -1705,12 +1753,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, if (list == NULL) goto nla_put_failure; nft_rule_for_each_expr(expr, next, rule) { - struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM); - if (elem == NULL) + if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0) goto nla_put_failure; - if (nf_tables_fill_expr_info(skb, expr) < 0) - goto nla_put_failure; - nla_nest_end(skb, elem); } nla_nest_end(skb, list); @@ -2161,7 +2205,7 @@ nft_select_set_ops(const struct nlattr * const nla[], features = 0; if (nla[NFTA_SET_FLAGS] != NULL) { features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - features &= NFT_SET_INTERVAL | NFT_SET_MAP; + features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT; } bops = NULL; @@ -2218,6 +2262,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_POLICY] = { .type = NLA_U32 }, [NFTA_SET_DESC] = { .type = NLA_NESTED }, [NFTA_SET_ID] = { .type = NLA_U32 }, + [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2368,6 +2414,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + if (set->timeout && + nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout))) + goto nla_put_failure; + if (set->gc_int && + nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) + goto nla_put_failure; + if (set->policy != NFT_SET_POL_PERFORMANCE) { if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy))) goto nla_put_failure; @@ -2580,7 +2633,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, char name[IFNAMSIZ]; unsigned int size; bool create; - u32 ktype, dtype, flags, policy; + u64 timeout; + u32 ktype, dtype, flags, policy, gc_int; struct nft_set_desc desc; int err; @@ -2600,15 +2654,20 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, } desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); - if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data)) + if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; flags = 0; if (nla[NFTA_SET_FLAGS] != NULL) { flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | - NFT_SET_INTERVAL | NFT_SET_MAP)) + NFT_SET_INTERVAL | NFT_SET_TIMEOUT | + NFT_SET_MAP | NFT_SET_EVAL)) return -EINVAL; + /* Only one of both operations is supported */ + if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) == + (NFT_SET_MAP | NFT_SET_EVAL)) + return -EOPNOTSUPP; } dtype = 0; @@ -2625,14 +2684,26 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); - if (desc.dlen == 0 || - desc.dlen > FIELD_SIZEOF(struct nft_data, data)) + if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; } else - desc.dlen = sizeof(struct nft_data); + desc.dlen = sizeof(struct nft_verdict); } else if (flags & NFT_SET_MAP) return -EINVAL; + timeout = 0; + if (nla[NFTA_SET_TIMEOUT] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT])); + } + gc_int = 0; + if (nla[NFTA_SET_GC_INTERVAL] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + } + policy = NFT_SET_POL_PERFORMANCE; if (nla[NFTA_SET_POLICY] != NULL) policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); @@ -2692,6 +2763,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, goto err2; INIT_LIST_HEAD(&set->bindings); + write_pnet(&set->pnet, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -2700,6 +2772,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, set->flags = flags; set->size = desc.size; set->policy = policy; + set->timeout = timeout; + set->gc_int = gc_int; err = ops->init(set, &desc, nla); if (err < 0) @@ -2768,12 +2842,14 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, - set->dtype == NFT_DATA_VERDICT ? - NFT_DATA_VERDICT : NFT_DATA_VALUE); + return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext), + set->dtype == NFT_DATA_VERDICT ? + NFT_DATA_VERDICT : NFT_DATA_VALUE, + set->dlen); } int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, @@ -2785,12 +2861,13 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) return -EBUSY; - if (set->flags & NFT_SET_MAP) { + if (binding->flags & NFT_SET_MAP) { /* If the set is already bound to the same chain all * jumps are already validated for that chain. */ list_for_each_entry(i, &set->bindings, list) { - if (i->chain == binding->chain) + if (binding->flags & NFT_SET_MAP && + i->chain == binding->chain) goto bind; } @@ -2824,6 +2901,35 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, nf_tables_set_destroy(ctx, set); } +const struct nft_set_ext_type nft_set_ext_types[] = { + [NFT_SET_EXT_KEY] = { + .align = __alignof__(u32), + }, + [NFT_SET_EXT_DATA] = { + .align = __alignof__(u32), + }, + [NFT_SET_EXT_EXPR] = { + .align = __alignof__(struct nft_expr), + }, + [NFT_SET_EXT_FLAGS] = { + .len = sizeof(u8), + .align = __alignof__(u8), + }, + [NFT_SET_EXT_TIMEOUT] = { + .len = sizeof(u64), + .align = __alignof__(u64), + }, + [NFT_SET_EXT_EXPIRATION] = { + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, + [NFT_SET_EXT_USERDATA] = { + .len = sizeof(struct nft_userdata), + .align = __alignof__(struct nft_userdata), + }, +}; +EXPORT_SYMBOL_GPL(nft_set_ext_types); + /* * Set elements */ @@ -2832,6 +2938,9 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -2870,6 +2979,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -2877,20 +2987,52 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, - set->klen) < 0) + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), + NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && - nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, set->dlen) < 0) goto nla_put_failure; - if (elem->flags != 0) - if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) && + nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0) + goto nla_put_failure; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, + htonl(*nft_set_ext_flags(ext)))) + goto nla_put_failure; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + cpu_to_be64(*nft_set_ext_timeout(ext)))) + goto nla_put_failure; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { + unsigned long expires, now = jiffies; + + expires = *nft_set_ext_expiration(ext); + if (time_before(now, expires)) + expires -= now; + else + expires = 0; + + if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, + cpu_to_be64(jiffies_to_msecs(expires)))) goto nla_put_failure; + } + + if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { + struct nft_userdata *udata; + + udata = nft_set_ext_userdata(ext); + if (nla_put(skb, NFTA_SET_ELEM_USERDATA, + udata->len + 1, udata->data)) + goto nla_put_failure; + } nla_nest_end(skb, nest); return 0; @@ -3111,20 +3253,65 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } +void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *data, + u64 timeout, gfp_t gfp) +{ + struct nft_set_ext *ext; + void *elem; + + elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); + if (elem == NULL) + return NULL; + + ext = nft_set_elem_ext(set, elem); + nft_set_ext_init(ext, tmpl); + + memcpy(nft_set_ext_key(ext), key, set->klen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + memcpy(nft_set_ext_data(ext), data, set->dlen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) + *nft_set_ext_expiration(ext) = + jiffies + msecs_to_jiffies(timeout); + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) + *nft_set_ext_timeout(ext) = timeout; + + return elem; +} + +void nft_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + + kfree(elem); +} +EXPORT_SYMBOL_GPL(nft_set_elem_destroy); + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; + struct nft_set_ext_tmpl tmpl; + struct nft_set_ext *ext; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_userdata *udata; + struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u64 timeout; + u32 flags; + u8 ulen; int err; - if (set->size && set->nelems == set->size) - return -ENFILE; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy); if (err < 0) @@ -3133,38 +3320,59 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) return -EINVAL; - elem.flags = 0; + nft_set_ext_prepare(&tmpl); + + flags = 0; if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { - elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); - if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && - !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; if (nla[NFTA_SET_ELEM_DATA] != NULL && - elem.flags & NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; } - err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); + timeout = 0; + if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT])); + } else if (set->flags & NFT_SET_TIMEOUT) { + timeout = set->timeout; + } + + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; err = -EINVAL; if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) goto err2; - err = -EEXIST; - if (set->ops->get(set, &elem) == 0) - goto err2; + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len); + if (timeout > 0) { + nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (timeout != set->timeout) + nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + } if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + err = nft_data_init(ctx, &data, sizeof(data), &d2, + nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; @@ -3180,32 +3388,68 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, .chain = (struct nft_chain *)binding->chain, }; - err = nft_validate_data_load(&bind_ctx, dreg, - &elem.data, d2.type); + if (!(binding->flags & NFT_SET_MAP)) + continue; + + err = nft_validate_register_store(&bind_ctx, dreg, + &data, + d2.type, d2.len); if (err < 0) goto err3; } + + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len); + } + + /* The full maximum length of userdata can exceed the maximum + * offset value (U8_MAX) for following extensions, therefor it + * must be the last extension added. + */ + ulen = 0; + if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { + ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); + if (ulen > 0) + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); + } + + err = -ENOMEM; + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data, + timeout, GFP_KERNEL); + if (elem.priv == NULL) + goto err3; + + ext = nft_set_elem_ext(set, elem.priv); + if (flags) + *nft_set_ext_flags(ext) = flags; + if (ulen > 0) { + udata = nft_set_ext_userdata(ext); + udata->len = ulen - 1; + nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) - goto err3; + goto err4; + ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; err = set->ops->insert(set, &elem); if (err < 0) - goto err4; + goto err5; nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err4: +err5: kfree(trans); +err4: + kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&elem.data, d2.type); + nft_data_uninit(&data, d2.type); err2: - nft_data_uninit(&elem.key, d1.type); + nft_data_uninit(&elem.key.val, d1.type); err1: return err; } @@ -3241,11 +3485,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + if (set->size && + !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) + return -ENFILE; + err = nft_add_set_elem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + atomic_dec(&set->nelems); break; - - set->nelems++; + } } return err; } @@ -3268,7 +3516,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) goto err1; - err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]); + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; @@ -3276,21 +3525,26 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) goto err2; - err = set->ops->get(set, &elem); - if (err < 0) - goto err2; - trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) { err = -ENOMEM; goto err2; } + elem.priv = set->ops->deactivate(set, &elem); + if (elem.priv == NULL) { + err = -ENOENT; + goto err3; + } + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; + +err3: + kfree(trans); err2: - nft_data_uninit(&elem.key, desc.type); + nft_data_uninit(&elem.key.val, desc.type); err1: return err; } @@ -3322,11 +3576,36 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, if (err < 0) break; - set->nelems--; + set->ndeact++; } return err; } +void nft_set_gc_batch_release(struct rcu_head *rcu) +{ + struct nft_set_gc_batch *gcb; + unsigned int i; + + gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); + for (i = 0; i < gcb->head.cnt; i++) + nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); + kfree(gcb); +} +EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); + +struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, + gfp_t gfp) +{ + struct nft_set_gc_batch *gcb; + + gcb = kzalloc(sizeof(*gcb), gfp); + if (gcb == NULL) + return gcb; + gcb->head.set = set; + return gcb; +} +EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); + static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { @@ -3526,6 +3805,10 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_DELSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3540,7 +3823,7 @@ static int nf_tables_commit(struct sk_buff *skb) while (++net->nft.base_seq == 0); /* A new generation has just started */ - net->nft.gencursor = gencursor_next(net); + net->nft.gencursor = nft_gencursor_next(net); /* Make sure all packets have left the previous generation before * purging old rules. @@ -3611,24 +3894,23 @@ static int nf_tables_commit(struct sk_buff *skb) NFT_MSG_DELSET, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - nf_tables_setelem_notify(&trans->ctx, - nft_trans_elem_set(trans), - &nft_trans_elem(trans), + te = (struct nft_trans_elem *)trans->data; + + te->set->ops->activate(te->set, &te->elem); + nf_tables_setelem_notify(&trans->ctx, te->set, + &te->elem, NFT_MSG_NEWSETELEM, 0); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); + atomic_dec(&te->set->nelems); + te->set->ndeact--; break; } } @@ -3660,6 +3942,10 @@ static void nf_tables_abort_release(struct nft_trans *trans) case NFT_MSG_NEWSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_NEWSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3728,18 +4014,17 @@ static int nf_tables_abort(struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; - te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); + te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); + atomic_dec(&te->set->nelems); break; case NFT_MSG_DELSETELEM: - nft_trans_elem_set(trans)->nelems++; + te = (struct nft_trans_elem *)trans->data; + + te->set->ops->activate(te->set, &te->elem); + te->set->ndeact--; + nft_trans_destroy(trans); break; } @@ -3814,13 +4099,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { - if (elem->flags & NFT_SET_ELEM_INTERVAL_END) + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_data *data; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; - switch (elem->data.verdict) { + data = nft_set_ext_data(ext); + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - return nf_tables_check_loops(ctx, elem->data.chain); + return nf_tables_check_loops(ctx, data->verdict.chain); default: return 0; } @@ -3853,10 +4143,11 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, if (data == NULL) continue; - switch (data->verdict) { + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - err = nf_tables_check_loops(ctx, data->chain); + err = nf_tables_check_loops(ctx, + data->verdict.chain); if (err < 0) return err; default: @@ -3871,7 +4162,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, continue; list_for_each_entry(binding, &set->bindings, list) { - if (binding->chain != chain) + if (!(binding->flags & NFT_SET_MAP) || + binding->chain != chain) continue; iter.skip = 0; @@ -3889,85 +4181,129 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, } /** - * nft_validate_input_register - validate an expressions' input register + * nft_parse_register - parse a register value from a netlink attribute * - * @reg: the register number + * @attr: netlink attribute * - * Validate that the input register is one of the general purpose - * registers. + * Parse and translate a register value from a netlink attribute. + * Registers used to be 128 bit wide, these register numbers will be + * mapped to the corresponding 32 bit register numbers. */ -int nft_validate_input_register(enum nft_registers reg) +unsigned int nft_parse_register(const struct nlattr *attr) { - if (reg <= NFT_REG_VERDICT) - return -EINVAL; - if (reg > NFT_REG_MAX) - return -ERANGE; - return 0; + unsigned int reg; + + reg = ntohl(nla_get_be32(attr)); + switch (reg) { + case NFT_REG_VERDICT...NFT_REG_4: + return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + default: + return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + } } -EXPORT_SYMBOL_GPL(nft_validate_input_register); +EXPORT_SYMBOL_GPL(nft_parse_register); /** - * nft_validate_output_register - validate an expressions' output register + * nft_dump_register - dump a register value to a netlink attribute + * + * @skb: socket buffer + * @attr: attribute number + * @reg: register number + * + * Construct a netlink attribute containing the register number. For + * compatibility reasons, register numbers being a multiple of 4 are + * translated to the corresponding 128 bit register numbers. + */ +int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg) +{ + if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0) + reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE); + else + reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00; + + return nla_put_be32(skb, attr, htonl(reg)); +} +EXPORT_SYMBOL_GPL(nft_dump_register); + +/** + * nft_validate_register_load - validate a load from a register * * @reg: the register number + * @len: the length of the data * - * Validate that the output register is one of the general purpose - * registers or the verdict register. + * Validate that the input register is one of the general purpose + * registers and that the length of the load is within the bounds. */ -int nft_validate_output_register(enum nft_registers reg) +int nft_validate_register_load(enum nft_registers reg, unsigned int len) { - if (reg < NFT_REG_VERDICT) + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; - if (reg > NFT_REG_MAX) + if (len == 0) + return -EINVAL; + if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data)) return -ERANGE; + return 0; } -EXPORT_SYMBOL_GPL(nft_validate_output_register); +EXPORT_SYMBOL_GPL(nft_validate_register_load); /** - * nft_validate_data_load - validate an expressions' data load + * nft_validate_register_store - validate an expressions' register store * * @ctx: context of the expression performing the load * @reg: the destination register number * @data: the data to load * @type: the data type + * @len: the length of the data * * Validate that a data load uses the appropriate data type for - * the destination register. A value of NULL for the data means - * that its runtime gathered data, which is always of type - * NFT_DATA_VALUE. + * the destination register and the length is within the bounds. + * A value of NULL for the data means that its runtime gathered + * data. */ -int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type) +int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len) { int err; switch (reg) { case NFT_REG_VERDICT: - if (data == NULL || type != NFT_DATA_VERDICT) + if (type != NFT_DATA_VERDICT) return -EINVAL; - if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) { - err = nf_tables_check_loops(ctx, data->chain); + if (data != NULL && + (data->verdict.code == NFT_GOTO || + data->verdict.code == NFT_JUMP)) { + err = nf_tables_check_loops(ctx, data->verdict.chain); if (err < 0) return err; - if (ctx->chain->level + 1 > data->chain->level) { + if (ctx->chain->level + 1 > + data->verdict.chain->level) { if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE) return -EMLINK; - data->chain->level = ctx->chain->level + 1; + data->verdict.chain->level = ctx->chain->level + 1; } } return 0; default: + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) + return -EINVAL; + if (len == 0) + return -EINVAL; + if (reg * NFT_REG32_SIZE + len > + FIELD_SIZEOF(struct nft_regs, data)) + return -ERANGE; + if (data != NULL && type != NFT_DATA_VALUE) return -EINVAL; return 0; } } -EXPORT_SYMBOL_GPL(nft_validate_data_load); +EXPORT_SYMBOL_GPL(nft_validate_register_store); static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, @@ -3988,11 +4324,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; - data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); - switch (data->verdict) { + switch (data->verdict.code) { default: - switch (data->verdict & NF_VERDICT_MASK) { + switch (data->verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: @@ -4018,7 +4354,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return -EOPNOTSUPP; chain->use++; - data->chain = chain; + data->verdict.chain = chain; desc->len = sizeof(data); break; } @@ -4029,10 +4365,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { - switch (data->verdict) { + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->chain->use--; + data->verdict.chain->use--; break; } } @@ -4045,13 +4381,14 @@ static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) goto nla_put_failure; - switch (data->verdict) { + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name)) + if (nla_put_string(skb, NFTA_VERDICT_CHAIN, + data->verdict.chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4061,7 +4398,8 @@ nla_put_failure: return -1; } -static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, +static int nft_value_init(const struct nft_ctx *ctx, + struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla) { unsigned int len; @@ -4069,10 +4407,10 @@ static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, len = nla_len(nla); if (len == 0) return -EINVAL; - if (len > sizeof(data->data)) + if (len > size) return -EOVERFLOW; - nla_memcpy(data->data, nla, sizeof(data->data)); + nla_memcpy(data->data, nla, len); desc->type = NFT_DATA_VALUE; desc->len = len; return 0; @@ -4085,8 +4423,7 @@ static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, } static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { - [NFTA_DATA_VALUE] = { .type = NLA_BINARY, - .len = FIELD_SIZEOF(struct nft_data, data) }, + [NFTA_DATA_VALUE] = { .type = NLA_BINARY }, [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, }; @@ -4095,6 +4432,7 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * * @ctx: context of the expression using the data * @data: destination struct nft_data + * @size: maximum data length * @desc: data description * @nla: netlink attribute containing data * @@ -4104,7 +4442,8 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * The caller can indicate that it only wants to accept data of type * NFT_DATA_VALUE by passing NULL for the ctx argument. */ -int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, +int nft_data_init(const struct nft_ctx *ctx, + struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla) { struct nlattr *tb[NFTA_DATA_MAX + 1]; @@ -4115,7 +4454,8 @@ int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, return err; if (tb[NFTA_DATA_VALUE]) - return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + return nft_value_init(ctx, data, size, desc, + tb[NFTA_DATA_VALUE]); if (tb[NFTA_DATA_VERDICT] && ctx != NULL) return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); return -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 2d298dccb6dd..f153b07073af 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/list.h> @@ -21,24 +22,66 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_log.h> +enum nft_trace { + NFT_TRACE_RULE, + NFT_TRACE_RETURN, + NFT_TRACE_POLICY, +}; + +static const char *const comments[] = { + [NFT_TRACE_RULE] = "rule", + [NFT_TRACE_RETURN] = "return", + [NFT_TRACE_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = LOGLEVEL_WARNING, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static void __nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); +} + +static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + if (unlikely(pkt->skb->nf_trace)) + __nft_trace_packet(pkt, chain, rulenum, type); +} + static void nft_cmp_fast_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1]) + struct nft_regs *regs) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); u32 mask = nft_cmp_fast_mask(priv->len); - if ((data[priv->sreg].data[0] & mask) == priv->data) + if ((regs->data[priv->sreg] & mask) == priv->data) return; - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static bool nft_payload_fast_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; unsigned char *ptr; if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) @@ -51,12 +94,13 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) return false; + *dest = 0; if (priv->len == 2) - *(u16 *)dest->data = *(u16 *)ptr; + *(u16 *)dest = *(u16 *)ptr; else if (priv->len == 4) - *(u32 *)dest->data = *(u32 *)ptr; + *(u32 *)dest = *(u32 *)ptr; else - *(u8 *)dest->data = *(u8 *)ptr; + *(u8 *)dest = *(u8 *)ptr; return true; } @@ -66,62 +110,25 @@ struct nft_jumpstack { int rulenum; }; -enum nft_trace { - NFT_TRACE_RULE, - NFT_TRACE_RETURN, - NFT_TRACE_POLICY, -}; - -static const char *const comments[] = { - [NFT_TRACE_RULE] = "rule", - [NFT_TRACE_RETURN] = "return", - [NFT_TRACE_POLICY] = "policy", -}; - -static struct nf_loginfo trace_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 4, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) -{ - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - - nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); -} - unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; + const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); const struct nft_rule *rule; const struct nft_expr *expr, *last; - struct nft_data data[NFT_REG_MAX + 1]; + struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; struct nft_stats *stats; int rulenum; - /* - * Cache cursor to avoid problems in case that the cursor is updated - * while traversing the ruleset. - */ - unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); + unsigned int gencursor = nft_genmask_cur(net); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs.verdict.code = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { /* This rule is not active, skip. */ @@ -132,62 +139,52 @@ next_rule: nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) - nft_cmp_fast_eval(expr, data); + nft_cmp_fast_eval(expr, ®s); else if (expr->ops != &nft_payload_fast_ops || - !nft_payload_fast_eval(expr, data, pkt)) - expr->ops->eval(expr, data, pkt); + !nft_payload_fast_eval(expr, ®s, pkt)) + expr->ops->eval(expr, ®s, pkt); - if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) + if (regs.verdict.code != NFT_CONTINUE) break; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (regs.verdict.code) { case NFT_BREAK: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); continue; } break; } - switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) { + switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - - return data[NFT_REG_VERDICT].verdict; + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + return regs.verdict.code; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (regs.verdict.code) { case NFT_JUMP: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; jumpstack[stackptr].rulenum = rulenum; stackptr++; - chain = data[NFT_REG_VERDICT].chain; - goto do_chain; + /* fall through */ case NFT_GOTO: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - chain = data[NFT_REG_VERDICT].chain; + chain = regs.verdict.chain; goto do_chain; - case NFT_RETURN: - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); - break; case NFT_CONTINUE: - if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN))) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); + rulenum++; + /* fall through */ + case NFT_RETURN: + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); break; default: WARN_ON(1); @@ -201,8 +198,7 @@ next_rule: goto next_rule; } - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); @@ -244,8 +240,14 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err6; + err = nft_dynset_module_init(); + if (err < 0) + goto err7; + return 0; +err7: + nft_payload_module_exit(); err6: nft_byteorder_module_exit(); err5: @@ -262,6 +264,7 @@ err1: void nf_tables_core_module_exit(void) { + nft_dynset_module_exit(); nft_payload_module_exit(); nft_byteorder_module_exit(); nft_bitwise_module_exit(); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11d85b3813f2..3ad91266c821 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -23,6 +23,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_log.h> @@ -62,7 +63,7 @@ struct nfulnl_instance { struct timer_list timer; struct net *net; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ - int peer_portid; /* PORTID of the peer process */ + u32 peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ @@ -151,7 +152,7 @@ static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * instance_create(struct net *net, u_int16_t group_num, - int portid, struct user_namespace *user_ns) + u32 portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; struct nfnl_log_net *log = nfnl_log_pernet(net); @@ -448,14 +449,18 @@ __build_packet_message(struct nfnl_log_net *log, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; } else { + struct net_device *physindev; + /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; - if (skb->nf_bridge && skb->nf_bridge->physindev && + + physindev = nf_bridge_get_physindev(skb); + if (physindev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, - htonl(skb->nf_bridge->physindev->ifindex))) + htonl(physindev->ifindex))) goto nla_put_failure; } #endif @@ -479,14 +484,18 @@ __build_packet_message(struct nfnl_log_net *log, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; } else { + struct net_device *physoutdev; + /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; - if (skb->nf_bridge && skb->nf_bridge->physoutdev && + + physoutdev = nf_bridge_get_physoutdev(skb); + if (physoutdev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, - htonl(skb->nf_bridge->physoutdev->ifindex))) + htonl(physoutdev->ifindex))) goto nla_put_failure; } #endif @@ -539,7 +548,7 @@ __build_packet_message(struct nfnl_log_net *log, /* UID */ sk = skb->sk; - if (sk && sk->sk_state != TCP_TIME_WAIT) { + if (sk && sk_fullsock(sk)) { read_lock_bh(&sk->sk_callback_lock); if (sk->sk_socket && sk->sk_socket->file) { struct file *file = sk->sk_socket->file; @@ -998,11 +1007,13 @@ static int seq_show(struct seq_file *s, void *v) { const struct nfulnl_instance *inst = v; - return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", - inst->group_num, - inst->peer_portid, inst->qlen, - inst->copy_mode, inst->copy_range, - inst->flushtimeout, atomic_read(&inst->use)); + seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n", + inst->group_num, + inst->peer_portid, inst->qlen, + inst->copy_mode, inst->copy_range, + inst->flushtimeout, atomic_read(&inst->use)); + + return 0; } static const struct seq_operations nful_seq_ops = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 0db8515e76da..0b98c7420239 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -25,6 +25,7 @@ #include <linux/proc_fs.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_queue.h> #include <linux/list.h> @@ -54,7 +55,7 @@ struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; - int peer_portid; + u32 peer_portid; unsigned int queue_maxlen; unsigned int copy_range; unsigned int queue_dropped; @@ -109,8 +110,7 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, - int portid) +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid) { struct nfqnl_instance *inst; unsigned int h; @@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) { const struct cred *cred; - if (sk->sk_state == TCP_TIME_WAIT) + if (!sk_fullsock(sk)) return 0; read_lock_bh(&sk->sk_callback_lock); @@ -314,13 +314,13 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); - if (entry->hook <= NF_INET_FORWARD || - (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) + if (entry->state.hook <= NF_INET_FORWARD || + (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) csum_verify = !skb_csum_unnecessary(entskb); else csum_verify = false; - outdev = entry->outdev; + outdev = entry->state.out; switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { case NFQNL_COPY_META: @@ -368,23 +368,23 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, return NULL; } nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = entry->pf; + nfmsg->nfgen_family = entry->state.pf; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); pmsg = nla_data(nla); pmsg->hw_protocol = entskb->protocol; - pmsg->hook = entry->hook; + pmsg->hook = entry->state.hook; *packet_id_ptr = &pmsg->packet_id; - indev = entry->indev; + indev = entry->state.in; if (indev) { #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; #else - if (entry->pf == PF_BRIDGE) { + if (entry->state.pf == PF_BRIDGE) { /* Case 1: indev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ @@ -396,14 +396,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; } else { + int physinif; + /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physindev && + + physinif = nf_bridge_get_physinif(entskb); + if (physinif && nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(entskb->nf_bridge->physindev->ifindex))) + htonl(physinif))) goto nla_put_failure; } #endif @@ -414,7 +418,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; #else - if (entry->pf == PF_BRIDGE) { + if (entry->state.pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ @@ -426,14 +430,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; } else { + int physoutif; + /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physoutdev && + + physoutif = nf_bridge_get_physoutif(entskb); + if (physoutif && nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(entskb->nf_bridge->physoutdev->ifindex))) + htonl(physoutif))) goto nla_put_failure; } #endif @@ -633,8 +641,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) struct nfqnl_instance *queue; struct sk_buff *skb, *segs; int err = -ENOBUFS; - struct net *net = dev_net(entry->indev ? - entry->indev : entry->outdev); + struct net *net = dev_net(entry->state.in ? + entry->state.in : entry->state.out); struct nfnl_queue_net *q = nfnl_queue_pernet(net); /* rcu_read_lock()ed by nf_hook_slow() */ @@ -647,7 +655,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) skb = entry->skb; - switch (entry->pf) { + switch (entry->state.pf) { case NFPROTO_IPV4: skb->protocol = htons(ETH_P_IP); break; @@ -757,19 +765,20 @@ nfqnl_set_mode(struct nfqnl_instance *queue, static int dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) { - if (entry->indev) - if (entry->indev->ifindex == ifindex) + if (entry->state.in) + if (entry->state.in->ifindex == ifindex) return 1; - if (entry->outdev) - if (entry->outdev->ifindex == ifindex) + if (entry->state.out) + if (entry->state.out->ifindex == ifindex) return 1; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { - if (entry->skb->nf_bridge->physindev && - entry->skb->nf_bridge->physindev->ifindex == ifindex) - return 1; - if (entry->skb->nf_bridge->physoutdev && - entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + int physinif, physoutif; + + physinif = nf_bridge_get_physinif(entry->skb); + physoutif = nf_bridge_get_physoutif(entry->skb); + + if (physinif == ifindex || physoutif == ifindex) return 1; } #endif @@ -860,7 +869,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { }; static struct nfqnl_instance * -verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid) +verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid) { struct nfqnl_instance *queue; @@ -1242,7 +1251,7 @@ static int seq_show(struct seq_file *s, void *v) { const struct nfqnl_instance *inst = v; - seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", + seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n", inst->queue_num, inst->peer_portid, inst->queue_total, inst->copy_mode, inst->copy_range, diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 4fb6ee2c1106..d71cc18fa35d 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -26,18 +26,16 @@ struct nft_bitwise { }; static void nft_bitwise_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_bitwise *priv = nft_expr_priv(expr); - const struct nft_data *src = &data[priv->sreg]; - struct nft_data *dst = &data[priv->dreg]; + const u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; unsigned int i; - for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) { - dst->data[i] = (src->data[i] & priv->mask.data[i]) ^ - priv->xor.data[i]; - } + for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) + dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i]; } static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { @@ -63,28 +61,27 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_XOR] == NULL) return -EINVAL; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG])); - err = nft_validate_input_register(priv->sreg); + priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); + err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) return err; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG])); - err = nft_validate_output_register(priv->dreg); + priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); if (err < 0) return err; - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - - priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); - err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]); + err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1, + tb[NFTA_BITWISE_MASK]); if (err < 0) return err; if (d1.len != priv->len) return -EINVAL; - err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]); + err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, + tb[NFTA_BITWISE_XOR]); if (err < 0) return err; if (d2.len != priv->len) @@ -97,9 +94,9 @@ static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_bitwise *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len))) goto nla_put_failure; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index c39ed8d29df1..fde5145f2e36 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -26,16 +26,17 @@ struct nft_byteorder { }; static void nft_byteorder_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); - struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg]; + u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; union { u32 u32; u16 u16; } *s, *d; unsigned int i; - s = (void *)src->data; - d = (void *)dst->data; + s = (void *)src; + d = (void *)dst; switch (priv->size) { case 4: @@ -87,19 +88,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, tb[NFTA_BYTEORDER_OP] == NULL) return -EINVAL; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); switch (priv->op) { case NFT_BYTEORDER_NTOH: @@ -109,10 +97,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); - if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); switch (priv->size) { case 2: @@ -122,16 +106,24 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - return 0; + priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); + priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + err = nft_validate_register_load(priv->sreg, priv->len); + if (err < 0) + return err; + + priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_byteorder *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) goto nla_put_failure; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e2b3f51c81f1..e25b35d70e4d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -25,13 +25,13 @@ struct nft_cmp_expr { }; static void nft_cmp_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); int d; - d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len); + d = memcmp(®s->data[priv->sreg], &priv->data, priv->len); switch (priv->op) { case NFT_CMP_EQ: if (d != 0) @@ -59,7 +59,7 @@ static void nft_cmp_eval(const struct nft_expr *expr, return; mismatch: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { @@ -75,12 +75,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct nft_data_desc desc; int err; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); - - err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc, + tb[NFTA_CMP_DATA]); BUG_ON(err < 0); + priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); + err = nft_validate_register_load(priv->sreg, desc.len); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; } @@ -89,7 +93,7 @@ static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) goto nla_put_failure; @@ -122,13 +126,18 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, u32 mask; int err; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - - err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, sizeof(data), &desc, + tb[NFTA_CMP_DATA]); BUG_ON(err < 0); - desc.len *= BITS_PER_BYTE; + priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); + err = nft_validate_register_load(priv->sreg, desc.len); + if (err < 0) + return err; + + desc.len *= BITS_PER_BYTE; mask = nft_cmp_fast_mask(desc.len); + priv->data = data.data[0] & mask; priv->len = desc.len; return 0; @@ -139,7 +148,7 @@ static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_data data; - if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) goto nla_put_failure; @@ -167,7 +176,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data_desc desc; struct nft_data data; - enum nft_registers sreg; enum nft_cmp_ops op; int err; @@ -176,11 +184,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) tb[NFTA_CMP_DATA] == NULL) return ERR_PTR(-EINVAL); - sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - err = nft_validate_input_register(sreg); - if (err < 0) - return ERR_PTR(err); - op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); switch (op) { case NFT_CMP_EQ: @@ -194,7 +197,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return ERR_PTR(-EINVAL); } - err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, sizeof(data), &desc, + tb[NFTA_CMP_DATA]); if (err < 0) return ERR_PTR(err); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 65f3e2b6be44..7f29cfc76349 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -20,6 +20,7 @@ #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_arp/arp_tables.h> #include <net/netfilter/nf_tables.h> static int nft_compat_chain_validate_dependency(const char *tablename, @@ -42,6 +43,7 @@ union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; struct ebt_entry ebt; + struct arpt_entry arp; }; static inline void @@ -53,7 +55,7 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) } static void nft_target_eval_xt(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); @@ -70,16 +72,16 @@ static void nft_target_eval_xt(const struct nft_expr *expr, switch (ret) { case XT_CONTINUE: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs->verdict.code = NFT_CONTINUE; break; default: - data[NFT_REG_VERDICT].verdict = ret; + regs->verdict.code = ret; break; } } static void nft_target_eval_bridge(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); @@ -96,19 +98,19 @@ static void nft_target_eval_bridge(const struct nft_expr *expr, switch (ret) { case EBT_ACCEPT: - data[NFT_REG_VERDICT].verdict = NF_ACCEPT; + regs->verdict.code = NF_ACCEPT; break; case EBT_DROP: - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; break; case EBT_CONTINUE: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs->verdict.code = NFT_CONTINUE; break; case EBT_RETURN: - data[NFT_REG_VERDICT].verdict = NFT_RETURN; + regs->verdict.code = NFT_RETURN; break; default: - data[NFT_REG_VERDICT].verdict = ret; + regs->verdict.code = ret; break; } } @@ -143,6 +145,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, entry->ebt.ethproto = (__force __be16)proto; entry->ebt.invflags = inv ? EBT_IPROTO : 0; break; + case NFPROTO_ARP: + break; } par->entryinfo = entry; par->target = target; @@ -300,7 +304,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, } static void nft_match_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); @@ -313,16 +317,16 @@ static void nft_match_eval(const struct nft_expr *expr, ret = match->match(skb, (struct xt_action_param *)&pkt->xt); if (pkt->xt.hotdrop) { - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; return; } - switch(ret) { - case true: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + switch (ret ? 1 : 0) { + case 1: + regs->verdict.code = NFT_CONTINUE; break; - case false: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + case 0: + regs->verdict.code = NFT_BREAK; break; } } @@ -357,6 +361,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, entry->ebt.ethproto = (__force __be16)proto; entry->ebt.invflags = inv ? EBT_IPROTO : 0; break; + case NFPROTO_ARP: + break; } par->entryinfo = entry; par->match = match; @@ -543,6 +549,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, case NFPROTO_BRIDGE: fmt = "ebt_%s"; break; + case NFPROTO_ARP: + fmt = "arpt_%s"; + break; default: pr_err("nft_compat: unsupported protocol %d\n", nfmsg->nfgen_family); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index c89ee486ce54..17591239229f 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -24,7 +24,7 @@ struct nft_counter { }; static void nft_counter_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_counter *priv = nft_expr_priv(expr); @@ -92,6 +92,7 @@ static struct nft_expr_type nft_counter_type __read_mostly = { .ops = &nft_counter_ops, .policy = nft_counter_policy, .maxattr = NFTA_COUNTER_MAX, + .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index cc5603016242..8cbca3432f90 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -31,11 +31,11 @@ struct nft_ct { }; static void nft_ct_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_ct *priv = nft_expr_priv(expr); - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; const struct nf_conn_help *help; @@ -54,8 +54,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, state = NF_CT_STATE_UNTRACKED_BIT; else state = NF_CT_STATE_BIT(ctinfo); - dest->data[0] = state; + *dest = state; return; + default: + break; } if (ct == NULL) @@ -63,26 +65,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_DIRECTION: - dest->data[0] = CTINFO2DIR(ctinfo); + *dest = CTINFO2DIR(ctinfo); return; case NFT_CT_STATUS: - dest->data[0] = ct->status; + *dest = ct->status; return; #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - dest->data[0] = ct->mark; + *dest = ct->mark; return; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK case NFT_CT_SECMARK: - dest->data[0] = ct->secmark; + *dest = ct->secmark; return; #endif case NFT_CT_EXPIRATION: diff = (long)jiffies - (long)ct->timeout.expires; if (diff < 0) diff = 0; - dest->data[0] = jiffies_to_msecs(diff); + *dest = jiffies_to_msecs(diff); return; case NFT_CT_HELPER: if (ct->master == NULL) @@ -93,9 +95,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, helper = rcu_dereference(help->helper); if (helper == NULL) goto err; - if (strlen(helper->name) >= sizeof(dest->data)) - goto err; - strncpy((char *)dest->data, helper->name, sizeof(dest->data)); + strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN); return; #ifdef CONFIG_NF_CONNTRACK_LABELS case NFT_CT_LABELS: { @@ -103,58 +103,60 @@ static void nft_ct_get_eval(const struct nft_expr *expr, unsigned int size; if (!labels) { - memset(dest->data, 0, sizeof(dest->data)); + memset(dest, 0, NF_CT_LABELS_MAX_SIZE); return; } - BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data)); size = labels->words * sizeof(long); - - memcpy(dest->data, labels->bits, size); - if (size < sizeof(dest->data)) - memset(((char *) dest->data) + size, 0, - sizeof(dest->data) - size); + memcpy(dest, labels->bits, size); + if (size < NF_CT_LABELS_MAX_SIZE) + memset(((char *) dest) + size, 0, + NF_CT_LABELS_MAX_SIZE - size); return; } #endif + default: + break; } tuple = &ct->tuplehash[priv->dir].tuple; switch (priv->key) { case NFT_CT_L3PROTOCOL: - dest->data[0] = nf_ct_l3num(ct); + *dest = nf_ct_l3num(ct); return; case NFT_CT_SRC: - memcpy(dest->data, tuple->src.u3.all, + memcpy(dest, tuple->src.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_DST: - memcpy(dest->data, tuple->dst.u3.all, + memcpy(dest, tuple->dst.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_PROTOCOL: - dest->data[0] = nf_ct_protonum(ct); + *dest = nf_ct_protonum(ct); return; case NFT_CT_PROTO_SRC: - dest->data[0] = (__force __u16)tuple->src.u.all; + *dest = (__force __u16)tuple->src.u.all; return; case NFT_CT_PROTO_DST: - dest->data[0] = (__force __u16)tuple->dst.u.all; + *dest = (__force __u16)tuple->dst.u.all; return; + default: + break; } return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static void nft_ct_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; #ifdef CONFIG_NF_CONNTRACK_MARK - u32 value = data[priv->sreg].data[0]; + u32 value = regs->data[priv->sreg]; #endif enum ip_conntrack_info ctinfo; struct nf_conn *ct; @@ -172,6 +174,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr, } break; #endif + default: + break; } } @@ -220,12 +224,17 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + unsigned int len; int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { - case NFT_CT_STATE: case NFT_CT_DIRECTION: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + len = sizeof(u8); + break; + case NFT_CT_STATE: case NFT_CT_STATUS: #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: @@ -233,22 +242,54 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, #ifdef CONFIG_NF_CONNTRACK_SECMARK case NFT_CT_SECMARK: #endif + case NFT_CT_EXPIRATION: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + len = sizeof(u32); + break; #ifdef CONFIG_NF_CONNTRACK_LABELS case NFT_CT_LABELS: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + len = NF_CT_LABELS_MAX_SIZE; + break; #endif - case NFT_CT_EXPIRATION: case NFT_CT_HELPER: if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; + len = NF_CT_HELPER_NAME_LEN; break; + case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + len = sizeof(u8); + break; case NFT_CT_SRC: case NFT_CT_DST: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + + switch (ctx->afi->family) { + case NFPROTO_IPV4: + len = FIELD_SIZEOF(struct nf_conntrack_tuple, + src.u3.ip); + break; + case NFPROTO_IPV6: + case NFPROTO_INET: + len = FIELD_SIZEOF(struct nf_conntrack_tuple, + src.u3.ip6); + break; + default: + return -EAFNOSUPPORT; + } + break; case NFT_CT_PROTO_SRC: case NFT_CT_PROTO_DST: if (tb[NFTA_CT_DIRECTION] == NULL) return -EINVAL; + len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all); break; default: return -EOPNOTSUPP; @@ -265,12 +306,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, } } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); if (err < 0) return err; @@ -286,20 +324,22 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + unsigned int len; int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: + len = FIELD_SIZEOF(struct nf_conn, mark); break; #endif default: return -EOPNOTSUPP; } - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); - err = nft_validate_input_register(priv->sreg); + priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); + err = nft_validate_register_load(priv->sreg, len); if (err < 0) return err; @@ -320,7 +360,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ct *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; @@ -347,7 +387,7 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ct *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_CT_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c new file mode 100644 index 000000000000..513a8ef60a59 --- /dev/null +++ b/net/netfilter/nft_dynset.c @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2015 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> + +struct nft_dynset { + struct nft_set *set; + struct nft_set_ext_tmpl tmpl; + enum nft_dynset_ops op:8; + enum nft_registers sreg_key:8; + enum nft_registers sreg_data:8; + u64 timeout; + struct nft_expr *expr; + struct nft_set_binding binding; +}; + +static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, + struct nft_regs *regs) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set_ext *ext; + u64 timeout; + void *elem; + + if (set->size && !atomic_add_unless(&set->nelems, 1, set->size)) + return NULL; + + timeout = priv->timeout ? : set->timeout; + elem = nft_set_elem_init(set, &priv->tmpl, + ®s->data[priv->sreg_key], + ®s->data[priv->sreg_data], + timeout, GFP_ATOMIC); + if (elem == NULL) { + if (set->size) + atomic_dec(&set->nelems); + return NULL; + } + + ext = nft_set_elem_ext(set, elem); + if (priv->expr != NULL) + nft_expr_clone(nft_set_ext_expr(ext), priv->expr); + + return elem; +} + +static void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set *set = priv->set; + const struct nft_set_ext *ext; + const struct nft_expr *sexpr; + u64 timeout; + + if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, + expr, regs, &ext)) { + sexpr = NULL; + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + sexpr = nft_set_ext_expr(ext); + + if (priv->op == NFT_DYNSET_OP_UPDATE && + nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { + timeout = priv->timeout ? : set->timeout; + *nft_set_ext_expiration(ext) = jiffies + timeout; + } else if (sexpr == NULL) + goto out; + + if (sexpr != NULL) + sexpr->ops->eval(sexpr, regs, pkt); + return; + } +out: + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { + [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING }, + [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 }, + [NFTA_DYNSET_OP] = { .type = NLA_U32 }, + [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, + [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, + [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, +}; + +static int nft_dynset_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set *set; + u64 timeout; + int err; + + if (tb[NFTA_DYNSET_SET_NAME] == NULL || + tb[NFTA_DYNSET_OP] == NULL || + tb[NFTA_DYNSET_SREG_KEY] == NULL) + return -EINVAL; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]); + if (IS_ERR(set)) { + if (tb[NFTA_DYNSET_SET_ID]) + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_DYNSET_SET_ID]); + if (IS_ERR(set)) + return PTR_ERR(set); + } + + if (set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); + switch (priv->op) { + case NFT_DYNSET_OP_ADD: + break; + case NFT_DYNSET_OP_UPDATE: + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + + timeout = 0; + if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + } + + priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); + err = nft_validate_register_load(priv->sreg_key, set->klen);; + if (err < 0) + return err; + + if (tb[NFTA_DYNSET_SREG_DATA] != NULL) { + if (!(set->flags & NFT_SET_MAP)) + return -EINVAL; + if (set->dtype == NFT_DATA_VERDICT) + return -EOPNOTSUPP; + + priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]); + err = nft_validate_register_load(priv->sreg_data, set->dlen); + if (err < 0) + return err; + } else if (set->flags & NFT_SET_MAP) + return -EINVAL; + + if (tb[NFTA_DYNSET_EXPR] != NULL) { + if (!(set->flags & NFT_SET_EVAL)) + return -EINVAL; + if (!(set->flags & NFT_SET_ANONYMOUS)) + return -EOPNOTSUPP; + + priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); + if (IS_ERR(priv->expr)) + return PTR_ERR(priv->expr); + + err = -EOPNOTSUPP; + if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) + goto err1; + } else if (set->flags & NFT_SET_EVAL) + return -EINVAL; + + nft_set_ext_prepare(&priv->tmpl); + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); + if (set->flags & NFT_SET_MAP) + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen); + if (priv->expr != NULL) + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR, + priv->expr->ops->size); + if (set->flags & NFT_SET_TIMEOUT) { + if (timeout || set->timeout) + nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); + } + + priv->timeout = timeout; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + goto err1; + + priv->set = set; + return 0; + +err1: + if (priv->expr != NULL) + nft_expr_destroy(ctx, priv->expr); + return err; +} + +static void nft_dynset_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_dynset *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (priv->expr != NULL) + nft_expr_destroy(ctx, priv->expr); +} + +static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key)) + goto nla_put_failure; + if (priv->set->flags & NFT_SET_MAP && + nft_dump_register(skb, NFTA_DYNSET_SREG_DATA, priv->sreg_data)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) + goto nla_put_failure; + if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dynset_type; +static const struct nft_expr_ops nft_dynset_ops = { + .type = &nft_dynset_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), + .eval = nft_dynset_eval, + .init = nft_dynset_init, + .destroy = nft_dynset_destroy, + .dump = nft_dynset_dump, +}; + +static struct nft_expr_type nft_dynset_type __read_mostly = { + .name = "dynset", + .ops = &nft_dynset_ops, + .policy = nft_dynset_policy, + .maxattr = NFTA_DYNSET_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_dynset_module_init(void) +{ + return nft_register_expr(&nft_dynset_type); +} + +void nft_dynset_module_exit(void) +{ + nft_unregister_expr(&nft_dynset_type); +} diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c deleted file mode 100644 index b6eed4d5a096..000000000000 --- a/net/netfilter/nft_expr_template.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> - -struct nft_template { - -}; - -static void nft_template_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - struct nft_template *priv = nft_expr_priv(expr); - -} - -static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { - [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 }, -}; - -static int nft_template_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_template *priv = nft_expr_priv(expr); - - return 0; -} - -static void nft_template_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_template *priv = nft_expr_priv(expr); - -} - -static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - const struct nft_template *priv = nft_expr_priv(expr); - - NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_type nft_template_type; -static const struct nft_expr_ops nft_template_ops = { - .type = &nft_template_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), - .eval = nft_template_eval, - .init = nft_template_init, - .destroy = nft_template_destroy, - .dump = nft_template_dump, -}; - -static struct nft_expr_type nft_template_type __read_mostly = { - .name = "template", - .ops = &nft_template_ops, - .policy = nft_template_policy, - .maxattr = NFTA_TEMPLATE_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_template_module_init(void) -{ - return nft_register_expr(&nft_template_type); -} - -static void __exit nft_template_module_exit(void) -{ - nft_unregister_expr(&nft_template_type); -} - -module_init(nft_template_module_init); -module_exit(nft_template_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("template"); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 55c939f5371f..ba7aed13e174 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -26,11 +26,11 @@ struct nft_exthdr { }; static void nft_exthdr_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; unsigned int offset = 0; int err; @@ -39,11 +39,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr, goto err; offset += priv->offset; - if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0) + dest[priv->len / NFT_REG32_SIZE] = 0; + if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { @@ -58,7 +59,6 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - int err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -69,22 +69,17 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); - if (priv->len == 0 || - priv->len > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; + priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); - priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); } static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_exthdr *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) goto nla_put_failure; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 37c15e674884..3f9d45d3d9b7 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -15,6 +15,7 @@ #include <linux/log2.h> #include <linux/jhash.h> #include <linux/netlink.h> +#include <linux/workqueue.h> #include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> @@ -23,119 +24,175 @@ /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_HASH_ELEMENT_HINT 3 +struct nft_hash { + struct rhashtable ht; + struct delayed_work gc_work; +}; + struct nft_hash_elem { struct rhash_head node; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; +}; + +struct nft_hash_cmp_arg { + const struct nft_set *set; + const u32 *key; + u8 genmask; }; -static bool nft_hash_lookup(const struct nft_set *set, - const struct nft_data *key, - struct nft_data *data) +static const struct rhashtable_params nft_hash_params; + +static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_cmp_arg *arg = data; + + return jhash(arg->key, len, seed); +} + +static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) { - struct rhashtable *priv = nft_set_priv(set); + const struct nft_hash_elem *he = data; + + return jhash(nft_set_ext_key(&he->ext), len, seed); +} + +static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_hash_cmp_arg *x = arg->key; + const struct nft_hash_elem *he = ptr; + + if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + return 1; + if (nft_set_elem_expired(&he->ext)) + return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; + return 0; +} + +static bool nft_hash_lookup(const struct nft_set *set, const u32 *key, + const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_cur(read_pnet(&set->pnet)), + .set = set, + .key = key, + }; - he = rhashtable_lookup(priv, key); - if (he && set->flags & NFT_SET_MAP) - nft_data_copy(data, he->data); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + *ext = &he->ext; return !!he; } -static int nft_hash_insert(const struct nft_set *set, - const struct nft_set_elem *elem) +static bool nft_hash_update(struct nft_set *set, const u32 *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_regs *regs), + const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_set_ext **ext) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; - unsigned int size; - - if (elem->flags != 0) - return -EINVAL; + struct nft_hash_cmp_arg arg = { + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, + }; - size = sizeof(*he); - if (set->flags & NFT_SET_MAP) - size += sizeof(he->data[0]); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + goto out; - he = kzalloc(size, GFP_KERNEL); + he = new(set, expr, regs); if (he == NULL) - return -ENOMEM; - - nft_data_copy(&he->key, &elem->key); - if (set->flags & NFT_SET_MAP) - nft_data_copy(he->data, &elem->data); - - rhashtable_insert(priv, &he->node); + goto err1; + if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params)) + goto err2; +out: + *ext = &he->ext; + return true; - return 0; +err2: + nft_set_elem_destroy(set, he); +err1: + return false; } -static void nft_hash_elem_destroy(const struct nft_set *set, - struct nft_hash_elem *he) +static int nft_hash_insert(const struct nft_set *set, + const struct nft_set_elem *elem) { - nft_data_uninit(&he->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(he->data, set->dtype); - kfree(he); + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .set = set, + .key = elem->key.val.data, + }; + + return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); } -static void nft_hash_remove(const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_hash_activate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; - rhashtable_remove(priv, elem->cookie); - synchronize_rcu(); - kfree(elem->cookie); + nft_set_elem_change_active(set, &he->ext); + nft_set_elem_clear_busy(&he->ext); } -struct nft_compare_arg { - const struct nft_set *set; - struct nft_set_elem *elem; -}; - -static bool nft_hash_compare(void *ptr, void *arg) +static void *nft_hash_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash_elem *he = ptr; - struct nft_compare_arg *x = arg; - - if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { - x->elem->cookie = he; - x->elem->flags = 0; - if (x->set->flags & NFT_SET_MAP) - nft_data_copy(&x->elem->data, he->data); + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .set = set, + .key = elem->key.val.data, + }; - return true; + rcu_read_lock(); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) { + if (!nft_set_elem_mark_busy(&he->ext)) + nft_set_elem_change_active(set, &he->ext); + else + he = NULL; } + rcu_read_unlock(); - return false; + return he; } -static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); - struct nft_compare_arg arg = { - .set = set, - .elem = elem, - }; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; - if (rhashtable_lookup_compare(priv, &elem->key, - &nft_hash_compare, &arg)) - return 0; - - return -ENOENT; + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - struct rhashtable *priv = nft_set_priv(set); - const struct nft_hash_elem *he; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; - err = rhashtable_walk_init(priv, &hti); + err = rhashtable_walk_init(&priv->ht, &hti); iter->err = err; if (err) return; @@ -159,11 +216,12 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, if (iter->count < iter->skip) goto cont; + if (nft_set_elem_expired(&he->ext)) + goto cont; + if (!nft_set_elem_active(&he->ext, genmask)) + goto cont; - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; + elem.priv = he; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -178,47 +236,102 @@ out: rhashtable_walk_exit(&hti); } +static void nft_hash_gc(struct work_struct *work) +{ + struct nft_set *set; + struct nft_hash_elem *he; + struct nft_hash *priv; + struct nft_set_gc_batch *gcb = NULL; + struct rhashtable_iter hti; + int err; + + priv = container_of(work, struct nft_hash, gc_work.work); + set = nft_set_container_of(priv); + + err = rhashtable_walk_init(&priv->ht, &hti); + if (err) + goto schedule; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) + goto out; + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + if (PTR_ERR(he) != -EAGAIN) + goto out; + continue; + } + + if (!nft_set_elem_expired(&he->ext)) + continue; + if (nft_set_elem_mark_busy(&he->ext)) + continue; + + gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); + if (gcb == NULL) + goto out; + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + atomic_dec(&set->nelems); + nft_set_gc_batch_add(gcb, he); + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + nft_set_gc_batch_complete(gcb); +schedule: + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); +} + static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) { - return sizeof(struct rhashtable); + return sizeof(struct nft_hash); } +static const struct rhashtable_params nft_hash_params = { + .head_offset = offsetof(struct nft_hash_elem, node), + .hashfn = nft_hash_key, + .obj_hashfn = nft_hash_obj, + .obj_cmpfn = nft_hash_cmp, + .automatic_shrinking = true, +}; + static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) { - struct rhashtable *priv = nft_set_priv(set); - struct rhashtable_params params = { - .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT, - .head_offset = offsetof(struct nft_hash_elem, node), - .key_offset = offsetof(struct nft_hash_elem, key), - .key_len = set->klen, - .hashfn = jhash, - }; + struct nft_hash *priv = nft_set_priv(set); + struct rhashtable_params params = nft_hash_params; + int err; - return rhashtable_init(priv, ¶ms); + params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; + params.key_len = set->klen; + + err = rhashtable_init(&priv->ht, ¶ms); + if (err < 0) + return err; + + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); + if (set->flags & NFT_SET_TIMEOUT) + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); + return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_elem_destroy(void *ptr, void *arg) { - struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; - struct nft_hash_elem *he; - struct rhash_head *pos, *next; - unsigned int i; - - /* Stop an eventual async resizing */ - priv->being_destroyed = true; - mutex_lock(&priv->mutex); + nft_set_elem_destroy((const struct nft_set *)arg, ptr); +} - tbl = rht_dereference(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_safe(he, pos, next, tbl, i, node) - nft_hash_elem_destroy(set, he); - } - mutex_unlock(&priv->mutex); +static void nft_hash_destroy(const struct nft_set *set) +{ + struct nft_hash *priv = nft_set_priv(set); - rhashtable_destroy(priv); + cancel_delayed_work_sync(&priv->gc_work); + rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + (void *)set); } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, @@ -227,11 +340,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, unsigned int esize; esize = sizeof(struct nft_hash_elem); - if (features & NFT_SET_MAP) - esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); - if (desc->size) { - est->size = sizeof(struct rhashtable) + + est->size = sizeof(struct nft_hash) + roundup_pow_of_two(desc->size * 4 / 3) * sizeof(struct nft_hash_elem *) + desc->size * esize; @@ -251,15 +361,18 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, - .get = nft_hash_get, .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, .remove = nft_hash_remove, .lookup = nft_hash_lookup, + .update = nft_hash_update, .walk = nft_hash_walk, - .features = NFT_SET_MAP, + .features = NFT_SET_MAP | NFT_SET_TIMEOUT, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 810385eb7249..db3b746858e3 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -24,12 +24,12 @@ struct nft_immediate_expr { }; static void nft_immediate_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - nft_data_copy(&data[priv->dreg], &priv->data); + nft_data_copy(®s->data[priv->dreg], &priv->data, priv->dlen); } static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { @@ -49,17 +49,15 @@ static int nft_immediate_init(const struct nft_ctx *ctx, tb[NFTA_IMMEDIATE_DATA] == NULL) return -EINVAL; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); + err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc, + tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; priv->dlen = desc.len; - err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type); + priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, &priv->data, + desc.type, desc.len); if (err < 0) goto err1; @@ -81,7 +79,7 @@ static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_IMMEDIATE_DREG, priv->dreg)) goto nla_put_failure; return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data, diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 85da5bd02f64..435c1ccd6c0e 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -27,7 +27,7 @@ struct nft_limit { }; static void nft_limit_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_limit *priv = nft_expr_priv(expr); @@ -45,7 +45,7 @@ static void nft_limit_eval(const struct nft_expr *expr, } spin_unlock_bh(&limit_lock); - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { @@ -98,6 +98,7 @@ static struct nft_expr_type nft_limit_type __read_mostly = { .ops = &nft_limit_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, + .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index bde05f28cf14..a13d6a386d63 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -27,7 +27,7 @@ struct nft_log { }; static void nft_log_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_log *priv = nft_expr_priv(expr); @@ -78,7 +78,7 @@ static int nft_log_init(const struct nft_ctx *ctx, li->u.log.level = ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); } else { - li->u.log.level = 4; + li->u.log.level = LOGLEVEL_WARNING; } if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 9615b8b9fb37..b3c31ef8015d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -26,15 +26,20 @@ struct nft_lookup { }; static void nft_lookup_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; + const struct nft_set_ext *ext; - if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + if (set->ops->lookup(set, ®s->data[priv->sreg], &ext)) { + if (set->flags & NFT_SET_MAP) + nft_data_copy(®s->data[priv->dreg], + nft_set_ext_data(ext), set->dlen); return; - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + } + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { @@ -66,8 +71,11 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return PTR_ERR(set); } - priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); - err = nft_validate_input_register(priv->sreg); + if (set->flags & NFT_SET_EVAL) + return -EOPNOTSUPP; + + priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); + err = nft_validate_register_load(priv->sreg, set->klen); if (err < 0) return err; @@ -75,19 +83,16 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_MAP)) return -EINVAL; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG])); - err = nft_validate_output_register(priv->dreg); + priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + set->dtype, set->dlen); if (err < 0) return err; - - if (priv->dreg == NFT_REG_VERDICT) { - if (set->dtype != NFT_DATA_VERDICT) - return -EINVAL; - } else if (set->dtype == NFT_DATA_VERDICT) - return -EINVAL; } else if (set->flags & NFT_SET_MAP) return -EINVAL; + priv->binding.flags = set->flags & NFT_SET_MAP; + err = nf_tables_bind_set(ctx, set, &priv->binding); if (err < 0) return err; @@ -110,10 +115,10 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg)) goto nla_put_failure; if (priv->set->flags & NFT_SET_MAP) - if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e99911eda915..52561e1c31e2 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -25,65 +25,68 @@ #include <net/netfilter/nft_meta.h> void nft_meta_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; const struct net_device *in = pkt->in, *out = pkt->out; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; switch (priv->key) { case NFT_META_LEN: - dest->data[0] = skb->len; + *dest = skb->len; break; case NFT_META_PROTOCOL: - *(__be16 *)dest->data = skb->protocol; + *dest = 0; + *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - dest->data[0] = pkt->ops->pf; + *dest = pkt->ops->pf; break; case NFT_META_L4PROTO: - dest->data[0] = pkt->tprot; + *dest = pkt->tprot; break; case NFT_META_PRIORITY: - dest->data[0] = skb->priority; + *dest = skb->priority; break; case NFT_META_MARK: - dest->data[0] = skb->mark; + *dest = skb->mark; break; case NFT_META_IIF: if (in == NULL) goto err; - dest->data[0] = in->ifindex; + *dest = in->ifindex; break; case NFT_META_OIF: if (out == NULL) goto err; - dest->data[0] = out->ifindex; + *dest = out->ifindex; break; case NFT_META_IIFNAME: if (in == NULL) goto err; - strncpy((char *)dest->data, in->name, sizeof(dest->data)); + strncpy((char *)dest, in->name, IFNAMSIZ); break; case NFT_META_OIFNAME: if (out == NULL) goto err; - strncpy((char *)dest->data, out->name, sizeof(dest->data)); + strncpy((char *)dest, out->name, IFNAMSIZ); break; case NFT_META_IIFTYPE: if (in == NULL) goto err; - *(u16 *)dest->data = in->type; + *dest = 0; + *(u16 *)dest = in->type; break; case NFT_META_OIFTYPE: if (out == NULL) goto err; - *(u16 *)dest->data = out->type; + *dest = 0; + *(u16 *)dest = out->type; break; case NFT_META_SKUID: - if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + if (skb->sk == NULL || !sk_fullsock(skb->sk)) goto err; read_lock_bh(&skb->sk->sk_callback_lock); @@ -93,13 +96,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; } - dest->data[0] = - from_kuid_munged(&init_user_ns, + *dest = from_kuid_munged(&init_user_ns, skb->sk->sk_socket->file->f_cred->fsuid); read_unlock_bh(&skb->sk->sk_callback_lock); break; case NFT_META_SKGID: - if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + if (skb->sk == NULL || !sk_fullsock(skb->sk)) goto err; read_lock_bh(&skb->sk->sk_callback_lock); @@ -108,8 +110,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, read_unlock_bh(&skb->sk->sk_callback_lock); goto err; } - dest->data[0] = - from_kgid_munged(&init_user_ns, + *dest = from_kgid_munged(&init_user_ns, skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); break; @@ -119,33 +120,33 @@ void nft_meta_get_eval(const struct nft_expr *expr, if (dst == NULL) goto err; - dest->data[0] = dst->tclassid; + *dest = dst->tclassid; break; } #endif #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: - dest->data[0] = skb->secmark; + *dest = skb->secmark; break; #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { - dest->data[0] = skb->pkt_type; + *dest = skb->pkt_type; break; } switch (pkt->ops->pf) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - dest->data[0] = PACKET_MULTICAST; + *dest = PACKET_MULTICAST; else - dest->data[0] = PACKET_BROADCAST; + *dest = PACKET_BROADCAST; break; case NFPROTO_IPV6: if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) - dest->data[0] = PACKET_MULTICAST; + *dest = PACKET_MULTICAST; else - dest->data[0] = PACKET_BROADCAST; + *dest = PACKET_BROADCAST; break; default: WARN_ON(1); @@ -153,23 +154,22 @@ void nft_meta_get_eval(const struct nft_expr *expr, } break; case NFT_META_CPU: - dest->data[0] = smp_processor_id(); + *dest = raw_smp_processor_id(); break; case NFT_META_IIFGROUP: if (in == NULL) goto err; - dest->data[0] = in->group; + *dest = in->group; break; case NFT_META_OIFGROUP: if (out == NULL) goto err; - dest->data[0] = out->group; + *dest = out->group; break; case NFT_META_CGROUP: - if (skb->sk == NULL) - break; - - dest->data[0] = skb->sk->sk_classid; + if (skb->sk == NULL || !sk_fullsock(skb->sk)) + goto err; + *dest = skb->sk->sk_classid; break; default: WARN_ON(1); @@ -178,17 +178,17 @@ void nft_meta_get_eval(const struct nft_expr *expr, return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } EXPORT_SYMBOL_GPL(nft_meta_get_eval); void nft_meta_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = data[meta->sreg].data[0]; + u32 value = regs->data[meta->sreg]; switch (meta->key) { case NFT_META_MARK: @@ -218,22 +218,22 @@ int nft_meta_get_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); - int err; + unsigned int len; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { - case NFT_META_LEN: case NFT_META_PROTOCOL: + case NFT_META_IIFTYPE: + case NFT_META_OIFTYPE: + len = sizeof(u16); + break; case NFT_META_NFPROTO: case NFT_META_L4PROTO: + case NFT_META_LEN: case NFT_META_PRIORITY: case NFT_META_MARK: case NFT_META_IIF: case NFT_META_OIF: - case NFT_META_IIFNAME: - case NFT_META_OIFNAME: - case NFT_META_IIFTYPE: - case NFT_META_OIFTYPE: case NFT_META_SKUID: case NFT_META_SKGID: #ifdef CONFIG_IP_ROUTE_CLASSID @@ -247,21 +247,19 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: case NFT_META_CGROUP: + len = sizeof(u32); + break; + case NFT_META_IIFNAME: + case NFT_META_OIFNAME: + len = IFNAMSIZ; break; default: return -EOPNOTSUPP; } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - - return 0; + priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); } EXPORT_SYMBOL_GPL(nft_meta_get_init); @@ -270,20 +268,24 @@ int nft_meta_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; int err; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_MARK: case NFT_META_PRIORITY: + len = sizeof(u32); + break; case NFT_META_NFTRACE: + len = sizeof(u8); break; default: return -EOPNOTSUPP; } - priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); - err = nft_validate_input_register(priv->sreg); + priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); + err = nft_validate_register_load(priv->sreg, len); if (err < 0) return err; @@ -298,7 +300,7 @@ int nft_meta_get_dump(struct sk_buff *skb, if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg)) goto nla_put_failure; return 0; @@ -314,7 +316,7 @@ int nft_meta_set_dump(struct sk_buff *skb, if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index a0837c6c9283..ee2d71753746 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -37,7 +37,7 @@ struct nft_nat { }; static void nft_nat_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_nat *priv = nft_expr_priv(expr); @@ -49,33 +49,32 @@ static void nft_nat_eval(const struct nft_expr *expr, if (priv->sreg_addr_min) { if (priv->family == AF_INET) { range.min_addr.ip = (__force __be32) - data[priv->sreg_addr_min].data[0]; + regs->data[priv->sreg_addr_min]; range.max_addr.ip = (__force __be32) - data[priv->sreg_addr_max].data[0]; + regs->data[priv->sreg_addr_max]; } else { memcpy(range.min_addr.ip6, - data[priv->sreg_addr_min].data, - sizeof(struct nft_data)); + ®s->data[priv->sreg_addr_min], + sizeof(range.min_addr.ip6)); memcpy(range.max_addr.ip6, - data[priv->sreg_addr_max].data, - sizeof(struct nft_data)); + ®s->data[priv->sreg_addr_max], + sizeof(range.max_addr.ip6)); } range.flags |= NF_NAT_RANGE_MAP_IPS; } if (priv->sreg_proto_min) { range.min_proto.all = - *(__be16 *)&data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min]; range.max_proto.all = - *(__be16 *)&data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } range.flags |= priv->flags; - data[NFT_REG_VERDICT].verdict = - nf_nat_setup_info(ct, &range, priv->type); + regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type); } static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { @@ -119,6 +118,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_nat *priv = nft_expr_priv(expr); + unsigned int alen, plen; u32 family; int err; @@ -146,25 +146,34 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (family != AF_INET && family != AF_INET6) - return -EAFNOSUPPORT; if (family != ctx->afi->family) return -EOPNOTSUPP; + + switch (family) { + case NFPROTO_IPV4: + alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip); + break; + case NFPROTO_IPV6: + alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6); + break; + default: + return -EAFNOSUPPORT; + } priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { priv->sreg_addr_min = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN])); - - err = nft_validate_input_register(priv->sreg_addr_min); + nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]); + err = nft_validate_register_load(priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { priv->sreg_addr_max = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX])); + nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]); - err = nft_validate_input_register(priv->sreg_addr_max); + err = nft_validate_register_load(priv->sreg_addr_max, + alen); if (err < 0) return err; } else { @@ -172,19 +181,21 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, } } + plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { priv->sreg_proto_min = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN])); + nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]); - err = nft_validate_input_register(priv->sreg_proto_min); + err = nft_validate_register_load(priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { priv->sreg_proto_max = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX])); + nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]); - err = nft_validate_input_register(priv->sreg_proto_max); + err = nft_validate_register_load(priv->sreg_proto_max, + plen); if (err < 0) return err; } else { @@ -220,18 +231,18 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (priv->sreg_addr_min) { - if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN, - htonl(priv->sreg_addr_min)) || - nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, - htonl(priv->sreg_addr_max))) + if (nft_dump_register(skb, NFTA_NAT_REG_ADDR_MIN, + priv->sreg_addr_min) || + nft_dump_register(skb, NFTA_NAT_REG_ADDR_MAX, + priv->sreg_addr_max)) goto nla_put_failure; } if (priv->sreg_proto_min) { - if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, - htonl(priv->sreg_proto_min)) || - nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, - htonl(priv->sreg_proto_max))) + if (nft_dump_register(skb, NFTA_NAT_REG_PROTO_MIN, + priv->sreg_proto_min) || + nft_dump_register(skb, NFTA_NAT_REG_PROTO_MAX, + priv->sreg_proto_max)) goto nla_put_failure; } diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 85daa84bfdfe..94fb3b27a2c5 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -18,12 +18,12 @@ #include <net/netfilter/nf_tables.h> static void nft_payload_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; int offset; switch (priv->base) { @@ -43,11 +43,12 @@ static void nft_payload_eval(const struct nft_expr *expr, } offset += priv->offset; - if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0) + dest[priv->len / NFT_REG32_SIZE] = 0; + if (skb_copy_bits(skb, offset, dest, priv->len) < 0) goto err; return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { @@ -62,24 +63,21 @@ static int nft_payload_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_payload *priv = nft_expr_priv(expr); - int err; priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]); - priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); } static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_payload *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) || + if (nft_dump_register(skb, NFTA_PAYLOAD_DREG, priv->dreg) || nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len))) @@ -131,9 +129,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, } offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) - return ERR_PTR(-EINVAL); + len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index e8ae2f6bf232..96805d21d618 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -28,7 +28,7 @@ struct nft_queue { }; static void nft_queue_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_queue *priv = nft_expr_priv(expr); @@ -51,7 +51,7 @@ static void nft_queue_eval(const struct nft_expr *expr, if (priv->flags & NFT_QUEUE_FLAG_BYPASS) ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; - data[NFT_REG_VERDICT].verdict = ret; + regs->verdict.code = ret; } static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 46214f245665..1c30f41cff5b 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -26,25 +26,25 @@ struct nft_rbtree { struct nft_rbtree_elem { struct rb_node node; - u16 flags; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; -static bool nft_rbtree_lookup(const struct nft_set *set, - const struct nft_data *key, - struct nft_data *data) + +static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, + const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; - const struct rb_node *parent = priv->root.rb_node; + const struct rb_node *parent; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; spin_lock_bh(&nft_rbtree_lock); + parent = priv->root.rb_node; while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = parent->rb_left; interval = rbe; @@ -52,12 +52,17 @@ static bool nft_rbtree_lookup(const struct nft_set *set, parent = parent->rb_right; else { found: - if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(&rbe->ext) & + NFT_SET_ELEM_INTERVAL_END) goto out; - if (set->flags & NFT_SET_MAP) - nft_data_copy(data, rbe->data); - spin_unlock_bh(&nft_rbtree_lock); + + *ext = &rbe->ext; return true; } } @@ -71,23 +76,13 @@ out: return false; } -static void nft_rbtree_elem_destroy(const struct nft_set *set, - struct nft_rbtree_elem *rbe) -{ - nft_data_uninit(&rbe->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(rbe->data, set->dtype); - - kfree(rbe); -} - static int __nft_rbtree_insert(const struct nft_set *set, struct nft_rbtree_elem *new) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; parent = NULL; @@ -95,13 +90,18 @@ static int __nft_rbtree_insert(const struct nft_set *set, while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &new->key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), + nft_set_ext_key(&new->ext), + set->klen); if (d < 0) p = &parent->rb_left; else if (d > 0) p = &parent->rb_right; - else - return -EEXIST; + else { + if (nft_set_elem_active(&rbe->ext, genmask)) + return -EEXIST; + p = &parent->rb_left; + } } rb_link_node(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); @@ -111,31 +111,13 @@ static int __nft_rbtree_insert(const struct nft_set *set, static int nft_rbtree_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rbtree_elem *rbe; - unsigned int size; + struct nft_rbtree_elem *rbe = elem->priv; int err; - size = sizeof(*rbe); - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) - size += sizeof(rbe->data[0]); - - rbe = kzalloc(size, GFP_KERNEL); - if (rbe == NULL) - return -ENOMEM; - - rbe->flags = elem->flags; - nft_data_copy(&rbe->key, &elem->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(rbe->data, &elem->data); - spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); - if (err < 0) - kfree(rbe); - spin_unlock_bh(&nft_rbtree_lock); + return err; } @@ -143,42 +125,49 @@ static void nft_rbtree_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->cookie; + struct nft_rbtree_elem *rbe = elem->priv; spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); spin_unlock_bh(&nft_rbtree_lock); - kfree(rbe); } -static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +static void nft_rbtree_activate(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe = elem->priv; + + nft_set_elem_change_active(set, &rbe->ext); +} + +static void *nft_rbtree_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; - spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, + set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) parent = parent->rb_right; else { - elem->cookie = rbe; - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem->data, rbe->data); - elem->flags = rbe->flags; - spin_unlock_bh(&nft_rbtree_lock); - return 0; + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + nft_set_elem_change_active(set, &rbe->ext); + return rbe; } } - spin_unlock_bh(&nft_rbtree_lock); - return -ENOENT; + return NULL; } static void nft_rbtree_walk(const struct nft_ctx *ctx, @@ -186,21 +175,21 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_rbtree *priv = nft_set_priv(set); - const struct nft_rbtree_elem *rbe; + struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&rbe->ext, genmask)) + goto cont; - rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_data_copy(&elem.key, &rbe->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem.data, rbe->data); - elem.flags = rbe->flags; + elem.priv = rbe; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { @@ -237,7 +226,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_rbtree_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe); } } @@ -247,9 +236,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, unsigned int nsize; nsize = sizeof(struct nft_rbtree_elem); - if (features & NFT_SET_MAP) - nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); - if (desc->size) est->size = sizeof(struct nft_rbtree) + desc->size * nsize; else @@ -262,12 +248,14 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, - .get = nft_rbtree_get, + .deactivate = nft_rbtree_deactivate, + .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP, diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index d7e9e93a4e90..03f7bf40ae75 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -44,25 +44,28 @@ int nft_redir_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_redir *priv = nft_expr_priv(expr); + unsigned int plen; int err; err = nft_redir_validate(ctx, expr, NULL); if (err < 0) return err; + plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { priv->sreg_proto_min = - ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN])); + nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]); - err = nft_validate_input_register(priv->sreg_proto_min); + err = nft_validate_register_load(priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { priv->sreg_proto_max = - ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX])); + nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]); - err = nft_validate_input_register(priv->sreg_proto_max); + err = nft_validate_register_load(priv->sreg_proto_max, + plen); if (err < 0) return err; } else { @@ -85,11 +88,11 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) const struct nft_redir *priv = nft_expr_priv(expr); if (priv->sreg_proto_min) { - if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN, - htonl(priv->sreg_proto_min))) + if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN, + priv->sreg_proto_min)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX, - htonl(priv->sreg_proto_max))) + if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX, + priv->sreg_proto_max)) goto nla_put_failure; } diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 7b5f9d58680a..62cabee42fbe 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -18,7 +18,7 @@ #include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_inet_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nf_send_unreach(pkt->skb, priv->icmp_code, + pkt->ops->hooknum); break; case NFT_REJECT_TCP_RST: nf_send_reset(pkt->skb, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, - nft_reject_icmp_code(priv->icmp_code)); + nft_reject_icmp_code(priv->icmp_code), + pkt->ops->hooknum); break; } break; @@ -56,7 +58,8 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, } break; } - data[NFT_REG_VERDICT].verdict = NF_DROP; + + regs->verdict.code = NF_DROP; } static int nft_reject_inet_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 50e1e5aaf4ce..cca96cec1b68 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t { static bool tproxy_sk_is_transparent(struct sock *sk) { - if (sk->sk_state != TCP_TIME_WAIT) { - if (inet_sk(sk)->transparent) - return true; - sock_put(sk); - } else { + switch (sk->sk_state) { + case TCP_TIME_WAIT: if (inet_twsk(sk)->tw_transparent) return true; - inet_twsk_put(inet_twsk(sk)); + break; + case TCP_NEW_SYN_RECV: + if (inet_rsk(inet_reqsk(sk))->no_srccheck) + return true; + break; + default: + if (inet_sk(sk)->transparent) + return true; } + + sock_gen_put(sk); return false; } @@ -266,7 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; } @@ -431,7 +437,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, tgi->lport ? tgi->lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 7198d660b4de..a1d126f29463 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -39,7 +39,7 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cgroup_info *info = par->matchinfo; - if (skb->sk == NULL) + if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; return (info->id == skb->sk->sk_classid) ^ info->invert; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index f440f57a452f..1caaccbc306c 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -25,16 +25,15 @@ MODULE_ALIAS("ip6t_physdev"); static bool physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) { - static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; + const struct net_device *physdev; unsigned long ret; const char *indev, *outdev; - const struct nf_bridge_info *nf_bridge; /* Not a bridged IP packet or no info available yet: * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if * the destination device will be a bridge. */ - if (!(nf_bridge = skb->nf_bridge)) { + if (!skb->nf_bridge) { /* Return MATCH if the invert flags of the used options are on */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && !(info->invert & XT_PHYSDEV_OP_BRIDGED)) @@ -54,31 +53,41 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } + physdev = nf_bridge_get_physoutdev(skb); + outdev = physdev ? physdev->name : NULL; + /* This only makes sense in the FORWARD and POSTROUTING chains */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && - (!!(nf_bridge->mask & BRNF_BRIDGED) ^ - !(info->invert & XT_PHYSDEV_OP_BRIDGED))) + (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) return false; + physdev = nf_bridge_get_physindev(skb); + indev = physdev ? physdev->name : NULL; + if ((info->bitmask & XT_PHYSDEV_OP_ISIN && - (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || + (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || (info->bitmask & XT_PHYSDEV_OP_ISOUT && - (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) + (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) return false; if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; - indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - ret = ifname_compare_aligned(indev, info->physindev, info->in_mask); - if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) - return false; + if (indev) { + ret = ifname_compare_aligned(indev, info->physindev, + info->in_mask); + + if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) + return false; + } match_outdev: if (!(info->bitmask & XT_PHYSDEV_OP_OUT)) return true; - outdev = nf_bridge->physoutdev ? - nf_bridge->physoutdev->name : nulldevname; + + if (!outdev) + return false; + ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask); return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 0d47afea9682..89045982ec94 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -193,7 +193,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) return ret; if (!match_counter0(opt.ext.packets, &info->packets)) - return 0; + return false; return match_counter0(opt.ext.bytes, &info->bytes); } @@ -239,7 +239,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) return ret; if (!match_counter(opt.ext.packets, &info->packets)) - return 0; + return false; return match_counter(opt.ext.bytes, &info->bytes); } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 13332dbf291d..e092cb046326 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -129,13 +129,24 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol, return NULL; } -static bool -socket_match(const struct sk_buff *skb, struct xt_action_param *par, - const struct xt_socket_mtinfo1 *info) +static bool xt_socket_sk_is_transparent(struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + + default: + return inet_sk(sk)->transparent; + } +} + +static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, + const struct net_device *indev) { const struct iphdr *iph = ip_hdr(skb); - struct udphdr _hdr, *hp = NULL; - struct sock *sk = skb->sk; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -145,10 +156,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, #endif if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp == NULL) - return false; + return NULL; protocol = iph->protocol; saddr = iph->saddr; @@ -158,16 +171,17 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, - &sport, &dport)) - return false; + &sport, &dport)) + return NULL; } else { - return false; + return NULL; } #ifdef XT_SOCKET_HAVE_CONNTRACK - /* Do the lookup with the original socket address in case this is a - * reply packet of an established SNAT-ted connection. */ - + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. + */ ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && @@ -183,10 +197,18 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } #endif + return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, + sport, dport, indev); +} + +static bool +socket_match(const struct sk_buff *skb, struct xt_action_param *par, + const struct xt_socket_mtinfo1 *info) +{ + struct sock *sk = skb->sk; + if (!sk) - sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, - par->in); + sk = xt_socket_lookup_slow_v4(skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -195,16 +217,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * unless XT_SOCKET_NOWILDCARD is set */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && - sk->sk_state != TCP_TIME_WAIT && + sk_fullsock(sk) && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, - if XT_SOCKET_TRANSPARENT is used */ + * if XT_SOCKET_TRANSPARENT is used + */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = ((sk->sk_state != TCP_TIME_WAIT && - inet_sk(sk)->transparent) || - (sk->sk_state == TCP_TIME_WAIT && - inet_twsk(sk)->tw_transparent)); + transparent = xt_socket_sk_is_transparent(sk); if (sk != skb->sk) sock_gen_put(sk); @@ -213,12 +233,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, sk = NULL; } - pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", - protocol, &saddr, ntohs(sport), - &daddr, ntohs(dport), - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); - - return (sk != NULL); + return sk != NULL; } static bool @@ -315,28 +330,26 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, return NULL; } -static bool -socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, + const struct net_device *indev) { - struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); - struct udphdr _hdr, *hp = NULL; - struct sock *sk = skb->sk; - const struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); - int thoff = 0, uninitialized_var(tproto); - const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + const struct in6_addr *daddr = NULL, *saddr = NULL; + struct ipv6hdr *iph = ipv6_hdr(skb); + int thoff = 0, tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); - return NF_DROP; + return NULL; } if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { - hp = skb_header_pointer(skb, thoff, - sizeof(_hdr), &_hdr); + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); if (hp == NULL) - return false; + return NULL; saddr = &iph->saddr; sport = hp->source; @@ -344,17 +357,27 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) dport = hp->dest; } else if (tproto == IPPROTO_ICMPV6) { + struct ipv6hdr ipv6_var; + if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, &sport, &dport, &ipv6_var)) - return false; + return NULL; } else { - return false; + return NULL; } + return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, + sport, dport, indev); +} + +static bool +socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + struct sock *sk = skb->sk; + if (!sk) - sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto, - saddr, daddr, sport, dport, - par->in); + sk = xt_socket_lookup_slow_v6(skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -363,16 +386,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) * unless XT_SOCKET_NOWILDCARD is set */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && - sk->sk_state != TCP_TIME_WAIT && + sk_fullsock(sk) && ipv6_addr_any(&sk->sk_v6_rcv_saddr)); /* Ignore non-transparent sockets, - if XT_SOCKET_TRANSPARENT is used */ + * if XT_SOCKET_TRANSPARENT is used + */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = ((sk->sk_state != TCP_TIME_WAIT && - inet_sk(sk)->transparent) || - (sk->sk_state == TCP_TIME_WAIT && - inet_twsk(sk)->tw_transparent)); + transparent = xt_socket_sk_is_transparent(sk); if (sk != skb->sk) sock_gen_put(sk); @@ -381,13 +402,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) sk = NULL; } - pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " - "(orig %pI6:%hu) sock %p\n", - tproto, saddr, ntohs(sport), - daddr, ntohs(dport), - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); - - return (sk != NULL); + return sk != NULL; } #endif diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 5699adb97652..0bc3460319c8 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -26,13 +26,12 @@ static bool string_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_string_info *conf = par->matchinfo; - struct ts_state state; bool invert; invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT; return (skb_find_text((struct sk_buff *)skb, conf->from_offset, - conf->to_offset, conf->config, &state) + conf->to_offset, conf->config) != UINT_MAX) ^ invert; } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 70440748fe5c..13f777f20995 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -293,15 +293,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return -ENOMEM; addr_struct.s_addr = iter4->addr; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4ADDR, + addr_struct.s_addr); if (ret_val != 0) return ret_val; addr_struct.s_addr = iter4->mask; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4MASK, + addr_struct.s_addr); if (ret_val != 0) return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); @@ -328,14 +326,12 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, if (nla_b == NULL) return -ENOMEM; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR, - sizeof(struct in6_addr), - &iter6->addr); + ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6ADDR, + &iter6->addr); if (ret_val != 0) return ret_val; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK, - sizeof(struct in6_addr), - &iter6->mask); + ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6MASK, + &iter6->mask); if (ret_val != 0) return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index aec7994f78cf..b0380927f05f 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1117,34 +1117,30 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, struct in_addr addr_struct; addr_struct.s_addr = addr4->list.addr; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV4ADDR, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV4ADDR, + addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; addr_struct.s_addr = addr4->list.mask; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV4MASK, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV4MASK, + addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; secid = addr4->secid; } else { - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV6ADDR, - sizeof(struct in6_addr), - &addr6->list.addr); + ret_val = nla_put_in6_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV6ADDR, + &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV6MASK, - sizeof(struct in6_addr), - &addr6->list.mask); + ret_val = nla_put_in6_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV6MASK, + &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 05919bf3f670..19909d0786a2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -116,6 +116,8 @@ static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); static struct list_head netlink_tap_all __read_mostly; +static const struct rhashtable_params netlink_rhashtable_params; + static inline u32 netlink_group_mask(u32 group) { return group ? 1 << (group - 1) : 0; @@ -970,41 +972,50 @@ netlink_unlock_table(void) struct netlink_compare_arg { - struct net *net; + possible_net_t pnet; u32 portid; }; -static bool netlink_compare(void *ptr, void *arg) +/* Doing sizeof directly may yield 4 extra bytes on 64-bit. */ +#define netlink_compare_arg_len \ + (offsetof(struct netlink_compare_arg, portid) + sizeof(u32)) + +static inline int netlink_compare(struct rhashtable_compare_arg *arg, + const void *ptr) { - struct netlink_compare_arg *x = arg; - struct sock *sk = ptr; + const struct netlink_compare_arg *x = arg->key; + const struct netlink_sock *nlk = ptr; - return nlk_sk(sk)->portid == x->portid && - net_eq(sock_net(sk), x->net); + return nlk->portid != x->portid || + !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); +} + +static void netlink_compare_arg_init(struct netlink_compare_arg *arg, + struct net *net, u32 portid) +{ + memset(arg, 0, sizeof(*arg)); + write_pnet(&arg->pnet, net); + arg->portid = portid; } static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, struct net *net) { - struct netlink_compare_arg arg = { - .net = net, - .portid = portid, - }; + struct netlink_compare_arg arg; - return rhashtable_lookup_compare(&table->hash, &portid, - &netlink_compare, &arg); + netlink_compare_arg_init(&arg, net, portid); + return rhashtable_lookup_fast(&table->hash, &arg, + netlink_rhashtable_params); } -static bool __netlink_insert(struct netlink_table *table, struct sock *sk) +static int __netlink_insert(struct netlink_table *table, struct sock *sk) { - struct netlink_compare_arg arg = { - .net = sock_net(sk), - .portid = nlk_sk(sk)->portid, - }; + struct netlink_compare_arg arg; - return rhashtable_lookup_compare_insert(&table->hash, - &nlk_sk(sk)->node, - &netlink_compare, &arg); + netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); + return rhashtable_lookup_insert_key(&table->hash, &arg, + &nlk_sk(sk)->node, + netlink_rhashtable_params); } static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) @@ -1066,9 +1077,10 @@ static int netlink_insert(struct sock *sk, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - err = 0; - if (!__netlink_insert(table, sk)) { - err = -EADDRINUSE; + err = __netlink_insert(table, sk); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; sock_put(sk); } @@ -1082,7 +1094,8 @@ static void netlink_remove(struct sock *sk) struct netlink_table *table; table = &nl_table[sk->sk_protocol]; - if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { + if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, + netlink_rhashtable_params)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } @@ -2256,8 +2269,7 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } -static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2346,8 +2358,7 @@ out: return err; } -static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, +static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct scm_cookie scm; @@ -3116,17 +3127,28 @@ static struct pernet_operations __net_initdata netlink_net_ops = { .exit = netlink_net_exit, }; +static inline u32 netlink_hash(const void *data, u32 len, u32 seed) +{ + const struct netlink_sock *nlk = data; + struct netlink_compare_arg arg; + + netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); + return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); +} + +static const struct rhashtable_params netlink_rhashtable_params = { + .head_offset = offsetof(struct netlink_sock, node), + .key_len = netlink_compare_arg_len, + .obj_hashfn = netlink_hash, + .obj_cmpfn = netlink_compare, + .max_size = 65536, + .automatic_shrinking = true, +}; + static int __init netlink_proto_init(void) { int i; int err = proto_register(&netlink_proto, 0); - struct rhashtable_params ht_params = { - .head_offset = offsetof(struct netlink_sock, node), - .key_offset = offsetof(struct netlink_sock, portid), - .key_len = sizeof(u32), /* portid */ - .hashfn = jhash, - .max_shift = 16, /* 64K */ - }; if (err != 0) goto out; @@ -3138,7 +3160,8 @@ static int __init netlink_proto_init(void) goto panic; for (i = 0; i < MAX_LINKS; i++) { - if (rhashtable_init(&nl_table[i].hash, &ht_params) < 0) { + if (rhashtable_init(&nl_table[i].hash, + &netlink_rhashtable_params) < 0) { while (--i > 0) rhashtable_destroy(&nl_table[i].hash); kfree(nl_table); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 69f1d5e9959f..b987fd56c3c5 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1023,8 +1023,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) return 1; } -static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); @@ -1133,8 +1132,8 @@ out: return err; } -static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 6ae063cebf7d..988f542481a8 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -65,36 +65,6 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) return 1; } -#ifdef CONFIG_INET - -static int nr_rebuild_header(struct sk_buff *skb) -{ - unsigned char *bp = skb->data; - - if (arp_find(bp + 7, skb)) - return 1; - - bp[6] &= ~AX25_CBIT; - bp[6] &= ~AX25_EBIT; - bp[6] |= AX25_SSSID_SPARE; - bp += AX25_ADDR_LEN; - - bp[6] &= ~AX25_CBIT; - bp[6] |= AX25_EBIT; - bp[6] |= AX25_SSSID_SPARE; - - return 0; -} - -#else - -static int nr_rebuild_header(struct sk_buff *skb) -{ - return 1; -} - -#endif - static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) @@ -188,7 +158,6 @@ static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev) static const struct header_ops nr_header_ops = { .create = nr_header, - .rebuild= nr_rebuild_header, }; static const struct net_device_ops nr_netdev_ops = { diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index e181e290427c..9578bd6a4f3e 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -750,8 +750,8 @@ error: return ret; } -static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -793,8 +793,8 @@ static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return nfc_llcp_send_i_frame(llcp_sock, msg, len); } -static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 9575a1892607..49ff32106080 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -907,6 +907,16 @@ static int nci_se_io(struct nfc_dev *nfc_dev, u32 se_idx, return 0; } +static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + + if (!ndev->ops->fw_download) + return -ENOTSUPP; + + return ndev->ops->fw_download(ndev, firmware_name); +} + static struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, @@ -922,6 +932,7 @@ static struct nfc_ops nci_nfc_ops = { .disable_se = nci_disable_se, .discover_se = nci_discover_se, .se_io = nci_se_io, + .fw_download = nci_fw_download, }; /* ---- Interface to NCI drivers ---- */ diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 14a2d11581da..3763036710ae 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1584,7 +1584,7 @@ static const struct genl_ops nfc_genl_ops[] = { struct urelease_work { struct work_struct w; - int portid; + u32 portid; }; static void nfc_urelease_event_work(struct work_struct *work) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 373e138c0ab6..82b4e8024778 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -211,8 +211,7 @@ static void rawsock_tx_work(struct work_struct *work) } } -static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nfc_dev *dev = nfc_rawsock(sk)->dev; @@ -248,8 +247,8 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, return len; } -static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index b7d818c59423..ed6b0f8dd1bb 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -6,6 +6,7 @@ config OPENVSWITCH tristate "Open vSwitch" depends on INET select LIBCRC32C + select MPLS select NET_MPLS_GSO ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 5bae7243c577..096c6276e6b9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -203,7 +203,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); - release_net(ovs_dp_get_net(dp)); kfree(dp->ports); kfree(dp); } @@ -1501,7 +1500,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (dp == NULL) goto err_free_reply; - ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); + ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); @@ -1575,7 +1574,6 @@ err_destroy_percpu: err_destroy_table: ovs_flow_tbl_destroy(&dp->table); err_free_dp: - release_net(ovs_dp_get_net(dp)); kfree(dp); err_free_reply: kfree_skb(reply); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 3ece94563079..4ec4a480b147 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -84,10 +84,8 @@ struct datapath { /* Stats. */ struct dp_stats_percpu __percpu *stats_percpu; -#ifdef CONFIG_NET_NS /* Network namespace ref. */ - struct net *net; -#endif + possible_net_t net; u32 user_features; }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 22b18c145c92..c691b1a1eee0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -535,11 +535,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, - nla_get_be32(a), is_mask); + nla_get_in_addr(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, - nla_get_be32(a), is_mask); + nla_get_in_addr(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_TOS: SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, @@ -648,10 +648,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; if (output->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + output->ipv4_src)) return -EMSGSIZE; if (output->ipv4_dst && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, + output->ipv4_dst)) return -EMSGSIZE; if (output->ipv4_tos && nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 3277a7520e31..6d39766e7828 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -222,7 +222,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + struct sock *sk = vxlan_port->vs->sock->sk; + __be16 dst_port = inet_sk(sk)->inet_sport; const struct ovs_key_ipv4_tunnel *tun_key; struct vxlan_metadata md = {0}; struct rtable *rt; @@ -255,7 +256,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) vxflags = vxlan_port->exts | (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); - err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, sk, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, &md, false, vxflags); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f8db7064d81c..5102c3cc4eec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -216,10 +216,16 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, static void packet_flush_mclist(struct sock *sk); struct packet_skb_cb { - unsigned int origlen; union { struct sockaddr_pkt pkt; - struct sockaddr_ll ll; + union { + /* Trick: alias skb original length with + * ll.sll_family and ll.protocol in order + * to save room. + */ + unsigned int origlen; + struct sockaddr_ll ll; + }; } sa; }; @@ -1608,8 +1614,8 @@ oom: * protocol layers and you must therefore supply it with a complete frame */ -static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); @@ -1818,13 +1824,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb = nskb; } - BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > - sizeof(skb->cb)); + sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8); sll = &PACKET_SKB_CB(skb)->sa.ll; - sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; - sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(po->origdev)) sll->sll_ifindex = orig_dev->ifindex; @@ -1833,7 +1836,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_halen = dev_parse_header(skb, sll->sll_addr); - PACKET_SKB_CB(skb)->origlen = skb->len; + /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg(). + * Use their space for storing the original skb length. + */ + PACKET_SKB_CB(skb)->sa.origlen = skb->len; if (pskb_trim(skb, snaplen)) goto drop_n_acct; @@ -1847,7 +1853,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; - skb->dropcount = atomic_read(&sk->sk_drops); + sock_skb_set_dropcount(sk, skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); @@ -1910,14 +1916,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } } - if (skb->ip_summed == CHECKSUM_PARTIAL) - status |= TP_STATUS_CSUMNOTREADY; - snaplen = skb->len; res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + status |= TP_STATUS_CSUMNOTREADY; + else if (skb->pkt_type != PACKET_OUTGOING && + (skb->ip_summed == CHECKSUM_COMPLETE || + skb_csum_unnecessary(skb))) + status |= TP_STATUS_CSUM_VALID; + if (snaplen > res) snaplen = res; @@ -2603,8 +2614,7 @@ out: return err; } -static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); @@ -2884,13 +2894,14 @@ out: * If necessary we block. */ -static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; int vnet_hdr_len = 0; + unsigned int origlen = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) @@ -2990,6 +3001,15 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (err) goto out_free; + if (sock->type != SOCK_PACKET) { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + + /* Original length was stored in sockaddr_ll fields */ + origlen = PACKET_SKB_CB(skb)->sa.origlen; + sll->sll_family = AF_PACKET; + sll->sll_protocol = skb->protocol; + } + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { @@ -3001,6 +3021,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = sizeof(struct sockaddr_pkt); } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); } @@ -3014,7 +3035,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_status = TP_STATUS_USER; if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; - aux.tp_len = PACKET_SKB_CB(skb)->origlen; + else if (skb->pkt_type != PACKET_OUTGOING && + (skb->ip_summed == CHECKSUM_COMPLETE || + skb_csum_unnecessary(skb))) + aux.tp_status |= TP_STATUS_CSUM_VALID; + + aux.tp_len = origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); diff --git a/net/packet/internal.h b/net/packet/internal.h index cdddf6a30399..fe6e20caea1d 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -74,9 +74,7 @@ extern struct mutex fanout_mutex; #define PACKET_FANOUT_MAX 256 struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; unsigned int num_members; u16 id; u8 type; diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 26054b4b467c..5e710435ffa9 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -83,8 +83,7 @@ static int pn_init(struct sock *sk) return 0; } -static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; @@ -125,9 +124,8 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, return (err >= 0) ? len : err; } -static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 5d3f2b7507d4..6de2aeb98a1f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1118,8 +1118,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) } -static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pep_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct pep_sock *pn = pep_sk(sk); struct sk_buff *skb; @@ -1246,9 +1245,8 @@ struct sk_buff *pep_read(struct sock *sk) return skb; } -static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb; int err; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 008214a3d5eb..d575ef4e9aa6 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -425,15 +425,15 @@ out: return err; } -static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int pn_socket_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct sock *sk = sock->sk; if (pn_socket_autobind(sock)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, m, total_len); + return sk->sk_prot->sendmsg(sk, m, total_len); } const struct proto_ops phonet_dgram_ops = { diff --git a/net/rds/connection.c b/net/rds/connection.c index 378c3a6acf84..14f041398ca1 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -130,7 +130,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, rcu_read_lock(); conn = rds_conn_lookup(head, laddr, faddr, trans); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && - !is_outgoing) { + laddr == faddr && !is_outgoing) { /* This is a looped back IB connection, and we're * called by the code handling the incoming connect. * We need a second connection object into which we @@ -193,6 +193,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, } atomic_set(&conn->c_state, RDS_CONN_DOWN); + conn->c_send_gen = 0; conn->c_reconnect_jiffies = 0; INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); diff --git a/net/rds/rds.h b/net/rds/rds.h index c2a5eef41343..0d41155a2258 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -110,6 +110,7 @@ struct rds_connection { void *c_transport_data; atomic_t c_state; + unsigned long c_send_gen; unsigned long c_flags; unsigned long c_reconnect_jiffies; struct delayed_work c_send_w; @@ -702,8 +703,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, void rds_inc_put(struct rds_incoming *inc); void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, struct rds_incoming *inc, gfp_t gfp); -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags); +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags); void rds_clear_recv_queue(struct rds_sock *rs); int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); void rds_inc_info_copy(struct rds_incoming *inc, @@ -711,8 +712,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, __be32 saddr, __be32 daddr, int flip); /* send.c */ -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len); +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); void rds_send_reset(struct rds_connection *conn); int rds_send_xmit(struct rds_connection *conn); struct sockaddr_in; diff --git a/net/rds/recv.c b/net/rds/recv.c index f9ec1acd801c..a00462b0d01d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -395,8 +395,8 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg) return 0; } -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags) +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rds/send.c b/net/rds/send.c index 42f65d4305c8..e9430f537f9c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -140,8 +140,11 @@ int rds_send_xmit(struct rds_connection *conn) struct scatterlist *sg; int ret = 0; LIST_HEAD(to_be_dropped); + int batch_count; + unsigned long send_gen = 0; restart: + batch_count = 0; /* * sendmsg calls here after having queued its message on the send @@ -157,6 +160,17 @@ restart: } /* + * we record the send generation after doing the xmit acquire. + * if someone else manages to jump in and do some work, we'll use + * this to avoid a goto restart farther down. + * + * The acquire_in_xmit() check above ensures that only one + * caller can increment c_send_gen at any time. + */ + conn->c_send_gen++; + send_gen = conn->c_send_gen; + + /* * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT, * we do the opposite to avoid races. */ @@ -202,6 +216,16 @@ restart: if (!rm) { unsigned int len; + batch_count++; + + /* we want to process as big a batch as we can, but + * we also want to avoid softlockups. If we've been + * through a lot of messages, lets back off and see + * if anyone else jumps in + */ + if (batch_count >= 1024) + goto over_batch; + spin_lock_irqsave(&conn->c_lock, flags); if (!list_empty(&conn->c_send_queue)) { @@ -357,9 +381,9 @@ restart: } } +over_batch: if (conn->c_trans->xmit_complete) conn->c_trans->xmit_complete(conn); - release_in_xmit(conn); /* Nuke any messages we decided not to retransmit. */ @@ -380,10 +404,15 @@ restart: * If the transport cannot continue (i.e ret != 0), then it must * call us when more room is available, such as from the tx * completion handler. + * + * We have an extra generation check here so that if someone manages + * to jump in after our release_in_xmit, we'll see that they have done + * some work and we will skip our goto */ if (ret == 0) { smp_mb(); - if (!list_empty(&conn->c_send_queue)) { + if (!list_empty(&conn->c_send_queue) && + send_gen == conn->c_send_gen) { rds_stats_inc(s_send_lock_queue_raced); goto restart; } @@ -920,8 +949,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len) +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 43bac7c4dd9e..8ae603069a1a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1046,8 +1046,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros return 1; } -static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); @@ -1211,8 +1210,8 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, } -static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 50005888be57..369ca81a8c5d 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -41,6 +41,9 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev, { unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2); + if (daddr) + memcpy(buff + 7, daddr, dev->addr_len); + *buff++ = ROSE_GFI | ROSE_Q_BIT; *buff++ = 0x00; *buff++ = ROSE_DATA; @@ -53,43 +56,6 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev, return -37; } -static int rose_rebuild_header(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - struct net_device *dev = skb->dev; - struct net_device_stats *stats = &dev->stats; - unsigned char *bp = (unsigned char *)skb->data; - struct sk_buff *skbn; - unsigned int len; - - if (arp_find(bp + 7, skb)) { - return 1; - } - - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { - kfree_skb(skb); - return 1; - } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - kfree_skb(skb); - - len = skbn->len; - - if (!rose_route_frame(skbn, NULL)) { - kfree_skb(skbn); - stats->tx_errors++; - return 1; - } - - stats->tx_packets++; - stats->tx_bytes += len; -#endif - return 1; -} - static int rose_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; @@ -134,19 +100,26 @@ static int rose_close(struct net_device *dev) static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; + unsigned int len = skb->len; if (!netif_running(dev)) { printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n"); return NETDEV_TX_BUSY; } - dev_kfree_skb(skb); - stats->tx_errors++; + + if (!rose_route_frame(skb, NULL)) { + dev_kfree_skb(skb); + stats->tx_errors++; + return NETDEV_TX_OK; + } + + stats->tx_packets++; + stats->tx_bytes += len; return NETDEV_TX_OK; } static const struct header_ops rose_header_ops = { .create = rose_header, - .rebuild = rose_rebuild_header, }; static const struct net_device_ops rose_netdev_ops = { diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7b1670489638..0095b9a0b779 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -441,8 +441,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr, * - sends a call data packet * - may send an abort (abort code in control data) */ -static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len) +static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { struct rxrpc_transport *trans; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); @@ -482,7 +481,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, switch (rx->sk.sk_state) { case RXRPC_SERVER_LISTENING: if (!m->msg_name) { - ret = rxrpc_server_sendmsg(iocb, rx, m, len); + ret = rxrpc_server_sendmsg(rx, m, len); break; } case RXRPC_SERVER_BOUND: @@ -492,7 +491,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, break; } case RXRPC_CLIENT_CONNECTED: - ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len); + ret = rxrpc_client_sendmsg(rx, trans, m, len); break; default: ret = -ENOTCONN; diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 481f89f93789..4505a691d88c 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -28,7 +28,7 @@ const char *rxrpc_pkts[] = { "?00", "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", - "?09", "?10", "?11", "?12", "?13", "?14", "?15" + "?09", "?10", "?11", "?12", "VERSION", "?14", "?15" }; /* @@ -593,6 +593,20 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, rxrpc_queue_conn(conn); } +/* + * post endpoint-level events to the local endpoint + * - this includes debug and version messages + */ +static void rxrpc_post_packet_to_local(struct rxrpc_local *local, + struct sk_buff *skb) +{ + _enter("%p,%p", local, skb); + + atomic_inc(&local->usage); + skb_queue_tail(&local->event_queue, skb); + rxrpc_queue_work(&local->event_processor); +} + static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local, struct sk_buff *skb, struct rxrpc_skb_priv *sp) @@ -699,6 +713,11 @@ void rxrpc_data_ready(struct sock *sk) goto bad_message; } + if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) { + rxrpc_post_packet_to_local(local, skb); + goto out; + } + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) goto bad_message; @@ -731,6 +750,8 @@ void rxrpc_data_ready(struct sock *sk) else goto cant_route_call; } + +out: rxrpc_put_local(local); return; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ba9fd36d3f15..aef1bd294e17 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -152,11 +152,13 @@ struct rxrpc_local { struct work_struct destroyer; /* endpoint destroyer */ struct work_struct acceptor; /* incoming call processor */ struct work_struct rejecter; /* packet reject writer */ + struct work_struct event_processor; /* endpoint event processor */ struct list_head services; /* services listening on this endpoint */ struct list_head link; /* link in endpoint list */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ + struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ spinlock_t lock; /* access lock */ rwlock_t services_lock; /* lock for services list */ atomic_t usage; @@ -548,10 +550,9 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, extern unsigned rxrpc_resend_timeout; int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); -int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct rxrpc_transport *, struct msghdr *, size_t); -int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *, - size_t); +int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *, + struct msghdr *, size_t); +int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* * ar-peer.c @@ -572,8 +573,7 @@ extern const struct file_operations rxrpc_connection_seq_fops; * ar-recvmsg.c */ void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); -int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, - int); +int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * ar-security.c diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index 87f7135d238b..ca904ed5400a 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -13,16 +13,22 @@ #include <linux/net.h> #include <linux/skbuff.h> #include <linux/slab.h> +#include <linux/udp.h> +#include <linux/ip.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#include <generated/utsrelease.h> #include "ar-internal.h" +static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC"; + static LIST_HEAD(rxrpc_locals); DEFINE_RWLOCK(rxrpc_local_lock); static DECLARE_RWSEM(rxrpc_local_sem); static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq); static void rxrpc_destroy_local(struct work_struct *work); +static void rxrpc_process_local_events(struct work_struct *work); /* * allocate a new local @@ -37,11 +43,13 @@ struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx) INIT_WORK(&local->destroyer, &rxrpc_destroy_local); INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls); INIT_WORK(&local->rejecter, &rxrpc_reject_packets); + INIT_WORK(&local->event_processor, &rxrpc_process_local_events); INIT_LIST_HEAD(&local->services); INIT_LIST_HEAD(&local->link); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->accept_queue); skb_queue_head_init(&local->reject_queue); + skb_queue_head_init(&local->event_queue); spin_lock_init(&local->lock); rwlock_init(&local->services_lock); atomic_set(&local->usage, 1); @@ -264,10 +272,12 @@ static void rxrpc_destroy_local(struct work_struct *work) ASSERT(list_empty(&local->services)); ASSERT(!work_pending(&local->acceptor)); ASSERT(!work_pending(&local->rejecter)); + ASSERT(!work_pending(&local->event_processor)); /* finish cleaning up the local descriptor */ rxrpc_purge_queue(&local->accept_queue); rxrpc_purge_queue(&local->reject_queue); + rxrpc_purge_queue(&local->event_queue); kernel_sock_shutdown(local->socket, SHUT_RDWR); sock_release(local->socket); @@ -308,3 +318,91 @@ void __exit rxrpc_destroy_all_locals(void) _leave(""); } + +/* + * Reply to a version request + */ +static void rxrpc_send_version_request(struct rxrpc_local *local, + struct rxrpc_header *hdr, + struct sk_buff *skb) +{ + struct sockaddr_in sin; + struct msghdr msg; + struct kvec iov[2]; + size_t len; + int ret; + + _enter(""); + + sin.sin_family = AF_INET; + sin.sin_port = udp_hdr(skb)->source; + sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + + msg.msg_name = &sin; + msg.msg_namelen = sizeof(sin); + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + hdr->seq = 0; + hdr->serial = 0; + hdr->type = RXRPC_PACKET_TYPE_VERSION; + hdr->flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED); + hdr->userStatus = 0; + hdr->_rsvd = 0; + + iov[0].iov_base = hdr; + iov[0].iov_len = sizeof(*hdr); + iov[1].iov_base = (char *)rxrpc_version_string; + iov[1].iov_len = sizeof(rxrpc_version_string); + + len = iov[0].iov_len + iov[1].iov_len; + + _proto("Tx VERSION (reply)"); + + ret = kernel_sendmsg(local->socket, &msg, iov, 2, len); + if (ret < 0) + _debug("sendmsg failed: %d", ret); + + _leave(""); +} + +/* + * Process event packets targetted at a local endpoint. + */ +static void rxrpc_process_local_events(struct work_struct *work) +{ + struct rxrpc_local *local = container_of(work, struct rxrpc_local, event_processor); + struct sk_buff *skb; + char v; + + _enter(""); + + atomic_inc(&local->usage); + + while ((skb = skb_dequeue(&local->event_queue))) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + kdebug("{%d},{%u}", local->debug_id, sp->hdr.type); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: + if (skb_copy_bits(skb, 0, &v, 1) < 0) + return; + _proto("Rx VERSION { %02x }", v); + if (v == 0) + rxrpc_send_version_request(local, &sp->hdr, skb); + break; + + default: + /* Just ignore anything we don't understand */ + break; + } + + rxrpc_put_local(local); + rxrpc_free_skb(skb); + } + + rxrpc_put_local(local); + _leave(""); +} diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 8331c95e1522..c0042807bfc6 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -23,8 +23,7 @@ */ unsigned rxrpc_resend_timeout = 4 * HZ; -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len); @@ -129,9 +128,8 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) * - caller holds the socket locked * - the socket may be either a client socket or a server socket */ -int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct rxrpc_transport *trans, struct msghdr *msg, - size_t len) +int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans, + struct msghdr *msg, size_t len) { struct rxrpc_conn_bundle *bundle; enum rxrpc_command cmd; @@ -191,7 +189,7 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, /* request phase complete for this client call */ ret = -EPROTO; } else { - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); } rxrpc_put_call(call); @@ -232,7 +230,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - ret = rxrpc_send_data(NULL, call->socket, call, msg, len); + ret = rxrpc_send_data(call->socket, call, msg, len); } release_sock(&call->socket->sk); @@ -271,8 +269,7 @@ EXPORT_SYMBOL(rxrpc_kernel_abort_call); * send a message through a server socket * - caller holds the socket locked */ -int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct msghdr *msg, size_t len) +int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) { enum rxrpc_command cmd; struct rxrpc_call *call; @@ -313,7 +310,7 @@ int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, break; } - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); break; case RXRPC_CMD_SEND_ABORT: @@ -520,8 +517,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, * - must be called in process context * - caller holds the socket locked */ -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len) { @@ -546,11 +542,7 @@ static int rxrpc_send_data(struct kiocb *iocb, call->tx_pending = NULL; copied = 0; - if (len > iov_iter_count(&msg->msg_iter)) - len = iov_iter_count(&msg->msg_iter); - while (len) { - int copy; - + do { if (!skb) { size_t size, chunk, max, space; @@ -572,8 +564,8 @@ static int rxrpc_send_data(struct kiocb *iocb, max &= ~(call->conn->size_align - 1UL); chunk = max; - if (chunk > len && !more) - chunk = len; + if (chunk > msg_data_left(msg) && !more) + chunk = msg_data_left(msg); space = chunk + call->conn->size_align; space &= ~(call->conn->size_align - 1UL); @@ -616,23 +608,23 @@ static int rxrpc_send_data(struct kiocb *iocb, sp = rxrpc_skb(skb); /* append next segment of data to the current buffer */ - copy = skb_tailroom(skb); - ASSERTCMP(copy, >, 0); - if (copy > len) - copy = len; - if (copy > sp->remain) - copy = sp->remain; - - _debug("add"); - ret = skb_add_data(skb, &msg->msg_iter, copy); - _debug("added"); - if (ret < 0) - goto efault; - sp->remain -= copy; - skb->mark += copy; - copied += copy; - - len -= copy; + if (msg_data_left(msg) > 0) { + int copy = skb_tailroom(skb); + ASSERTCMP(copy, >, 0); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); + if (copy > sp->remain) + copy = sp->remain; + + _debug("add"); + ret = skb_add_data(skb, &msg->msg_iter, copy); + _debug("added"); + if (ret < 0) + goto efault; + sp->remain -= copy; + skb->mark += copy; + copied += copy; + } /* check for the far side aborting the call or a network error * occurring */ @@ -640,7 +632,8 @@ static int rxrpc_send_data(struct kiocb *iocb, goto call_aborted; /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || (!len && !more)) { + if (sp->remain <= 0 || + (msg_data_left(msg) == 0 && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -670,7 +663,7 @@ static int rxrpc_send_data(struct kiocb *iocb, sp->hdr.serviceId = conn->service_id; sp->hdr.flags = conn->out_clientflag; - if (len == 0 && !more) + if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; else if (CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz) > 1) @@ -686,10 +679,10 @@ static int rxrpc_send_data(struct kiocb *iocb, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, !iov_iter_count(&msg->msg_iter) && !more); + rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); skb = NULL; } - } + } while (msg_data_left(msg) > 0); success: ret = copied; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 19a560626dc4..b92beded7459 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -43,8 +43,8 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) * - we need to be careful about two or more threads calling recvmsg * simultaneously */ -int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct rxrpc_skb_priv *sp; struct rxrpc_call *call = NULL, *continue_call = NULL; @@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, &call->conn->trans->peer->srx, len); msg->msg_namelen = len; } - sock_recv_ts_and_drops(msg, &rx->sk, skb); + sock_recv_timestamp(msg, &rx->sk, skb); } /* receive the message */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 5f6288fa3f12..4d2cede17468 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -13,26 +13,40 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/filter.h> +#include <linux/bpf.h> + #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> -#define BPF_TAB_MASK 15 +#define BPF_TAB_MASK 15 +#define ACT_BPF_NAME_LEN 256 + +struct tcf_bpf_cfg { + struct bpf_prog *filter; + struct sock_filter *bpf_ops; + char *bpf_name; + u32 bpf_fd; + u16 bpf_num_ops; +}; -static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { - struct tcf_bpf *b = a->priv; + struct tcf_bpf *prog = act->priv; int action, filter_res; - spin_lock(&b->tcf_lock); + spin_lock(&prog->tcf_lock); - b->tcf_tm.lastuse = jiffies; - bstats_update(&b->tcf_bstats, skb); + prog->tcf_tm.lastuse = jiffies; + bstats_update(&prog->tcf_bstats, skb); - filter_res = BPF_PROG_RUN(b->filter, skb); + /* Needed here for accessing maps. */ + rcu_read_lock(); + filter_res = BPF_PROG_RUN(prog->filter, skb); + rcu_read_unlock(); /* A BPF program may overwrite the default action opcode. * Similarly as in cls_bpf, if filter_res == -1 we use the @@ -52,52 +66,87 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, break; case TC_ACT_SHOT: action = filter_res; - b->tcf_qstats.drops++; + prog->tcf_qstats.drops++; break; case TC_ACT_UNSPEC: - action = b->tcf_action; + action = prog->tcf_action; break; default: action = TC_ACT_UNSPEC; break; } - spin_unlock(&b->tcf_lock); + spin_unlock(&prog->tcf_lock); return action; } -static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, +static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) +{ + return !prog->bpf_ops; +} + +static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, + struct sk_buff *skb) +{ + struct nlattr *nla; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) + return -EMSGSIZE; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * + sizeof(struct sock_filter)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + + return 0; +} + +static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, + struct sk_buff *skb) +{ + if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd)) + return -EMSGSIZE; + + if (prog->bpf_name && + nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) + return -EMSGSIZE; + + return 0; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) { unsigned char *tp = skb_tail_pointer(skb); - struct tcf_bpf *b = a->priv; + struct tcf_bpf *prog = act->priv; struct tc_act_bpf opt = { - .index = b->tcf_index, - .refcnt = b->tcf_refcnt - ref, - .bindcnt = b->tcf_bindcnt - bind, - .action = b->tcf_action, + .index = prog->tcf_index, + .refcnt = prog->tcf_refcnt - ref, + .bindcnt = prog->tcf_bindcnt - bind, + .action = prog->tcf_action, }; - struct tcf_t t; - struct nlattr *nla; + struct tcf_t tm; + int ret; if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) - goto nla_put_failure; - - nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * - sizeof(struct sock_filter)); - if (!nla) + if (tcf_bpf_is_ebpf(prog)) + ret = tcf_bpf_dump_ebpf_info(prog, skb); + else + ret = tcf_bpf_dump_bpf_info(prog, skb); + if (ret) goto nla_put_failure; - memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install); + tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); + tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); - t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(b->tcf_tm.expires); - if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) goto nla_put_failure; + return skb->len; nla_put_failure: @@ -107,36 +156,21 @@ nla_put_failure: static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, + [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; -static int tcf_bpf_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) { - struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; - struct tc_act_bpf *parm; - struct tcf_bpf *b; - u16 bpf_size, bpf_num_ops; struct sock_filter *bpf_ops; - struct sock_fprog_kern tmp; + struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; + u16 bpf_size, bpf_num_ops; int ret; - if (!nla) - return -EINVAL; - - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); - if (ret < 0) - return ret; - - if (!tb[TCA_ACT_BPF_PARMS] || - !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) - return -EINVAL; - parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) return -EINVAL; @@ -146,68 +180,165 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return -EINVAL; bpf_ops = kzalloc(bpf_size, GFP_KERNEL); - if (!bpf_ops) + if (bpf_ops == NULL) return -ENOMEM; memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); - tmp.len = bpf_num_ops; - tmp.filter = bpf_ops; + fprog_tmp.len = bpf_num_ops; + fprog_tmp.filter = bpf_ops; - ret = bpf_prog_create(&fp, &tmp); - if (ret) - goto free_bpf_ops; + ret = bpf_prog_create(&fp, &fprog_tmp); + if (ret < 0) { + kfree(bpf_ops); + return ret; + } - if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); - if (ret) + cfg->bpf_ops = bpf_ops; + cfg->bpf_num_ops = bpf_num_ops; + cfg->filter = fp; + + return 0; +} + +static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) +{ + struct bpf_prog *fp; + char *name = NULL; + u32 bpf_fd; + + bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); + + fp = bpf_prog_get(bpf_fd); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { + bpf_prog_put(fp); + return -EINVAL; + } + + if (tb[TCA_ACT_BPF_NAME]) { + name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), + nla_len(tb[TCA_ACT_BPF_NAME]), + GFP_KERNEL); + if (!name) { + bpf_prog_put(fp); + return -ENOMEM; + } + } + + cfg->bpf_fd = bpf_fd; + cfg->bpf_name = name; + cfg->filter = fp; + + return 0; +} + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *act, + int replace, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *prog; + struct tcf_bpf_cfg cfg; + bool is_bpf, is_ebpf; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; + is_ebpf = tb[TCA_ACT_BPF_FD]; + + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || + !tb[TCA_ACT_BPF_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + memset(&cfg, 0, sizeof(cfg)); + + ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : + tcf_bpf_init_from_efd(tb, &cfg); + if (ret < 0) + return ret; + + if (!tcf_hash_check(parm->index, act, bind)) { + ret = tcf_hash_create(parm->index, est, act, + sizeof(*prog), bind); + if (ret < 0) goto destroy_fp; ret = ACT_P_CREATED; } else { + /* Don't override defaults. */ if (bind) goto destroy_fp; - tcf_hash_release(a, bind); - if (!ovr) { + + tcf_hash_release(act, bind); + if (!replace) { ret = -EEXIST; goto destroy_fp; } } - b = to_bpf(a); - spin_lock_bh(&b->tcf_lock); - b->tcf_action = parm->action; - b->bpf_num_ops = bpf_num_ops; - b->bpf_ops = bpf_ops; - b->filter = fp; - spin_unlock_bh(&b->tcf_lock); + prog = to_bpf(act); + spin_lock_bh(&prog->tcf_lock); + + prog->bpf_ops = cfg.bpf_ops; + prog->bpf_name = cfg.bpf_name; + + if (cfg.bpf_num_ops) + prog->bpf_num_ops = cfg.bpf_num_ops; + if (cfg.bpf_fd) + prog->bpf_fd = cfg.bpf_fd; + + prog->tcf_action = parm->action; + prog->filter = cfg.filter; + + spin_unlock_bh(&prog->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(a); + tcf_hash_insert(act); + return ret; destroy_fp: - bpf_prog_destroy(fp); -free_bpf_ops: - kfree(bpf_ops); + if (is_ebpf) + bpf_prog_put(cfg.filter); + else + bpf_prog_destroy(cfg.filter); + + kfree(cfg.bpf_ops); + kfree(cfg.bpf_name); + return ret; } -static void tcf_bpf_cleanup(struct tc_action *a, int bind) +static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - struct tcf_bpf *b = a->priv; + const struct tcf_bpf *prog = act->priv; - bpf_prog_destroy(b->filter); + if (tcf_bpf_is_ebpf(prog)) + bpf_prog_put(prog->filter); + else + bpf_prog_destroy(prog->filter); } -static struct tc_action_ops act_bpf_ops = { - .kind = "bpf", - .type = TCA_ACT_BPF, - .owner = THIS_MODULE, - .act = tcf_bpf, - .dump = tcf_bpf_dump, - .cleanup = tcf_bpf_cleanup, - .init = tcf_bpf_init, +static struct tc_action_ops act_bpf_ops __read_mostly = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, }; static int __init bpf_init_module(void) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index baef987fe2c0..8b0470e418dc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -286,7 +286,7 @@ replay: RCU_INIT_POINTER(*back, next); tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); - tcf_destroy(tp); + tcf_destroy(tp, true); err = 0; goto errout; } @@ -301,14 +301,20 @@ replay: err = -EEXIST; if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_destroy(tp); + tcf_destroy(tp, true); goto errout; } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); - if (err == 0) + if (err == 0) { tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + if (tcf_destroy(tp, false)) { + struct tcf_proto *next = rtnl_dereference(tp->next); + + RCU_INIT_POINTER(*back, next); + } + } goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); @@ -329,7 +335,7 @@ replay: tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); } else { if (tp_created) - tcf_destroy(tp); + tcf_destroy(tp, true); } errout: diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index fc399db86f11..0b8c3ace671f 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -96,11 +96,14 @@ static void basic_delete_filter(struct rcu_head *head) kfree(f); } -static void basic_destroy(struct tcf_proto *tp) +static bool basic_destroy(struct tcf_proto *tp, bool force) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; + if (!force && !list_empty(&head->flist)) + return false; + list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); @@ -108,6 +111,7 @@ static void basic_destroy(struct tcf_proto *tp) } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static int basic_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5f3ee9e4b5bf..5c4171c5d2bd 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -16,6 +16,8 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/filter.h> +#include <linux/bpf.h> + #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> @@ -24,6 +26,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); MODULE_DESCRIPTION("TC BPF based classifier"); +#define CLS_BPF_NAME_LEN 256 + struct cls_bpf_head { struct list_head plist; u32 hgen; @@ -32,18 +36,24 @@ struct cls_bpf_head { struct cls_bpf_prog { struct bpf_prog *filter; - struct sock_filter *bpf_ops; - struct tcf_exts exts; - struct tcf_result res; struct list_head link; + struct tcf_result res; + struct tcf_exts exts; u32 handle; - u16 bpf_num_ops; + union { + u32 bpf_fd; + u16 bpf_num_ops; + }; + struct sock_filter *bpf_ops; + const char *bpf_name; struct tcf_proto *tp; struct rcu_head rcu; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, + [TCA_BPF_FD] = { .type = NLA_U32 }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -54,8 +64,10 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_bpf_head *head = rcu_dereference_bh(tp->root); struct cls_bpf_prog *prog; - int ret; + int ret = -1; + /* Needed here for accessing maps. */ + rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { int filter_res = BPF_PROG_RUN(prog->filter, skb); @@ -70,10 +82,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (ret < 0) continue; - return ret; + break; } + rcu_read_unlock(); - return -1; + return ret; +} + +static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) +{ + return !prog->bpf_ops; } static int cls_bpf_init(struct tcf_proto *tp) @@ -94,8 +112,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); - bpf_prog_destroy(prog->filter); + if (cls_bpf_is_ebpf(prog)) + bpf_prog_put(prog->filter); + else + bpf_prog_destroy(prog->filter); + kfree(prog->bpf_name); kfree(prog->bpf_ops); kfree(prog); } @@ -114,14 +136,18 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); + return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp) +static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *tmp; + if (!force && !list_empty(&head->plist)) + return false; + list_for_each_entry_safe(prog, tmp, &head->plist, link) { list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); @@ -130,6 +156,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) @@ -151,69 +178,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) return ret; } -static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, - struct cls_bpf_prog *prog, - unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) +static int cls_bpf_prog_from_ops(struct nlattr **tb, + struct cls_bpf_prog *prog, u32 classid) { struct sock_filter *bpf_ops; - struct tcf_exts exts; - struct sock_fprog_kern tmp; + struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; u16 bpf_size, bpf_num_ops; - u32 classid; int ret; - if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID]) - return -EINVAL; - - tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); - if (ret < 0) - return ret; - - classid = nla_get_u32(tb[TCA_BPF_CLASSID]); bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { - ret = -EINVAL; - goto errout; - } + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; bpf_size = bpf_num_ops * sizeof(*bpf_ops); - if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { - ret = -EINVAL; - goto errout; - } + if (bpf_size != nla_len(tb[TCA_BPF_OPS])) + return -EINVAL; bpf_ops = kzalloc(bpf_size, GFP_KERNEL); - if (bpf_ops == NULL) { - ret = -ENOMEM; - goto errout; - } + if (bpf_ops == NULL) + return -ENOMEM; memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_num_ops; - tmp.filter = bpf_ops; + fprog_tmp.len = bpf_num_ops; + fprog_tmp.filter = bpf_ops; - ret = bpf_prog_create(&fp, &tmp); - if (ret) - goto errout_free; + ret = bpf_prog_create(&fp, &fprog_tmp); + if (ret < 0) { + kfree(bpf_ops); + return ret; + } - prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; + prog->bpf_num_ops = bpf_num_ops; + prog->bpf_name = NULL; + + prog->filter = fp; + prog->res.classid = classid; + + return 0; +} + +static int cls_bpf_prog_from_efd(struct nlattr **tb, + struct cls_bpf_prog *prog, u32 classid) +{ + struct bpf_prog *fp; + char *name = NULL; + u32 bpf_fd; + + bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + + fp = bpf_prog_get(bpf_fd); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + if (fp->type != BPF_PROG_TYPE_SCHED_CLS) { + bpf_prog_put(fp); + return -EINVAL; + } + + if (tb[TCA_BPF_NAME]) { + name = kmemdup(nla_data(tb[TCA_BPF_NAME]), + nla_len(tb[TCA_BPF_NAME]), + GFP_KERNEL); + if (!name) { + bpf_prog_put(fp); + return -ENOMEM; + } + } + + prog->bpf_ops = NULL; + prog->bpf_fd = bpf_fd; + prog->bpf_name = name; + prog->filter = fp; prog->res.classid = classid; + return 0; +} + +static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, + struct cls_bpf_prog *prog, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) +{ + struct tcf_exts exts; + bool is_bpf, is_ebpf; + u32 classid; + int ret; + + is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; + is_ebpf = tb[TCA_BPF_FD]; + + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || + !tb[TCA_BPF_CLASSID]) + return -EINVAL; + + tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + if (ret < 0) + return ret; + + classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + + ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) : + cls_bpf_prog_from_efd(tb, prog, classid); + if (ret < 0) { + tcf_exts_destroy(&exts); + return ret; + } + tcf_bind_filter(tp, &prog->res, base); tcf_exts_change(tp, &prog->exts, &exts); return 0; -errout_free: - kfree(bpf_ops); -errout: - tcf_exts_destroy(&exts); - return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -297,11 +376,43 @@ errout: return ret; } +static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog, + struct sk_buff *skb) +{ + struct nlattr *nla; + + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) + return -EMSGSIZE; + + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * + sizeof(struct sock_filter)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + + return 0; +} + +static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, + struct sk_buff *skb) +{ + if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd)) + return -EMSGSIZE; + + if (prog->bpf_name && + nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) + return -EMSGSIZE; + + return 0; +} + static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *tm) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; - struct nlattr *nest, *nla; + struct nlattr *nest; + int ret; if (prog == NULL) return skb->len; @@ -314,16 +425,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) - goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * - sizeof(struct sock_filter)); - if (nla == NULL) + if (cls_bpf_is_ebpf(prog)) + ret = cls_bpf_dump_ebpf_info(prog, skb); + else + ret = cls_bpf_dump_bpf_info(prog, skb); + if (ret) goto nla_put_failure; - memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); - if (tcf_exts_dump(skb, &prog->exts) < 0) goto nla_put_failure; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 221697ab0247..ea611b216412 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -143,14 +143,18 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp) +static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); + if (!force) + return false; + if (head) { RCU_INIT_POINTER(tp->root, NULL); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); } + return true; } static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 461410394d08..a620c4e288a5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -557,17 +557,21 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp) +static bool flow_destroy(struct tcf_proto *tp, bool force) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; + if (!force && !list_empty(&head->filters)) + return false; + list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static unsigned long flow_get(struct tcf_proto *tp, u32 handle) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index a5269f76004c..715e01e5910a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -33,6 +33,7 @@ struct fw_head { u32 mask; + bool mask_set; struct fw_filter __rcu *ht[HTSIZE]; struct rcu_head rcu; }; @@ -113,6 +114,14 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle) static int fw_init(struct tcf_proto *tp) { + struct fw_head *head; + + head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + head->mask_set = false; + rcu_assign_pointer(tp->root, head); return 0; } @@ -124,14 +133,20 @@ static void fw_delete_filter(struct rcu_head *head) kfree(f); } -static void fw_destroy(struct tcf_proto *tp) +static bool fw_destroy(struct tcf_proto *tp, bool force) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; int h; if (head == NULL) - return; + return true; + + if (!force) { + for (h = 0; h < HTSIZE; h++) + if (rcu_access_pointer(head->ht[h])) + return false; + } for (h = 0; h < HTSIZE; h++) { while ((f = rtnl_dereference(head->ht[h])) != NULL) { @@ -143,6 +158,7 @@ static void fw_destroy(struct tcf_proto *tp) } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static int fw_delete(struct tcf_proto *tp, unsigned long arg) @@ -286,17 +302,11 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!handle) return -EINVAL; - if (head == NULL) { - u32 mask = 0xFFFFFFFF; + if (!head->mask_set) { + head->mask = 0xFFFFFFFF; if (tb[TCA_FW_MASK]) - mask = nla_get_u32(tb[TCA_FW_MASK]); - - head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); - if (head == NULL) - return -ENOBUFS; - head->mask = mask; - - rcu_assign_pointer(tp->root, head); + head->mask = nla_get_u32(tb[TCA_FW_MASK]); + head->mask_set = true; } f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 2ecd24688554..08a3b0a6f5ab 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -258,6 +258,13 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) static int route4_init(struct tcf_proto *tp) { + struct route4_head *head; + + head = kzalloc(sizeof(struct route4_head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + rcu_assign_pointer(tp->root, head); return 0; } @@ -270,13 +277,20 @@ route4_delete_filter(struct rcu_head *head) kfree(f); } -static void route4_destroy(struct tcf_proto *tp) +static bool route4_destroy(struct tcf_proto *tp, bool force) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; if (head == NULL) - return; + return true; + + if (!force) { + for (h1 = 0; h1 <= 256; h1++) { + if (rcu_access_pointer(head->table[h1])) + return false; + } + } for (h1 = 0; h1 <= 256; h1++) { struct route4_bucket *b; @@ -301,6 +315,7 @@ static void route4_destroy(struct tcf_proto *tp) } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); + return true; } static int route4_delete(struct tcf_proto *tp, unsigned long arg) @@ -484,13 +499,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; err = -ENOBUFS; - if (head == NULL) { - head = kzalloc(sizeof(struct route4_head), GFP_KERNEL); - if (head == NULL) - goto errout; - rcu_assign_pointer(tp->root, head); - } - f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); if (!f) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index edd8ade3fbc1..02fa82792dab 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -291,13 +291,20 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) kfree_rcu(f, rcu); } -static void rsvp_destroy(struct tcf_proto *tp) +static bool rsvp_destroy(struct tcf_proto *tp, bool force) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; if (data == NULL) - return; + return true; + + if (!force) { + for (h1 = 0; h1 < 256; h1++) { + if (rcu_access_pointer(data->ht[h1])) + return false; + } + } RCU_INIT_POINTER(tp->root, NULL); @@ -319,6 +326,7 @@ static void rsvp_destroy(struct tcf_proto *tp) } } kfree_rcu(data, rcu); + return true; } static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index bd49bf547a47..a557dbaf5afe 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -468,11 +468,14 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp) +static bool tcindex_destroy(struct tcf_proto *tp, bool force) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; + if (!force) + return false; + pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); walker.count = 0; walker.skip = 0; @@ -481,6 +484,7 @@ static void tcindex_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(tp->root, NULL); call_rcu(&p->rcu, __tcindex_destroy); + return true; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 95fdf4e40051..cab9e9b43967 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -463,13 +463,35 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) return -ENOENT; } -static void u32_destroy(struct tcf_proto *tp) +static bool ht_empty(struct tc_u_hnode *ht) +{ + unsigned int h; + + for (h = 0; h <= ht->divisor; h++) + if (rcu_access_pointer(ht->ht[h])) + return false; + + return true; +} + +static bool u32_destroy(struct tcf_proto *tp, bool force) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); WARN_ON(root_ht == NULL); + if (!force) { + if (root_ht) { + if (root_ht->refcnt > 1) + return false; + if (root_ht->refcnt == 1) { + if (!ht_empty(root_ht)) + return false; + } + } + } + if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); @@ -494,6 +516,7 @@ static void u32_destroy(struct tcf_proto *tp) } tp->data = NULL; + return true; } static int u32_delete(struct tcf_proto *tp, unsigned long arg) diff --git a/net/sched/em_text.c b/net/sched/em_text.c index f03c3de16c27..73e2ed576ceb 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -34,7 +34,6 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, { struct text_match *tm = EM_TEXT_PRIV(m); int from, to; - struct ts_state state; from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data; from += tm->from_offset; @@ -42,7 +41,7 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data; to += tm->to_offset; - return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX; + return skb_find_text(skb, from, to, tm->config) != UINT_MAX; } static int em_text_change(struct net *net, void *data, int len, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 243b7d169d61..ad9eed70bc8f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1858,11 +1858,15 @@ reclassify: } EXPORT_SYMBOL(tc_classify); -void tcf_destroy(struct tcf_proto *tp) +bool tcf_destroy(struct tcf_proto *tp, bool force) { - tp->ops->destroy(tp); - module_put(tp->ops->owner); - kfree_rcu(tp, rcu); + if (tp->ops->destroy(tp, force)) { + module_put(tp->ops->owner); + kfree_rcu(tp, rcu); + return true; + } + + return false; } void tcf_destroy_chain(struct tcf_proto __rcu **fl) @@ -1871,7 +1875,7 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl) while ((tp = rtnl_dereference(*fl)) != NULL) { RCU_INIT_POINTER(*fl, tp->next); - tcf_destroy(tp); + tcf_destroy(tp, true); } } EXPORT_SYMBOL(tcf_destroy_chain); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index dfcea20e3171..f377702d4b91 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Meant to be mostly used for localy generated traffic : + * Meant to be mostly used for locally generated traffic : * Fast classification depends on skb->sk being set before reaching us. * If not, (router workload), we use rxhash as fallback, with 32 bits wide hash. * All packets belonging to a socket are considered as a 'flow'. @@ -63,7 +63,7 @@ struct fq_flow { struct sk_buff *tail; /* last skb in the list */ unsigned long age; /* jiffies when flow was emptied, for gc */ }; - struct rb_node fq_node; /* anchor in fq_root[] trees */ + struct rb_node fq_node; /* anchor in fq_root[] trees */ struct sock *sk; int qlen; /* number of packets in flow queue */ int credit; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index eb5b8445fef9..4cdbfb85686a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -88,11 +88,19 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* ------------------------------------------------------------- */ +static int ingress_init(struct Qdisc *sch, struct nlattr *opt) +{ + net_inc_ingress_queue(); + + return 0; +} + static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); tcf_destroy_chain(&p->filter_list); + net_dec_ingress_queue(); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -124,6 +132,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .id = "ingress", .priv_size = sizeof(struct ingress_qdisc_data), .enqueue = ingress_enqueue, + .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, .owner = THIS_MODULE, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 179f1c8c0d8b..956ead2cab9a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -560,8 +560,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) tfifo_dequeue: skb = __skb_dequeue(&sch->q); if (skb) { -deliver: qdisc_qstats_backlog_dec(sch, skb); +deliver: qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; @@ -578,6 +578,7 @@ deliver: rb_erase(p, &q->t_root); sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; skb->tstamp = netem_skb_cb(skb)->tstamp_save; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8f34b27d5775..53b7acde9aa3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1322,8 +1322,7 @@ static __init int sctp_init(void) int max_share; int order; - BUILD_BUG_ON(sizeof(struct sctp_ulpevent) > - sizeof(((struct sk_buff *) 0)->cb)); + sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index aafe94bf292e..f09de7fac2e6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -102,11 +102,6 @@ static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -extern struct kmem_cache *sctp_bucket_cachep; -extern long sysctl_sctp_mem[3]; -extern int sysctl_sctp_rmem[3]; -extern int sysctl_sctp_wmem[3]; - static int sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; struct percpu_counter sctp_sockets_allocated; @@ -1586,8 +1581,7 @@ static int sctp_error(struct sock *sk, int flags, int err) static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -2066,9 +2060,8 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. */ -static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 2e9ada10fd84..26d50c565f54 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -58,10 +58,6 @@ static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; -extern long sysctl_sctp_mem[3]; -extern int sysctl_sctp_rmem[3]; -extern int sysctl_sctp_wmem[3]; - static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/net/socket.c b/net/socket.c index 1dbff3e60437..5b0126234606 100644 --- a/net/socket.c +++ b/net/socket.c @@ -610,59 +610,27 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); -static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - return sock->ops->sendmsg(iocb, sock, msg, size); -} - -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) -{ - int err = security_socket_sendmsg(sock, msg, size); - - return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); -} - -static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, - size_t size, bool nosec) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : - __sock_sendmsg(&iocb, sock, msg, size); + int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); return ret; } -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +int sock_sendmsg(struct socket *sock, struct msghdr *msg) { - return do_sock_sendmsg(sock, msg, size, false); -} -EXPORT_SYMBOL(sock_sendmsg); + int err = security_socket_sendmsg(sock, msg, + msg_data_left(msg)); -static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) -{ - return do_sock_sendmsg(sock, msg, size, true); + return err ?: sock_sendmsg_nosec(sock, msg); } +EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { - mm_segment_t oldfs = get_fs(); - int result; - - set_fs(KERNEL_DS); - /* - * the following is safe, since for compiler definitions of kvec and - * iovec are identical, yielding the same in-core layout and alignment - */ - iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size); - result = sock_sendmsg(sock, msg, size); - set_fs(oldfs); - return result; + iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + return sock_sendmsg(sock, msg); } EXPORT_SYMBOL(kernel_sendmsg); @@ -730,9 +698,9 @@ EXPORT_SYMBOL_GPL(__sock_recv_wifi_status); static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) + if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount) put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, - sizeof(__u32), &skb->dropcount); + sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, @@ -743,45 +711,21 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); -static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, size, flags); } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { int err = security_socket_recvmsg(sock, msg, size, flags); - return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); -} - -int sock_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - BUG_ON(ret == -EIOCBQUEUED); - return ret; + return err ?: sock_recvmsg_nosec(sock, msg, size, flags); } EXPORT_SYMBOL(sock_recvmsg); -static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - BUG_ON(ret == -EIOCBQUEUED); - return ret; -} - /** * kernel_recvmsg - Receive a message from a socket (kernel space) * @sock: The socket to receive the message from @@ -803,12 +747,8 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, mm_segment_t oldfs = get_fs(); int result; + iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); set_fs(KERNEL_DS); - /* - * the following is safe, since for compiler definitions of kvec and - * iovec are identical, yielding the same in-core layout and alignment - */ - iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size); result = sock_recvmsg(sock, msg, size, flags); set_fs(oldfs); return result; @@ -846,7 +786,8 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *to}; + struct msghdr msg = {.msg_iter = *to, + .msg_iocb = iocb}; ssize_t res; if (file->f_flags & O_NONBLOCK) @@ -858,8 +799,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!iov_iter_count(to)) /* Match SYS5 behaviour */ return 0; - res = __sock_recvmsg(iocb, sock, &msg, - iov_iter_count(to), msg.msg_flags); + res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags); *to = msg.msg_iter; return res; } @@ -868,7 +808,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *from}; + struct msghdr msg = {.msg_iter = *from, + .msg_iocb = iocb}; ssize_t res; if (iocb->ki_pos != 0) @@ -880,7 +821,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = __sock_sendmsg(iocb, sock, &msg, iov_iter_count(from)); + res = sock_sendmsg(sock, &msg); *from = msg.msg_iter; return res; } @@ -1697,18 +1638,14 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, struct iovec iov; int fput_needed; - if (len > INT_MAX) - len = INT_MAX; - if (unlikely(!access_ok(VERIFY_READ, buff, len))) - return -EFAULT; + err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter); + if (unlikely(err)) + return err; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - iov.iov_base = buff; - iov.iov_len = len; msg.msg_name = NULL; - iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_namelen = 0; @@ -1722,7 +1659,7 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg, len); + err = sock_sendmsg(sock, &msg); out_put: fput_light(sock->file, fput_needed); @@ -1757,26 +1694,22 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, int err, err2; int fput_needed; - if (size > INT_MAX) - size = INT_MAX; - if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size))) - return -EFAULT; + err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter); + if (unlikely(err)) + return err; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; - iov.iov_len = size; - iov.iov_base = ubuf; - iov_iter_init(&msg.msg_iter, READ, &iov, 1, size); /* Save some cycles and don't copy the address if not needed */ msg.msg_name = addr ? (struct sockaddr *)&address : NULL; /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, size, flags); + err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); if (err >= 0 && addr != NULL) { err2 = move_addr_to_user(&address, @@ -1896,10 +1829,10 @@ struct used_address { unsigned int name_len; }; -static ssize_t copy_msghdr_from_user(struct msghdr *kmsg, - struct user_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec **iov) +static int copy_msghdr_from_user(struct msghdr *kmsg, + struct user_msghdr __user *umsg, + struct sockaddr __user **save_addr, + struct iovec **iov) { struct sockaddr __user *uaddr; struct iovec __user *uiov; @@ -1943,13 +1876,10 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; - err = rw_copy_check_uvector(save_addr ? READ : WRITE, - uiov, nr_segs, - UIO_FASTIOV, *iov, iov); - if (err >= 0) - iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE, - *iov, nr_segs, err); - return err; + kmsg->msg_iocb = NULL; + + return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs, + UIO_FASTIOV, iov, &kmsg->msg_iter); } static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, @@ -1964,7 +1894,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, __attribute__ ((aligned(sizeof(__kernel_size_t)))); /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; - int ctl_len, total_len; + int ctl_len; ssize_t err; msg_sys->msg_name = &address; @@ -1974,8 +1904,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, else err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov); if (err < 0) - goto out_freeiov; - total_len = err; + return err; err = -ENOBUFS; @@ -2022,10 +1951,10 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, used_address->name_len == msg_sys->msg_namelen && !memcmp(&used_address->name, msg_sys->msg_name, used_address->name_len)) { - err = sock_sendmsg_nosec(sock, msg_sys, total_len); + err = sock_sendmsg_nosec(sock, msg_sys); goto out_freectl; } - err = sock_sendmsg(sock, msg_sys, total_len); + err = sock_sendmsg(sock, msg_sys); /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. @@ -2041,8 +1970,7 @@ out_freectl: if (ctl_buf != ctl) sock_kfree_s(sock->sk, ctl_buf, ctl_len); out_freeiov: - if (iov != iovstack) - kfree(iov); + kfree(iov); return err; } @@ -2167,8 +2095,8 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, else err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov); if (err < 0) - goto out_freeiov; - total_len = err; + return err; + total_len = iov_iter_count(&msg_sys->msg_iter); cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); @@ -2206,8 +2134,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, err = len; out_freeiov: - if (iov != iovstack) - kfree(iov); + kfree(iov); return err; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cc331b6cf573..0c8120229a03 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,7 +257,7 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) svc_set_cmsg_data(rqstp, cmh); - if (sock_sendmsg(sock, &msg, 0) < 0) + if (sock_sendmsg(sock, &msg) < 0) goto out; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8c1e558db118..46568b85c333 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1,6 +1,7 @@ /* * net/switchdev/switchdev.c - Switch device API * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,6 +15,7 @@ #include <linux/mutex.h> #include <linux/notifier.h> #include <linux/netdevice.h> +#include <net/ip_fib.h> #include <net/switchdev.h> /** @@ -26,13 +28,13 @@ int netdev_switch_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid) { - const struct net_device_ops *ops = dev->netdev_ops; + const struct swdev_ops *ops = dev->swdev_ops; - if (!ops->ndo_switch_parent_id_get) + if (!ops || !ops->swdev_parent_id_get) return -EOPNOTSUPP; - return ops->ndo_switch_parent_id_get(dev, psid); + return ops->swdev_parent_id_get(dev, psid); } -EXPORT_SYMBOL(netdev_switch_parent_id_get); +EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get); /** * netdev_switch_port_stp_update - Notify switch device port of STP @@ -44,20 +46,29 @@ EXPORT_SYMBOL(netdev_switch_parent_id_get); */ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) { - const struct net_device_ops *ops = dev->netdev_ops; + const struct swdev_ops *ops = dev->swdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; - if (!ops->ndo_switch_port_stp_update) - return -EOPNOTSUPP; - WARN_ON(!ops->ndo_switch_parent_id_get); - return ops->ndo_switch_port_stp_update(dev, state); + if (ops && ops->swdev_port_stp_update) + return ops->swdev_port_stp_update(dev, state); + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_stp_update(lower_dev, state); + if (err && err != -EOPNOTSUPP) + return err; + } + + return err; } -EXPORT_SYMBOL(netdev_switch_port_stp_update); +EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); static DEFINE_MUTEX(netdev_switch_mutex); static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); /** - * register_netdev_switch_notifier - Register nofifier + * register_netdev_switch_notifier - Register notifier * @nb: notifier_block * * Register switch device notifier. This should be used by code @@ -73,10 +84,10 @@ int register_netdev_switch_notifier(struct notifier_block *nb) mutex_unlock(&netdev_switch_mutex); return err; } -EXPORT_SYMBOL(register_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); /** - * unregister_netdev_switch_notifier - Unregister nofifier + * unregister_netdev_switch_notifier - Unregister notifier * @nb: notifier_block * * Unregister switch device notifier. @@ -91,10 +102,10 @@ int unregister_netdev_switch_notifier(struct notifier_block *nb) mutex_unlock(&netdev_switch_mutex); return err; } -EXPORT_SYMBOL(unregister_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); /** - * call_netdev_switch_notifiers - Call nofifiers + * call_netdev_switch_notifiers - Call notifiers * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data @@ -114,7 +125,7 @@ int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, mutex_unlock(&netdev_switch_mutex); return err; } -EXPORT_SYMBOL(call_netdev_switch_notifiers); +EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); /** * netdev_switch_port_bridge_setlink - Notify switch device port of bridge @@ -139,7 +150,7 @@ int netdev_switch_port_bridge_setlink(struct net_device *dev, return ops->ndo_bridge_setlink(dev, nlh, flags); } -EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink); /** * netdev_switch_port_bridge_dellink - Notify switch device port of bridge @@ -164,7 +175,7 @@ int netdev_switch_port_bridge_dellink(struct net_device *dev, return ops->ndo_bridge_dellink(dev, nlh, flags); } -EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink); /** * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink @@ -194,7 +205,7 @@ int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, return ret; } -EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink); /** * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink @@ -224,4 +235,170 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, return ret; } -EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink); + +static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) +{ + const struct swdev_ops *ops = dev->swdev_ops; + struct net_device *lower_dev; + struct net_device *port_dev; + struct list_head *iter; + + /* Recusively search down until we find a sw port dev. + * (A sw port dev supports swdev_parent_id_get). + */ + + if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && + ops && ops->swdev_parent_id_get) + return dev; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + port_dev = netdev_switch_get_lowest_dev(lower_dev); + if (port_dev) + return port_dev; + } + + return NULL; +} + +static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) +{ + struct netdev_phys_item_id psid; + struct netdev_phys_item_id prev_psid; + struct net_device *dev = NULL; + int nhsel; + + /* For this route, all nexthop devs must be on the same switch. */ + + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (!nh->nh_dev) + return NULL; + + dev = netdev_switch_get_lowest_dev(nh->nh_dev); + if (!dev) + return NULL; + + if (netdev_switch_parent_id_get(dev, &psid)) + return NULL; + + if (nhsel > 0) { + if (prev_psid.id_len != psid.id_len) + return NULL; + if (memcmp(prev_psid.id, psid.id, psid.id_len)) + return NULL; + } + + prev_psid = psid; + } + + return dev; +} + +/** + * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch + * + * @dst: route's IPv4 destination address + * @dst_len: destination address length (prefix length) + * @fi: route FIB info structure + * @tos: route TOS + * @type: route type + * @nlflags: netlink flags passed in (NLM_F_*) + * @tb_id: route table ID + * + * Add IPv4 route entry to switch device. + */ +int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id) +{ + struct net_device *dev; + const struct swdev_ops *ops; + int err = 0; + + /* Don't offload route if using custom ip rules or if + * IPv4 FIB offloading has been disabled completely. + */ + +#ifdef CONFIG_IP_MULTIPLE_TABLES + if (fi->fib_net->ipv4.fib_has_custom_rules) + return 0; +#endif + + if (fi->fib_net->ipv4.fib_offload_disabled) + return 0; + + dev = netdev_switch_get_dev_by_nhs(fi); + if (!dev) + return 0; + ops = dev->swdev_ops; + + if (ops->swdev_fib_ipv4_add) { + err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len, + fi, tos, type, nlflags, + tb_id); + if (!err) + fi->fib_flags |= RTNH_F_EXTERNAL; + } + + return err; +} +EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); + +/** + * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch + * + * @dst: route's IPv4 destination address + * @dst_len: destination address length (prefix length) + * @fi: route FIB info structure + * @tos: route TOS + * @type: route type + * @tb_id: route table ID + * + * Delete IPv4 route entry from switch device. + */ +int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ + struct net_device *dev; + const struct swdev_ops *ops; + int err = 0; + + if (!(fi->fib_flags & RTNH_F_EXTERNAL)) + return 0; + + dev = netdev_switch_get_dev_by_nhs(fi); + if (!dev) + return 0; + ops = dev->swdev_ops; + + if (ops->swdev_fib_ipv4_del) { + err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, + fi, tos, type, tb_id); + if (!err) + fi->fib_flags &= ~RTNH_F_EXTERNAL; + } + + return err; +} +EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del); + +/** + * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation + * + * @fi: route FIB info structure + */ +void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +{ + /* There was a problem installing this route to the offload + * device. For now, until we come up with more refined + * policy handling, abruptly end IPv4 fib offloading for + * for entire net by flushing offload device(s) of all + * IPv4 routes, and mark IPv4 fib offloading broken from + * this point forward. + */ + + fib_flush_external(fi->fib_net); + fi->fib_net->ipv4.fib_offload_disabled = true; +} +EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 91c8a8e031db..c25a3a149dc4 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -26,3 +26,11 @@ config TIPC_MEDIA_IB help Saying Y here will enable support for running TIPC on IP-over-InfiniBand devices. +config TIPC_MEDIA_UDP + bool "IP/UDP media type support" + depends on TIPC + select NET_UDP_TUNNEL + help + Saying Y here will enable support for running TIPC over IP/UDP + bool + default y diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 599b1a540d2b..57e460be4692 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -10,5 +10,6 @@ tipc-y += addr.o bcast.o bearer.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ server.o socket.o +tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 48fd3b5a73fb..ba7daa864d44 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -38,6 +38,13 @@ #include "addr.h" #include "core.h" +u32 tipc_own_addr(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->own_addr; +} + /** * in_own_cluster - test for cluster inclusion; <0.0.0> always matches */ diff --git a/net/tipc/addr.h b/net/tipc/addr.h index c700c2d28e09..7ba6d5c8ae40 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -55,6 +55,7 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_CLUSTER_MASK; } +u32 tipc_own_addr(struct net *net); int in_own_cluster(struct net *net, u32 addr); int in_own_cluster_exact(struct net *net, u32 addr); int in_own_node(struct net *net, u32 addr); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3e41704832de..c5cbdcb1f0b5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -62,21 +62,8 @@ static void tipc_bclink_lock(struct net *net) static void tipc_bclink_unlock(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node = NULL; - if (likely(!tn->bclink->flags)) { - spin_unlock_bh(&tn->bclink->lock); - return; - } - - if (tn->bclink->flags & TIPC_BCLINK_RESET) { - tn->bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(net); - } spin_unlock_bh(&tn->bclink->lock); - - if (node) - tipc_link_reset_all(node); } void tipc_bclink_input(struct net *net) @@ -91,13 +78,6 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(struct net *net, unsigned int flags) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tn->bclink->flags |= flags; -} - static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -135,9 +115,10 @@ static void bclink_set_last_sent(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *bcl = tn->bcl; + struct sk_buff *skb = skb_peek(&bcl->backlogq); - if (bcl->next_out) - bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); + if (skb) + bcl->fsm_msg_cnt = mod(buf_seqno(skb) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } @@ -155,7 +136,6 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) seqno : node->bclink.last_sent; } - /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * @@ -180,7 +160,7 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) struct sk_buff *skb; struct tipc_link *bcl = tn->bcl; - skb_queue_walk(&bcl->outqueue, skb) { + skb_queue_walk(&bcl->transmq, skb) { if (more(buf_seqno(skb), after)) { tipc_link_retransmit(bcl, skb, mod(to - after)); break; @@ -210,14 +190,17 @@ void tipc_bclink_wakeup_users(struct net *net) void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) { struct sk_buff *skb, *tmp; - struct sk_buff *next; unsigned int released = 0; struct net *net = n_ptr->net; struct tipc_net *tn = net_generic(net, tipc_net_id); + if (unlikely(!n_ptr->bclink.recv_permitted)) + return; + tipc_bclink_lock(net); + /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&tn->bcl->outqueue); + skb = skb_peek(&tn->bcl->transmq); if (!skb) goto exit; @@ -244,27 +227,19 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&tn->bcl->outqueue, skb) { + skb_queue_walk(&tn->bcl->transmq, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { + skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - - next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); - if (skb != tn->bcl->next_out) { - bcbuf_decr_acks(skb); - } else { - bcbuf_set_acks(skb, 0); - tn->bcl->next_out = next; - bclink_set_last_sent(net); - } - + bcbuf_decr_acks(skb); + bclink_set_last_sent(net); if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &tn->bcl->outqueue); + __skb_unlink(skb, &tn->bcl->transmq); kfree_skb(skb); released = 1; } @@ -272,7 +247,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(tn->bcl->next_out)) { + if (unlikely(skb_peek(&tn->bcl->backlogq))) { tipc_link_push_packets(tn->bcl); bclink_set_last_sent(net); } @@ -319,7 +294,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, buf = tipc_buf_acquire(INT_H_SIZE); if (buf) { struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); + struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, @@ -354,13 +329,12 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) return; tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && (n_ptr->bclink.last_in == msg_bcgap_after(msg))) n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); } /* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster @@ -387,14 +361,13 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) __skb_queue_purge(list); return -EHOSTUNREACH; } - /* Broadcast to all nodes */ if (likely(bclink)) { tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { - u32 len = skb_queue_len(&bcl->outqueue); + u32 len = skb_queue_len(&bcl->transmq); bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; @@ -440,7 +413,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) */ if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], - STATE_MSG, 0, 0, 0, 0, 0); + STATE_MSG, 0, 0, 0, 0); tn->bcl->stats.sent_acks++; } } @@ -481,17 +454,18 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) goto unlock; if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_node_unlock(node); tipc_bclink_lock(net); bcl->stats.recv_nacks++; tn->bclink->retransmit_to = node; bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); tipc_bclink_unlock(net); + tipc_node_unlock(node); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); } + tipc_node_put(node); goto exit; } @@ -528,11 +502,13 @@ receive: tipc_bclink_unlock(net); tipc_node_unlock(node); } else if (msg_user(msg) == MSG_FRAGMENTER) { - tipc_buf_append(&node->bclink.reasm_buf, &buf); - if (unlikely(!buf && !node->bclink.reasm_buf)) - goto unlock; tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) { + tipc_bclink_unlock(net); + goto unlock; + } bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; @@ -559,25 +535,25 @@ receive: if (node->bclink.last_in == node->bclink.last_sent) goto unlock; - if (skb_queue_empty(&node->bclink.deferred_queue)) { + if (skb_queue_empty(&node->bclink.deferdq)) { node->bclink.oos_state = 1; goto unlock; } - msg = buf_msg(skb_peek(&node->bclink.deferred_queue)); + msg = buf_msg(skb_peek(&node->bclink.deferdq)); seqno = msg_seqno(msg); next_in = mod(next_in + 1); if (seqno != next_in) goto unlock; /* Take in-sequence message from deferred queue & deliver it */ - buf = __skb_dequeue(&node->bclink.deferred_queue); + buf = __skb_dequeue(&node->bclink.deferdq); goto receive; } /* Handle out-of-sequence broadcast message */ if (less(next_in, seqno)) { - deferred = tipc_link_defer_pkt(&node->bclink.deferred_queue, + deferred = tipc_link_defer_pkt(&node->bclink.deferdq, buf); bclink_update_last_sent(node, seqno); buf = NULL; @@ -594,6 +570,7 @@ receive: unlock: tipc_node_unlock(node); + tipc_node_put(node); exit: kfree_skb(buf); } @@ -634,7 +611,6 @@ static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); tn->bcl->stats.sent_info++; - if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); return 0; @@ -835,7 +811,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->queue_limit[0])) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) goto prop_msg_full; nla_nest_end(msg->skb, prop); @@ -913,8 +889,9 @@ int tipc_bclink_init(struct net *net) sprintf(bcbearer->media.name, "tipc-broadcast"); spin_lock_init(&bclink->lock); - __skb_queue_head_init(&bcl->outqueue); - __skb_queue_head_init(&bcl->deferred_queue); + __skb_queue_head_init(&bcl->transmq); + __skb_queue_head_init(&bcl->backlogq); + __skb_queue_head_init(&bcl->deferdq); skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); @@ -922,7 +899,7 @@ int tipc_bclink_init(struct net *net) skb_queue_head_init(&bclink->inputq); bcl->owner = &bclink->node; bcl->owner->net = net; - bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; + bcl->mtu = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 43f397fbac55..4bdc12277d33 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,6 @@ struct tipc_bcbearer_pair { struct tipc_bearer *secondary; }; -#define TIPC_BCLINK_RESET 1 #define BCBEARER MAX_BEARERS /** @@ -86,7 +85,6 @@ struct tipc_bcbearer { * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * @@ -96,7 +94,6 @@ struct tipc_bclink { spinlock_t lock; struct tipc_link link; struct tipc_node node; - unsigned int flags; struct sk_buff_head arrvq; struct sk_buff_head inputq; struct tipc_node_map bcast_nodes; @@ -117,7 +114,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); -void tipc_bclink_set_flags(struct net *tn, unsigned int flags); void tipc_bclink_add_node(struct net *net, u32 addr); void tipc_bclink_remove_node(struct net *net, u32 addr); struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 48852c2dcc03..3613e72e858e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -48,6 +48,9 @@ static struct tipc_media * const media_info_array[] = { #ifdef CONFIG_TIPC_MEDIA_IB &ib_media_info, #endif +#ifdef CONFIG_TIPC_MEDIA_UDP + &udp_media_info, +#endif NULL }; @@ -216,7 +219,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * tipc_enable_bearer - enable bearer with the given name */ static int tipc_enable_bearer(struct net *net, const char *name, - u32 disc_domain, u32 priority) + u32 disc_domain, u32 priority, + struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; @@ -304,7 +308,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr); + res = m_ptr->enable_media(net, b_ptr, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -372,7 +376,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, kfree_rcu(b_ptr, rcu); } -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; @@ -791,7 +796,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(net, bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio, attrs); if (err) { rtnl_unlock(); return err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6b17795ff8bc..5cad243ee8fc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -41,7 +41,7 @@ #include <net/genetlink.h> #define MAX_BEARERS 2 -#define MAX_MEDIA 2 +#define MAX_MEDIA 3 #define MAX_NODES 4096 #define WSIZE 32 @@ -50,14 +50,16 @@ * - the field's actual content and length is defined per media * - remaining unused bytes in the field are set to zero */ -#define TIPC_MEDIA_ADDR_SIZE 32 +#define TIPC_MEDIA_INFO_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 +#define TIPC_MEDIA_ADDR_OFFSET 4 /* * Identifiers of supported TIPC media types */ #define TIPC_MEDIA_TYPE_ETH 1 #define TIPC_MEDIA_TYPE_IB 2 +#define TIPC_MEDIA_TYPE_UDP 3 /** * struct tipc_node_map - set of node identifiers @@ -76,7 +78,7 @@ struct tipc_node_map { * @broadcast: non-zero if address is a broadcast address */ struct tipc_media_addr { - u8 value[TIPC_MEDIA_ADDR_SIZE]; + u8 value[TIPC_MEDIA_INFO_SIZE]; u8 media_id; u8 broadcast; }; @@ -103,7 +105,8 @@ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + struct nlattr *attr[]); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -182,6 +185,9 @@ extern struct tipc_media eth_media_info; #ifdef CONFIG_TIPC_MEDIA_IB extern struct tipc_media ib_media_info; #endif +#ifdef CONFIG_TIPC_MEDIA_UDP +extern struct tipc_media udp_media_info; +#endif int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); @@ -196,7 +202,8 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index feef3753615d..967e292f53c8 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -86,9 +86,10 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, msg = buf_msg(buf); tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, - INT_H_SIZE, dest_domain); + MAX_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); + msg_set_node_capabilities(msg, 0); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); @@ -133,6 +134,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, u32 net_id = msg_bc_netid(msg); u32 mtyp = msg_type(msg); u32 signature = msg_node_sig(msg); + u16 caps = msg_node_capabilities(msg); bool addr_match = false; bool sign_match = false; bool link_up = false; @@ -167,6 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, if (!node) return; tipc_node_lock(node); + node->capabilities = caps; link = node->links[bearer->identity]; /* Prepare to validate requesting node's signature and media address */ @@ -249,7 +252,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rbuf = tipc_buf_acquire(INT_H_SIZE); + rbuf = tipc_buf_acquire(MAX_H_SIZE); if (rbuf) { tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); tipc_bearer_send(net, bearer->identity, rbuf, &maddr); @@ -257,6 +260,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, } } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -359,8 +363,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return -ENOMEM; - - req->buf = tipc_buf_acquire(INT_H_SIZE); + req->buf = tipc_buf_acquire(MAX_H_SIZE); if (!req->buf) { kfree(req); return -ENOMEM; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 5e1426f1751f..f69a2fde9f4a 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -37,8 +37,6 @@ #include "core.h" #include "bearer.h" -#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */ - /* Convert Ethernet address (media address format) to string */ static int tipc_eth_addr2str(struct tipc_media_addr *addr, char *strbuf, int bufsz) @@ -53,9 +51,9 @@ static int tipc_eth_addr2str(struct tipc_media_addr *addr, /* Convert from media address format to discovery message addr format */ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; - memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN); + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } @@ -79,7 +77,7 @@ static int tipc_eth_msg2addr(struct tipc_bearer *b, char *msg) { /* Skip past preamble: */ - msg += ETH_ADDR_OFFSET; + msg += TIPC_MEDIA_ADDR_OFFSET; return tipc_eth_raw2addr(b, addr, msg); } diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 8522eef9c136..e8c16718e3fa 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -57,7 +57,7 @@ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, /* Convert from media address format to discovery message addr format */ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); memcpy(msg, addr->value, INFINIBAND_ALEN); return 0; } diff --git a/net/tipc/link.c b/net/tipc/link.c index 14f09b3cb87c..a6b30df6ec02 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,7 +1,7 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2007, 2012-2014, Ericsson AB + * Copyright (c) 1996-2007, 2012-2015, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -35,6 +35,7 @@ */ #include "core.h" +#include "subscr.h" #include "link.h" #include "bcast.h" #include "socket.h" @@ -88,24 +89,14 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { #define TIMEOUT_EVT 560817u /* link timer expired */ /* - * The following two 'message types' is really just implementation - * data conveniently stored in the message header. - * They must not be considered part of the protocol + * State value stored in 'failover_pkts' */ -#define OPEN_MSG 0 -#define CLOSED_MSG 1 - -/* - * State value stored in 'exp_msg_count' - */ -#define START_CHANGEOVER 100000u +#define FIRST_FAILOVER 0xffffu static void link_handle_out_of_seq_msg(struct tipc_link *link, struct sk_buff *skb); static void tipc_link_proto_rcv(struct tipc_link *link, struct sk_buff *skb); -static int tipc_link_tunnel_rcv(struct tipc_node *node, - struct sk_buff **skb); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); @@ -114,7 +105,7 @@ static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); - +static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); /* * Simple link routines */ @@ -138,32 +129,11 @@ static void tipc_link_put(struct tipc_link *l_ptr) kref_put(&l_ptr->ref, tipc_link_release); } -static void link_init_max_pkt(struct tipc_link *l_ptr) +static struct tipc_link *tipc_parallel_link(struct tipc_link *l) { - struct tipc_node *node = l_ptr->owner; - struct tipc_net *tn = net_generic(node->net, tipc_net_id); - struct tipc_bearer *b_ptr; - u32 max_pkt; - - rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); - if (!b_ptr) { - rcu_read_unlock(); - return; - } - max_pkt = (b_ptr->mtu & ~3); - rcu_read_unlock(); - - if (max_pkt > MAX_MSG_SIZE) - max_pkt = MAX_MSG_SIZE; - - l_ptr->max_pkt_target = max_pkt; - if (l_ptr->max_pkt_target < MAX_PKT_DEFAULT) - l_ptr->max_pkt = l_ptr->max_pkt_target; - else - l_ptr->max_pkt = MAX_PKT_DEFAULT; - - l_ptr->max_pkt_probes = 0; + if (l->owner->active_links[0] != l) + return l->owner->active_links[0]; + return l->owner->active_links[1]; } /* @@ -194,10 +164,10 @@ static void link_timeout(unsigned long data) tipc_node_lock(l_ptr->owner); /* update counters used in statistical profiling of send traffic */ - l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->outqueue); + l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->transmq); l_ptr->stats.queue_sz_counts++; - skb = skb_peek(&l_ptr->outqueue); + skb = skb_peek(&l_ptr->transmq); if (skb) { struct tipc_msg *msg = buf_msg(skb); u32 length = msg_size(msg); @@ -229,7 +199,7 @@ static void link_timeout(unsigned long data) /* do all other link processing performed on a periodic basis */ link_state_event(l_ptr, TIMEOUT_EVT); - if (l_ptr->next_out) + if (skb_queue_len(&l_ptr->backlogq)) tipc_link_push_packets(l_ptr); tipc_node_unlock(l_ptr->owner); @@ -305,16 +275,15 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, msg_set_session(msg, (tn->random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); - + l_ptr->net_plane = b_ptr->net_plane; + l_ptr->advertised_mtu = b_ptr->mtu; + l_ptr->mtu = l_ptr->advertised_mtu; l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); - - l_ptr->net_plane = b_ptr->net_plane; - link_init_max_pkt(l_ptr); - l_ptr->next_out_no = 1; - __skb_queue_head_init(&l_ptr->outqueue); - __skb_queue_head_init(&l_ptr->deferred_queue); + __skb_queue_head_init(&l_ptr->transmq); + __skb_queue_head_init(&l_ptr->backlogq); + __skb_queue_head_init(&l_ptr->deferdq); skb_queue_head_init(&l_ptr->wakeupq); skb_queue_head_init(&l_ptr->inputq); skb_queue_head_init(&l_ptr->namedq); @@ -327,15 +296,19 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, } /** - * link_delete - Conditional deletion of link. - * If timer still running, real delete is done when it expires - * @link: link to be deleted + * tipc_link_delete - Delete a link + * @l: link to be deleted */ -void tipc_link_delete(struct tipc_link *link) +void tipc_link_delete(struct tipc_link *l) { - tipc_link_reset_fragments(link); - tipc_node_detach_link(link->owner, link); - tipc_link_put(link); + tipc_link_reset(l); + if (del_timer(&l->timer)) + tipc_link_put(l); + l->flags |= LINK_STOPPED; + /* Delete link now, or when timer is finished: */ + tipc_link_reset_fragments(l); + tipc_node_detach_link(l->owner, l); + tipc_link_put(l); } void tipc_link_delete_list(struct net *net, unsigned int bearer_id, @@ -349,16 +322,7 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); link = node->links[bearer_id]; - if (!link) { - tipc_node_unlock(node); - continue; - } - tipc_link_reset(link); - if (del_timer(&link->timer)) - tipc_link_put(link); - link->flags |= LINK_STOPPED; - /* Delete link now, or when failover is finished: */ - if (shutting_down || !tipc_node_is_up(node)) + if (link) tipc_link_delete(link); tipc_node_unlock(node); } @@ -366,28 +330,43 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, } /** - * link_schedule_user - schedule user for wakeup after congestion + * link_schedule_user - schedule a message sender for wakeup after congestion * @link: congested link - * @oport: sending port - * @chain_sz: size of buffer chain that was attempted sent - * @imp: importance of message attempted sent + * @list: message that was attempted sent * Create pseudo msg to send back to user when congestion abates + * Only consumes message if there is an error */ -static bool link_schedule_user(struct tipc_link *link, u32 oport, - uint chain_sz, uint imp) +static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) { - struct sk_buff *buf; + struct tipc_msg *msg = buf_msg(skb_peek(list)); + int imp = msg_importance(msg); + u32 oport = msg_origport(msg); + u32 addr = link_own_addr(link); + struct sk_buff *skb; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, - link_own_addr(link), link_own_addr(link), - oport, 0, 0); - if (!buf) - return false; - TIPC_SKB_CB(buf)->chain_sz = chain_sz; - TIPC_SKB_CB(buf)->chain_imp = imp; - skb_queue_tail(&link->wakeupq, buf); + /* This really cannot happen... */ + if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + goto err; + } + /* Non-blocking sender: */ + if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending) + return -ELINKCONG; + + /* Create and schedule wakeup pseudo message */ + skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + addr, addr, oport, 0, 0); + if (!skb) + goto err; + TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list); + TIPC_SKB_CB(skb)->chain_imp = imp; + skb_queue_tail(&link->wakeupq, skb); link->stats.link_congs++; - return true; + return -ELINKCONG; +err: + __skb_queue_purge(list); + return -ENOBUFS; } /** @@ -396,19 +375,22 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, * Move a number of waiting users, as permitted by available space in * the send queue, from link wait queue to node wait queue for wakeup */ -void link_prepare_wakeup(struct tipc_link *link) +void link_prepare_wakeup(struct tipc_link *l) { - uint pend_qsz = skb_queue_len(&link->outqueue); + int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; + int imp, lim; struct sk_buff *skb, *tmp; - skb_queue_walk_safe(&link->wakeupq, skb, tmp) { - if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(skb)->chain_imp]) + skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + lim = l->window + l->backlog[imp].limit; + pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; + if ((pnd[imp] + l->backlog[imp].len) >= lim) break; - pend_qsz += TIPC_SKB_CB(skb)->chain_sz; - skb_unlink(skb, &link->wakeupq); - skb_queue_tail(&link->inputq, skb); - link->owner->inputq = &link->inputq; - link->owner->action_flags |= TIPC_MSG_EVT; + skb_unlink(skb, &l->wakeupq); + skb_queue_tail(&l->inputq, skb); + l->owner->inputq = &l->inputq; + l->owner->action_flags |= TIPC_MSG_EVT; } } @@ -422,31 +404,42 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) l_ptr->reasm_buf = NULL; } +static void tipc_link_purge_backlog(struct tipc_link *l) +{ + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; +} + /** * tipc_link_purge_queues - purge all pkt queues associated with link * @l_ptr: pointer to link */ void tipc_link_purge_queues(struct tipc_link *l_ptr) { - __skb_queue_purge(&l_ptr->deferred_queue); - __skb_queue_purge(&l_ptr->outqueue); + __skb_queue_purge(&l_ptr->deferdq); + __skb_queue_purge(&l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); tipc_link_reset_fragments(l_ptr); } void tipc_link_reset(struct tipc_link *l_ptr) { u32 prev_state = l_ptr->state; - u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); struct tipc_node *owner = l_ptr->owner; + struct tipc_link *pl = tipc_parallel_link(l_ptr); msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); /* Link is down, accept any session */ l_ptr->peer_session = INVALID_SESSION; - /* Prepare for max packet size negotiation */ - link_init_max_pkt(l_ptr); + /* Prepare for renewed mtu size negotiation */ + l_ptr->mtu = l_ptr->advertised_mtu; l_ptr->state = RESET_UNKNOWN; @@ -456,21 +449,26 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); - if (was_active_link && tipc_node_active_links(l_ptr->owner)) { - l_ptr->reset_checkpoint = checkpoint; - l_ptr->exp_msg_count = START_CHANGEOVER; + if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { + l_ptr->flags |= LINK_FAILINGOVER; + l_ptr->failover_checkpt = l_ptr->next_in_no; + pl->failover_pkts = FIRST_FAILOVER; + pl->failover_checkpt = l_ptr->next_in_no; + pl->failover_skb = l_ptr->reasm_buf; + } else { + kfree_skb(l_ptr->reasm_buf); } - /* Clean up all queues, except inputq: */ - __skb_queue_purge(&l_ptr->outqueue); - __skb_queue_purge(&l_ptr->deferred_queue); + __skb_queue_purge(&l_ptr->transmq); + __skb_queue_purge(&l_ptr->deferdq); if (!owner->inputq) owner->inputq = &l_ptr->inputq; skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; - l_ptr->next_out = NULL; - l_ptr->unacked_window = 0; + tipc_link_purge_backlog(l_ptr); + l_ptr->reasm_buf = NULL; + l_ptr->rcv_unacked = 0; l_ptr->checkpoint = 1; l_ptr->next_out_no = 1; l_ptr->fsm_msg_cnt = 0; @@ -521,8 +519,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) return; /* Not yet. */ - /* Check whether changeover is going on */ - if (l_ptr->exp_msg_count) { + if (l_ptr->flags & LINK_FAILINGOVER) { if (event == TIMEOUT_EVT) link_set_timer(l_ptr, cont_intv); return; @@ -539,11 +536,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } else if (l_ptr->max_pkt < l_ptr->max_pkt_target) { - tipc_link_proto_xmit(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); @@ -551,7 +544,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } l_ptr->state = WORKING_UNKNOWN; l_ptr->fsm_msg_cnt = 0; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); break; @@ -562,7 +555,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -585,7 +578,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -596,13 +589,13 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ @@ -612,7 +605,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, RESET_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); } @@ -632,7 +625,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); @@ -642,7 +635,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 1, 0, 0, 0, 0); + 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -652,7 +645,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case TIMEOUT_EVT: - tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -670,7 +663,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); @@ -680,7 +673,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case TIMEOUT_EVT: tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -693,101 +686,65 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } } -/* tipc_link_cong: determine return value and how to treat the - * sent buffer during link congestion. - * - For plain, errorless user data messages we keep the buffer and - * return -ELINKONG. - * - For all other messages we discard the buffer and return -EHOSTUNREACH - * - For TIPC internal messages we also reset the link - */ -static int tipc_link_cong(struct tipc_link *link, struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek(list); - struct tipc_msg *msg = buf_msg(skb); - uint imp = tipc_msg_tot_importance(msg); - u32 oport = msg_tot_origport(msg); - - if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { - pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); - tipc_link_reset(link); - goto drop; - } - if (unlikely(msg_errcode(msg))) - goto drop; - if (unlikely(msg_reroute_cnt(msg))) - goto drop; - if (TIPC_SKB_CB(skb)->wakeup_pending) - return -ELINKCONG; - if (link_schedule_user(link, oport, skb_queue_len(list), imp)) - return -ELINKCONG; -drop: - __skb_queue_purge(list); - return -EHOSTUNREACH; -} - /** * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked * @link: link to use * @list: chain of buffers containing message * - * Consumes the buffer chain, except when returning -ELINKCONG - * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket - * user data messages) or -EHOSTUNREACH (all other messages/senders) - * Only the socket functions tipc_send_stream() and tipc_send_packet() need - * to act on the return value, since they may need to do more send attempts. + * Consumes the buffer chain, except when returning -ELINKCONG, + * since the caller then may want to make more send attempts. + * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS + * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted */ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list) { struct tipc_msg *msg = buf_msg(skb_peek(list)); - uint psz = msg_size(msg); - uint sndlim = link->queue_limit[0]; - uint imp = tipc_msg_tot_importance(msg); - uint mtu = link->max_pkt; + unsigned int maxwin = link->window; + unsigned int imp = msg_importance(msg); + uint mtu = link->mtu; uint ack = mod(link->next_in_no - 1); uint seqno = link->next_out_no; uint bc_last_in = link->owner->bclink.last_in; struct tipc_media_addr *addr = &link->media_addr; - struct sk_buff_head *outqueue = &link->outqueue; + struct sk_buff_head *transmq = &link->transmq; + struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff *skb, *tmp; - /* Match queue limits against msg importance: */ - if (unlikely(skb_queue_len(outqueue) >= link->queue_limit[imp])) - return tipc_link_cong(link, list); + /* Match backlog limit against msg importance: */ + if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) + return link_schedule_user(link, list); - /* Has valid packet limit been used ? */ - if (unlikely(psz > mtu)) { + if (unlikely(msg_size(msg) > mtu)) { __skb_queue_purge(list); return -EMSGSIZE; } - - /* Prepare each packet for sending, and add to outqueue: */ + /* Prepare each packet for sending, and add to relevant queue: */ skb_queue_walk_safe(list, skb, tmp) { __skb_unlink(skb, list); msg = buf_msg(skb); - msg_set_word(msg, 2, ((ack << 16) | mod(seqno))); + msg_set_seqno(msg, seqno); + msg_set_ack(msg, ack); msg_set_bcast_ack(msg, bc_last_in); - if (skb_queue_len(outqueue) < sndlim) { - __skb_queue_tail(outqueue, skb); - tipc_bearer_send(net, link->bearer_id, - skb, addr); - link->next_out = NULL; - link->unacked_window = 0; - } else if (tipc_msg_bundle(outqueue, skb, mtu)) { + if (likely(skb_queue_len(transmq) < maxwin)) { + __skb_queue_tail(transmq, skb); + tipc_bearer_send(net, link->bearer_id, skb, addr); + link->rcv_unacked = 0; + seqno++; + continue; + } + if (tipc_msg_bundle(skb_peek_tail(backlogq), skb, mtu)) { link->stats.sent_bundled++; continue; - } else if (tipc_msg_make_bundle(outqueue, skb, mtu, - link->addr)) { + } + if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; - if (!link->next_out) - link->next_out = skb_peek_tail(outqueue); - } else { - __skb_queue_tail(outqueue, skb); - if (!link->next_out) - link->next_out = skb; + imp = msg_importance(buf_msg(skb)); } + __skb_queue_tail(backlogq, skb); + link->backlog[imp].len++; seqno++; } link->next_out_no = seqno; @@ -808,13 +765,25 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) return __tipc_link_xmit(link->owner->net, link, &head); } +/* tipc_link_xmit_skb(): send single buffer to destination + * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * messages, which will not be rejected + * The only exception is datagram messages rerouted after secondary + * lookup, which are rare and safe to dispose of anyway. + * TODO: Return real return value, and let callers use + * tipc_wait_for_sendpkt() where applicable + */ int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, u32 selector) { struct sk_buff_head head; + int rc; skb2list(skb, &head); - return tipc_link_xmit(net, &head, dnode, selector); + rc = tipc_link_xmit(net, &head, dnode, selector); + if (rc == -ELINKCONG) + kfree_skb(skb); + return 0; } /** @@ -841,12 +810,15 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); + tipc_node_put(node); } if (link) return rc; - if (likely(in_own_node(net, dnode))) - return tipc_sk_rcv(net, list); + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } __skb_queue_purge(list); return rc; @@ -893,14 +865,6 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) kfree_skb(buf); } -struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, - const struct sk_buff *skb) -{ - if (skb_queue_is_last(list, skb)) - return NULL; - return skb->next; -} - /* * tipc_link_push_packets - push unsent packets to bearer * @@ -909,30 +873,24 @@ struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, * * Called with node locked */ -void tipc_link_push_packets(struct tipc_link *l_ptr) +void tipc_link_push_packets(struct tipc_link *link) { - struct sk_buff_head *outqueue = &l_ptr->outqueue; - struct sk_buff *skb = l_ptr->next_out; + struct sk_buff *skb; struct tipc_msg *msg; - u32 next, first; + unsigned int ack = mod(link->next_in_no - 1); - skb_queue_walk_from(outqueue, skb) { - msg = buf_msg(skb); - next = msg_seqno(msg); - first = buf_seqno(skb_peek(outqueue)); - - if (mod(next - first) < l_ptr->queue_limit[0]) { - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - if (msg_user(msg) == MSG_BUNDLER) - TIPC_SKB_CB(skb)->bundling = false; - tipc_bearer_send(l_ptr->owner->net, - l_ptr->bearer_id, skb, - &l_ptr->media_addr); - l_ptr->next_out = tipc_skb_queue_next(outqueue, skb); - } else { + while (skb_queue_len(&link->transmq) < link->window) { + skb = __skb_dequeue(&link->backlogq); + if (!skb) break; - } + msg = buf_msg(skb); + link->backlog[msg_importance(msg)].len--; + msg_set_ack(msg, ack); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + link->rcv_unacked = 0; + __skb_queue_tail(&link->transmq, skb); + tipc_bearer_send(link->owner->net, link->bearer_id, + skb, &link->media_addr); } } @@ -979,7 +937,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, (unsigned long) TIPC_SKB_CB(buf)->handle); n_ptr = tipc_bclink_retransmit_to(net); - tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); pr_info("Broadcast link info for %s\n", addr_string); @@ -991,9 +948,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_node_unlock(n_ptr); - - tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); + n_ptr->action_flags |= TIPC_BCAST_RESET; l_ptr->stale_count = 0; } } @@ -1019,8 +974,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, l_ptr->stale_count = 1; } - skb_queue_walk_from(&l_ptr->outqueue, skb) { - if (!retransmits || skb == l_ptr->next_out) + skb_queue_walk_from(&l_ptr->transmq, skb) { + if (!retransmits) break; msg = buf_msg(skb); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); @@ -1032,72 +987,43 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, } } -static void link_retrieve_defq(struct tipc_link *link, - struct sk_buff_head *list) -{ - u32 seq_no; - - if (skb_queue_empty(&link->deferred_queue)) - return; - - seq_no = buf_seqno(skb_peek(&link->deferred_queue)); - if (seq_no == mod(link->next_in_no)) - skb_queue_splice_tail_init(&link->deferred_queue, list); -} - -/** - * link_recv_buf_validate - validate basic format of received message - * - * This routine ensures a TIPC message has an acceptable header, and at least - * as much data as the header indicates it should. The routine also ensures - * that the entire message header is stored in the main fragment of the message - * buffer, to simplify future access to message header fields. - * - * Note: Having extra info present in the message header or data areas is OK. - * TIPC will ignore the excess, under the assumption that it is optional info - * introduced by a later release of the protocol. +/* link_synch(): check if all packets arrived before the synch + * point have been consumed + * Returns true if the parallel links are synched, otherwise false */ -static int link_recv_buf_validate(struct sk_buff *buf) +static bool link_synch(struct tipc_link *l) { - static u32 min_data_hdr_size[8] = { - SHORT_H_SIZE, MCAST_H_SIZE, NAMED_H_SIZE, BASIC_H_SIZE, - MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE - }; + unsigned int post_synch; + struct tipc_link *pl; - struct tipc_msg *msg; - u32 tipc_hdr[2]; - u32 size; - u32 hdr_size; - u32 min_hdr_size; + pl = tipc_parallel_link(l); + if (pl == l) + goto synched; - /* If this packet comes from the defer queue, the skb has already - * been validated - */ - if (unlikely(TIPC_SKB_CB(buf)->deferred)) - return 1; - - if (unlikely(buf->len < MIN_H_SIZE)) - return 0; - - msg = skb_header_pointer(buf, 0, sizeof(tipc_hdr), tipc_hdr); - if (msg == NULL) - return 0; + /* Was last pre-synch packet added to input queue ? */ + if (less_eq(pl->next_in_no, l->synch_point)) + return false; - if (unlikely(msg_version(msg) != TIPC_VERSION)) - return 0; + /* Is it still in the input queue ? */ + post_synch = mod(pl->next_in_no - l->synch_point) - 1; + if (skb_queue_len(&pl->inputq) > post_synch) + return false; +synched: + l->flags &= ~LINK_SYNCHING; + return true; +} - size = msg_size(msg); - hdr_size = msg_hdr_sz(msg); - min_hdr_size = msg_isdata(msg) ? - min_data_hdr_size[msg_type(msg)] : INT_H_SIZE; +static void link_retrieve_defq(struct tipc_link *link, + struct sk_buff_head *list) +{ + u32 seq_no; - if (unlikely((hdr_size < min_hdr_size) || - (size < hdr_size) || - (buf->len < size) || - (size - hdr_size > TIPC_MAX_USER_MSG_SIZE))) - return 0; + if (skb_queue_empty(&link->deferdq)) + return; - return pskb_may_pull(buf, hdr_size); + seq_no = buf_seqno(skb_peek(&link->deferdq)); + if (seq_no == mod(link->next_in_no)) + skb_queue_splice_tail_init(&link->deferdq, list); } /** @@ -1125,16 +1051,11 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) while ((skb = __skb_dequeue(&head))) { /* Ensure message is well-formed */ - if (unlikely(!link_recv_buf_validate(skb))) - goto discard; - - /* Ensure message data is a single contiguous unit */ - if (unlikely(skb_linearize(skb))) + if (unlikely(!tipc_msg_validate(skb))) goto discard; /* Handle arrival of a non-unicast link message */ msg = buf_msg(skb); - if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) tipc_disc_rcv(net, skb, b_ptr); @@ -1152,8 +1073,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; - tipc_node_lock(n_ptr); + tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) @@ -1175,21 +1096,20 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) ackd = msg_ack(msg); /* Release acked messages */ - if (n_ptr->bclink.recv_permitted) + if (unlikely(n_ptr->bclink.acked != msg_bcast_ack(msg))) tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); released = 0; - skb_queue_walk_safe(&l_ptr->outqueue, skb1, tmp) { - if (skb1 == l_ptr->next_out || - more(buf_seqno(skb1), ackd)) + skb_queue_walk_safe(&l_ptr->transmq, skb1, tmp) { + if (more(buf_seqno(skb1), ackd)) break; - __skb_unlink(skb1, &l_ptr->outqueue); + __skb_unlink(skb1, &l_ptr->transmq); kfree_skb(skb1); released = 1; } /* Try sending any messages link endpoint has pending */ - if (unlikely(l_ptr->next_out)) + if (unlikely(skb_queue_len(&l_ptr->backlogq))) tipc_link_push_packets(l_ptr); if (released && !skb_queue_empty(&l_ptr->wakeupq)) @@ -1223,18 +1143,26 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; goto unlock; } + /* Synchronize with parallel link if applicable */ + if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { + link_handle_out_of_seq_msg(l_ptr, skb); + if (link_synch(l_ptr)) + link_retrieve_defq(l_ptr, &head); + skb = NULL; + goto unlock; + } l_ptr->next_in_no++; - if (unlikely(!skb_queue_empty(&l_ptr->deferred_queue))) + if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) link_retrieve_defq(l_ptr, &head); - - if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { + if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { l_ptr->stats.sent_acks++; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); } tipc_link_input(l_ptr, skb); skb = NULL; unlock: tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); discard: if (unlikely(skb)) kfree_skb(skb); @@ -1271,7 +1199,7 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) node->action_flags |= TIPC_NAMED_MSG_EVT; return true; case MSG_BUNDLER: - case CHANGEOVER_PROTOCOL: + case TUNNEL_PROTOCOL: case MSG_FRAGMENTER: case BCAST_PROTOCOL: return false; @@ -1298,8 +1226,14 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) return; switch (msg_user(msg)) { - case CHANGEOVER_PROTOCOL: - if (!tipc_link_tunnel_rcv(node, &skb)) + case TUNNEL_PROTOCOL: + if (msg_dup(msg)) { + link->flags |= LINK_SYNCHING; + link->synch_point = msg_seqno(msg_get_wrapped(msg)); + kfree_skb(skb); + break; + } + if (!tipc_link_failover_rcv(link, &skb)) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { tipc_data_input(link, skb); @@ -1394,11 +1328,10 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, return; } - if (tipc_link_defer_pkt(&l_ptr->deferred_queue, buf)) { + if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) { l_ptr->stats.deferred_recv++; - TIPC_SKB_CB(buf)->deferred = true; - if ((skb_queue_len(&l_ptr->deferred_queue) % 16) == 1) - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1) + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); } else { l_ptr->stats.duplicates++; } @@ -1408,15 +1341,15 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, * Send protocol message to the other endpoint. */ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority, u32 ack_mtu) + u32 gap, u32 tolerance, u32 priority) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; u32 msg_size = sizeof(l_ptr->proto_msg); int r_flag; - /* Don't send protocol message during link changeover */ - if (l_ptr->exp_msg_count) + /* Don't send protocol message during link failover */ + if (l_ptr->flags & LINK_FAILINGOVER) return; /* Abort non-RESET send if communication with node is prohibited */ @@ -1434,11 +1367,11 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, if (!tipc_link_is_up(l_ptr)) return; - if (l_ptr->next_out) - next_sent = buf_seqno(l_ptr->next_out); + if (skb_queue_len(&l_ptr->backlogq)) + next_sent = buf_seqno(skb_peek(&l_ptr->backlogq)); msg_set_next_sent(msg, next_sent); - if (!skb_queue_empty(&l_ptr->deferred_queue)) { - u32 rec = buf_seqno(skb_peek(&l_ptr->deferred_queue)); + if (!skb_queue_empty(&l_ptr->deferdq)) { + u32 rec = buf_seqno(skb_peek(&l_ptr->deferdq)); gap = mod(rec - mod(l_ptr->next_in_no)); } msg_set_seq_gap(msg, gap); @@ -1446,35 +1379,20 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, l_ptr->stats.sent_nacks++; msg_set_link_tolerance(msg, tolerance); msg_set_linkprio(msg, priority); - msg_set_max_pkt(msg, ack_mtu); + msg_set_max_pkt(msg, l_ptr->mtu); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_probe(msg, probe_msg != 0); - if (probe_msg) { - u32 mtu = l_ptr->max_pkt; - - if ((mtu < l_ptr->max_pkt_target) && - link_working_working(l_ptr) && - l_ptr->fsm_msg_cnt) { - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - if (l_ptr->max_pkt_probes == 10) { - l_ptr->max_pkt_target = (msg_size - 4); - l_ptr->max_pkt_probes = 0; - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - } - l_ptr->max_pkt_probes++; - } - + if (probe_msg) l_ptr->stats.sent_probes++; - } l_ptr->stats.sent_states++; } else { /* RESET_MSG or ACTIVATE_MSG */ - msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1)); + msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1)); msg_set_seq_gap(msg, 0); msg_set_next_sent(msg, 1); msg_set_probe(msg, 0); msg_set_link_tolerance(msg, l_ptr->tolerance); msg_set_linkprio(msg, l_ptr->priority); - msg_set_max_pkt(msg, l_ptr->max_pkt_target); + msg_set_max_pkt(msg, l_ptr->advertised_mtu); } r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); @@ -1490,10 +1408,9 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; + l_ptr->rcv_unacked = 0; kfree_skb(buf); } @@ -1506,13 +1423,10 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) { u32 rec_gap = 0; - u32 max_pkt_info; - u32 max_pkt_ack; u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); - /* Discard protocol message during link changeover */ - if (l_ptr->exp_msg_count) + if (l_ptr->flags & LINK_FAILINGOVER) goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) @@ -1551,15 +1465,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, if (msg_linkprio(msg) > l_ptr->priority) l_ptr->priority = msg_linkprio(msg); - max_pkt_info = msg_max_pkt(msg); - if (max_pkt_info) { - if (max_pkt_info < l_ptr->max_pkt_target) - l_ptr->max_pkt_target = max_pkt_info; - if (l_ptr->max_pkt > l_ptr->max_pkt_target) - l_ptr->max_pkt = l_ptr->max_pkt_target; - } else { - l_ptr->max_pkt = l_ptr->max_pkt_target; - } + if (l_ptr->mtu > msg_max_pkt(msg)) + l_ptr->mtu = msg_max_pkt(msg); /* Synchronize broadcast link info, if not done previously */ if (!tipc_node_is_up(l_ptr->owner)) { @@ -1604,18 +1511,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, mod(l_ptr->next_in_no)); } - max_pkt_ack = msg_max_pkt(msg); - if (max_pkt_ack > l_ptr->max_pkt) { - l_ptr->max_pkt = max_pkt_ack; - l_ptr->max_pkt_probes = 0; - } - - max_pkt_ack = 0; - if (msg_probe(msg)) { + if (msg_probe(msg)) l_ptr->stats.recv_probes++; - if (msg_size(msg) > sizeof(l_ptr->proto_msg)) - max_pkt_ack = msg_size(msg); - } /* Protocol message before retransmits, reduce loss risk */ if (l_ptr->owner->bclink.recv_permitted) @@ -1623,12 +1520,12 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, msg_last_bcast(msg)); if (rec_gap || (msg_probe(msg))) { - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, rec_gap, 0, - 0, max_pkt_ack); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, + rec_gap, 0, 0); } if (msg_seq_gap(msg)) { l_ptr->stats.recv_nacks++; - tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->outqueue), + tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq), msg_seq_gap(msg)); } break; @@ -1675,7 +1572,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, */ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) { - u32 msgcount = skb_queue_len(&l_ptr->outqueue); + int msgcount; struct tipc_link *tunnel = l_ptr->owner->active_links[0]; struct tipc_msg tunnel_hdr; struct sk_buff *skb; @@ -1684,12 +1581,15 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL, + FAILOVER_MSG, INT_H_SIZE, l_ptr->addr); + skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); + msgcount = skb_queue_len(&l_ptr->transmq); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); - if (skb_queue_empty(&l_ptr->outqueue)) { + if (skb_queue_empty(&l_ptr->transmq)) { skb = tipc_buf_acquire(INT_H_SIZE); if (skb) { skb_copy_to_linear_data(skb, &tunnel_hdr, INT_H_SIZE); @@ -1705,7 +1605,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) split_bundles = (l_ptr->owner->active_links[0] != l_ptr->owner->active_links[1]); - skb_queue_walk(&l_ptr->outqueue, skb) { + skb_queue_walk(&l_ptr->transmq, skb) { struct tipc_msg *msg = buf_msg(skb); if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { @@ -1736,157 +1636,105 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) * and sequence order is preserved per sender/receiver socket pair. * Owner node is locked. */ -void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, - struct tipc_link *tunnel) +void tipc_link_dup_queue_xmit(struct tipc_link *link, + struct tipc_link *tnl) { struct sk_buff *skb; - struct tipc_msg tunnel_hdr; - - tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); - msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); - msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); - skb_queue_walk(&l_ptr->outqueue, skb) { + struct tipc_msg tnl_hdr; + struct sk_buff_head *queue = &link->transmq; + int mcnt; + + tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL, + SYNCH_MSG, INT_H_SIZE, link->addr); + mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq); + msg_set_msgcnt(&tnl_hdr, mcnt); + msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id); + +tunnel_queue: + skb_queue_walk(queue, skb) { struct sk_buff *outskb; struct tipc_msg *msg = buf_msg(skb); - u32 length = msg_size(msg); + u32 len = msg_size(msg); - if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */ - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_size(&tunnel_hdr, length + INT_H_SIZE); - outskb = tipc_buf_acquire(length + INT_H_SIZE); + msg_set_ack(msg, mod(link->next_in_no - 1)); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + msg_set_size(&tnl_hdr, len + INT_H_SIZE); + outskb = tipc_buf_acquire(len + INT_H_SIZE); if (outskb == NULL) { pr_warn("%sunable to send duplicate msg\n", link_co_err); return; } - skb_copy_to_linear_data(outskb, &tunnel_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, skb->data, - length); - __tipc_link_xmit_skb(tunnel, outskb); - if (!tipc_link_is_up(l_ptr)) + skb_copy_to_linear_data(outskb, &tnl_hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, + skb->data, len); + __tipc_link_xmit_skb(tnl, outskb); + if (!tipc_link_is_up(link)) return; } -} - -/** - * buf_extract - extracts embedded TIPC message from another message - * @skb: encapsulating message buffer - * @from_pos: offset to extract from - * - * Returns a new message buffer containing an embedded message. The - * encapsulating buffer is left unchanged. - */ -static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) -{ - struct tipc_msg *msg = (struct tipc_msg *)(skb->data + from_pos); - u32 size = msg_size(msg); - struct sk_buff *eb; - - eb = tipc_buf_acquire(size); - if (eb) - skb_copy_to_linear_data(eb, msg, size); - return eb; -} - -/* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. - * Owner node is locked. - */ -static void tipc_link_dup_rcv(struct tipc_link *l_ptr, - struct sk_buff *t_buf) -{ - struct sk_buff *buf; - - if (!tipc_link_is_up(l_ptr)) + if (queue == &link->backlogq) return; - - buf = buf_extract(t_buf, INT_H_SIZE); - if (buf == NULL) { - pr_warn("%sfailed to extract inner dup pkt\n", link_co_err); - return; - } - - /* Add buffer to deferred queue, if applicable: */ - link_handle_out_of_seq_msg(l_ptr, buf); + queue = &link->backlogq; + goto tunnel_queue; } -/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet +/* tipc_link_failover_rcv(): Receive a tunnelled FAILOVER_MSG packet * Owner node is locked. */ -static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, - struct sk_buff *t_buf) +static bool tipc_link_failover_rcv(struct tipc_link *link, + struct sk_buff **skb) { - struct tipc_msg *t_msg = buf_msg(t_buf); - struct sk_buff *buf = NULL; - struct tipc_msg *msg; - - if (tipc_link_is_up(l_ptr)) - tipc_link_reset(l_ptr); - - /* First failover packet? */ - if (l_ptr->exp_msg_count == START_CHANGEOVER) - l_ptr->exp_msg_count = msg_msgcnt(t_msg); - - /* Should there be an inner packet? */ - if (l_ptr->exp_msg_count) { - l_ptr->exp_msg_count--; - buf = buf_extract(t_buf, INT_H_SIZE); - if (buf == NULL) { - pr_warn("%sno inner failover pkt\n", link_co_err); - goto exit; - } - msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(*skb); + struct sk_buff *iskb = NULL; + struct tipc_link *pl = NULL; + int bearer_id = msg_bearer_id(msg); + int pos = 0; - if (less(msg_seqno(msg), l_ptr->reset_checkpoint)) { - kfree_skb(buf); - buf = NULL; - goto exit; - } - if (msg_user(msg) == MSG_FRAGMENTER) { - l_ptr->stats.recv_fragments++; - tipc_buf_append(&l_ptr->reasm_buf, &buf); - } + if (msg_type(msg) != FAILOVER_MSG) { + pr_warn("%sunknown tunnel pkt received\n", link_co_err); + goto exit; } -exit: - if ((!l_ptr->exp_msg_count) && (l_ptr->flags & LINK_STOPPED)) - tipc_link_delete(l_ptr); - return buf; -} + if (bearer_id >= MAX_BEARERS) + goto exit; -/* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent - * via other link as result of a failover (ORIGINAL_MSG) or - * a new active link (DUPLICATE_MSG). Failover packets are - * returned to the active link for delivery upwards. - * Owner node is locked. - */ -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, - struct sk_buff **buf) -{ - struct sk_buff *t_buf = *buf; - struct tipc_link *l_ptr; - struct tipc_msg *t_msg = buf_msg(t_buf); - u32 bearer_id = msg_bearer_id(t_msg); + if (bearer_id == link->bearer_id) + goto exit; - *buf = NULL; + pl = link->owner->links[bearer_id]; + if (pl && tipc_link_is_up(pl)) + tipc_link_reset(pl); - if (bearer_id >= MAX_BEARERS) + if (link->failover_pkts == FIRST_FAILOVER) + link->failover_pkts = msg_msgcnt(msg); + + /* Should we expect an inner packet? */ + if (!link->failover_pkts) goto exit; - l_ptr = n_ptr->links[bearer_id]; - if (!l_ptr) + if (!tipc_msg_extract(*skb, &iskb, &pos)) { + pr_warn("%sno inner failover pkt\n", link_co_err); + *skb = NULL; goto exit; + } + link->failover_pkts--; + *skb = NULL; - if (msg_type(t_msg) == DUPLICATE_MSG) - tipc_link_dup_rcv(l_ptr, t_buf); - else if (msg_type(t_msg) == ORIGINAL_MSG) - *buf = tipc_link_failover_rcv(l_ptr, t_buf); - else - pr_warn("%sunknown tunnel pkt received\n", link_co_err); + /* Was this packet already delivered? */ + if (less(buf_seqno(iskb), link->failover_checkpt)) { + kfree_skb(iskb); + iskb = NULL; + goto exit; + } + if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) { + link->stats.recv_fragments++; + tipc_buf_append(&link->failover_skb, &iskb); + } exit: - kfree_skb(t_buf); - return *buf != NULL; + if (!link->failover_pkts && pl) + pl->flags &= ~LINK_FAILINGOVER; + kfree_skb(*skb); + *skb = iskb; + return *skb; } static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) @@ -1901,23 +1749,16 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); } -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) +void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { - /* Data messages from this node, inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4; - l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = (window / 3) * 5; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = (window / 3) * 6; - /* Transiting data messages,inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600; - l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200; - l_ptr->queue_limit[CONN_MANAGER] = 1200; - l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500; - l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000; - /* FRAGMENT and LAST_FRAGMENT packets */ - l_ptr->queue_limit[MSG_FRAGMENTER] = 4000; + int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); + + l->window = win; + l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } /* tipc_link_find_owner - locate owner node of link by link's name @@ -2082,14 +1923,14 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); link_set_supervision_props(link, tol); - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0, 0); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio, 0); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); } if (props[TIPC_NLA_PROP_WIN]) { u32 win; @@ -2194,7 +2035,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(tn->own_addr))) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no)) goto attr_msg_full; @@ -2216,7 +2057,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, - link->queue_limit[TIPC_LOW_IMPORTANCE])) + link->window)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; @@ -2282,7 +2123,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - if (prev_node) { node = tipc_node_find(net, prev_node); if (!node) { @@ -2295,6 +2135,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 1; goto out; } + tipc_node_put(node); list_for_each_entry_continue_rcu(node, &tn->node_list, list) { @@ -2302,6 +2143,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); + tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/link.h b/net/tipc/link.h index 7aeb52092bf3..b5b4e3554d4e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -58,8 +58,10 @@ /* Link endpoint execution states */ -#define LINK_STARTED 0x0001 -#define LINK_STOPPED 0x0002 +#define LINK_STARTED 0x0001 +#define LINK_STOPPED 0x0002 +#define LINK_SYNCHING 0x0004 +#define LINK_FAILINGOVER 0x0008 /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -118,13 +120,13 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') - * @queue_limit: outbound message queue congestion thresholds (indexed by user) + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset - * @max_pkt: current maximum packet size for this link - * @max_pkt_target: desired maximum packet size for this link - * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target) - * @outqueue: outbound message queue + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent * @next_out_no: next sequence number to use for outbound messages * @last_retransmitted: sequence number of most recently retransmitted message * @stale_count: # of identical retransmit requests made by peer @@ -165,36 +167,40 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; - u32 queue_limit[15]; /* queue_limit[0]==window limit */ + u16 synch_point; - /* Changeover */ - u32 exp_msg_count; - u32 reset_checkpoint; + /* Failover */ + u16 failover_pkts; + u16 failover_checkpt; + struct sk_buff *failover_skb; /* Max packet negotiation */ - u32 max_pkt; - u32 max_pkt_target; - u32 max_pkt_probes; + u16 mtu; + u16 advertised_mtu; /* Sending */ - struct sk_buff_head outqueue; + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; u32 next_out_no; + u32 window; u32 last_retransmitted; u32 stale_count; /* Reception */ u32 next_in_no; - struct sk_buff_head deferred_queue; - u32 unacked_window; + u32 rcv_unacked; + struct sk_buff_head deferdq; struct sk_buff_head inputq; struct sk_buff_head namedq; /* Congestion handling */ - struct sk_buff *next_out; struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ - u32 long_msg_seq_no; struct sk_buff *reasm_buf; /* Statistics */ @@ -225,7 +231,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); + u32 gap, u32 tolerance, u32 priority); void tipc_link_push_packets(struct tipc_link *l_ptr); u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf); void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); @@ -302,9 +308,4 @@ static inline int link_reset_reset(struct tipc_link *l_ptr) return l_ptr->state == RESET_RESET; } -static inline int link_congested(struct tipc_link *l_ptr) -{ - return skb_queue_len(&l_ptr->outqueue) >= l_ptr->queue_limit[0]; -} - #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b6eb90cd3ef7..c3e96e815418 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -1,7 +1,7 @@ /* * net/tipc/msg.c: TIPC message header routines * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -165,6 +165,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } if (fragid == LAST_FRAGMENT) { + TIPC_SKB_CB(head)->validated = false; + if (unlikely(!tipc_msg_validate(head))) + goto err; *buf = head; TIPC_SKB_CB(head)->tail = NULL; *headbuf = NULL; @@ -172,7 +175,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } *buf = NULL; return 0; - err: pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); @@ -181,6 +183,48 @@ err: return 0; } +/* tipc_msg_validate - validate basic format of received message + * + * This routine ensures a TIPC message has an acceptable header, and at least + * as much data as the header indicates it should. The routine also ensures + * that the entire message header is stored in the main fragment of the message + * buffer, to simplify future access to message header fields. + * + * Note: Having extra info present in the message header or data areas is OK. + * TIPC will ignore the excess, under the assumption that it is optional info + * introduced by a later release of the protocol. + */ +bool tipc_msg_validate(struct sk_buff *skb) +{ + struct tipc_msg *msg; + int msz, hsz; + + if (unlikely(TIPC_SKB_CB(skb)->validated)) + return true; + if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) + return false; + + hsz = msg_hdr_sz(buf_msg(skb)); + if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE)) + return false; + if (unlikely(!pskb_may_pull(skb, hsz))) + return false; + + msg = buf_msg(skb); + if (unlikely(msg_version(msg) != TIPC_VERSION)) + return false; + + msz = msg_size(msg); + if (unlikely(msz < hsz)) + return false; + if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) + return false; + if (unlikely(skb->len < msz)) + return false; + + TIPC_SKB_CB(skb)->validated = true; + return true; +} /** * tipc_msg_build - create buffer chain containing specified header and data @@ -228,6 +272,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); + msg_set_importance(&pkthdr, msg_importance(mhdr)); /* Prepare first fragment */ skb = tipc_buf_acquire(pktmax); @@ -286,33 +331,36 @@ error: /** * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one - * @list: the buffer chain of the existing buffer ("bundle") + * @bskb: the buffer to append to ("bundle") * @skb: buffer to be appended * @mtu: max allowable size for the bundle buffer * Consumes buffer if successful * Returns true if bundling could be performed, otherwise false */ -bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) { - struct sk_buff *bskb = skb_peek_tail(list); - struct tipc_msg *bmsg = buf_msg(bskb); + struct tipc_msg *bmsg; struct tipc_msg *msg = buf_msg(skb); - unsigned int bsz = msg_size(bmsg); + unsigned int bsz; unsigned int msz = msg_size(msg); - u32 start = align(bsz); + u32 start, pad; u32 max = mtu - INT_H_SIZE; - u32 pad = start - bsz; if (likely(msg_user(msg) == MSG_FRAGMENTER)) return false; - if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + if (!bskb) + return false; + bmsg = buf_msg(bskb); + bsz = msg_size(bmsg); + start = align(bsz); + pad = start - bsz; + + if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL)) return false; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) return false; if (likely(msg_user(bmsg) != MSG_BUNDLER)) return false; - if (likely(!TIPC_SKB_CB(bskb)->bundling)) - return false; if (unlikely(skb_tailroom(bskb) < (pad + msz))) return false; if (unlikely(max < (start + msz))) @@ -328,34 +376,40 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) /** * tipc_msg_extract(): extract bundled inner packet from buffer - * @skb: linear outer buffer, to be extracted from. + * @skb: buffer to be extracted from. * @iskb: extracted inner buffer, to be returned - * @pos: position of msg to be extracted. Returns with pointer of next msg + * @pos: position in outer message of msg to be extracted. + * Returns position of next msg * Consumes outer buffer when last packet extracted * Returns true when when there is an extracted buffer, otherwise false */ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) { - struct tipc_msg *msg = buf_msg(skb); - int imsz; - struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos); + struct tipc_msg *msg; + int imsz, offset; - /* Is there space left for shortest possible message? */ - if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE)) + *iskb = NULL; + if (unlikely(skb_linearize(skb))) + goto none; + + msg = buf_msg(skb); + offset = msg_hdr_sz(msg) + *pos; + if (unlikely(offset > (msg_size(msg) - MIN_H_SIZE))) goto none; - imsz = msg_size(imsg); - /* Is there space left for current message ? */ - if ((*pos + imsz) > msg_data_sz(msg)) + *iskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!*iskb)) goto none; - *iskb = tipc_buf_acquire(imsz); - if (!*iskb) + skb_pull(*iskb, offset); + imsz = msg_size(buf_msg(*iskb)); + skb_trim(*iskb, imsz); + if (unlikely(!tipc_msg_validate(*iskb))) goto none; - skb_copy_to_linear_data(*iskb, imsg, imsz); *pos += align(imsz); return true; none: kfree_skb(skb); + kfree_skb(*iskb); *iskb = NULL; return false; } @@ -369,18 +423,17 @@ none: * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff_head *list, - struct sk_buff *skb, u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; struct tipc_msg *bmsg; - struct tipc_msg *msg = buf_msg(skb); + struct tipc_msg *msg = buf_msg(*skb); u32 msz = msg_size(msg); u32 max = mtu - INT_H_SIZE; if (msg_user(msg) == MSG_FRAGMENTER) return false; - if (msg_user(msg) == CHANGEOVER_PROTOCOL) + if (msg_user(msg) == TUNNEL_PROTOCOL) return false; if (msg_user(msg) == BCAST_PROTOCOL) return false; @@ -398,9 +451,9 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); - TIPC_SKB_CB(bskb)->bundling = true; - __skb_queue_tail(list, bskb); - return tipc_msg_bundle(list, skb, mtu); + tipc_msg_bundle(bskb, *skb, mtu); + *skb = bskb; + return true; } /** @@ -415,21 +468,17 @@ bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err) { struct tipc_msg *msg = buf_msg(buf); - uint imp = msg_importance(msg); struct tipc_msg ohdr; uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); if (skb_linearize(buf)) goto exit; + msg = buf_msg(buf); if (msg_dest_droppable(msg)) goto exit; if (msg_errcode(msg)) goto exit; - memcpy(&ohdr, msg, msg_hdr_sz(msg)); - imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); - if (msg_isdata(msg)) - msg_set_importance(msg, imp); msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); @@ -462,15 +511,18 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, { struct tipc_msg *msg = buf_msg(skb); u32 dport; + u32 own_addr = tipc_own_addr(net); if (!msg_isdata(msg)) return false; if (!msg_named(msg)) return false; + if (msg_errcode(msg)) + return false; *err = -TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; - if (msg_reroute_cnt(msg) > 0) + if (msg_reroute_cnt(msg)) return false; *dnode = addr_domain(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), @@ -478,6 +530,8 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, if (!dport) return false; msg_incr_reroute_cnt(msg); + if (*dnode != own_addr) + msg_set_prevnode(msg, own_addr); msg_set_destnode(msg, *dnode); msg_set_destport(msg, dport); *err = TIPC_OK; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 9ace47f44a69..e1d3595e2ee9 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, 2014, Ericsson AB + * Copyright (c) 2000-2007, 2014-2015 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -54,6 +54,8 @@ struct plist; * - TIPC_HIGH_IMPORTANCE * - TIPC_CRITICAL_IMPORTANCE */ +#define TIPC_SYSTEM_IMPORTANCE 4 + /* * Payload message types @@ -64,6 +66,19 @@ struct plist; #define TIPC_DIRECT_MSG 3 /* + * Internal message users + */ +#define BCAST_PROTOCOL 5 +#define MSG_BUNDLER 6 +#define LINK_PROTOCOL 7 +#define CONN_MANAGER 8 +#define TUNNEL_PROTOCOL 10 +#define NAME_DISTRIBUTOR 11 +#define MSG_FRAGMENTER 12 +#define LINK_CONFIG 13 +#define SOCK_WAKEUP 14 /* pseudo user */ + +/* * Message header sizes */ #define SHORT_H_SIZE 24 /* In-cluster basic payload message */ @@ -76,7 +91,7 @@ struct plist; #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -#define TIPC_MEDIA_ADDR_OFFSET 5 +#define TIPC_MEDIA_INFO_OFFSET 5 /** * TIPC message buffer code @@ -87,12 +102,12 @@ struct plist; * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields * are word aligned for quicker access */ -#define BUF_HEADROOM LL_MAX_HEADER +#define BUF_HEADROOM (LL_MAX_HEADER + 48) struct tipc_skb_cb { void *handle; struct sk_buff *tail; - bool deferred; + bool validated; bool wakeup_pending; bool bundling; u16 chain_sz; @@ -170,16 +185,6 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 25, 0xf, n); } -static inline u32 msg_importance(struct tipc_msg *m) -{ - return msg_bits(m, 0, 25, 0xf); -} - -static inline void msg_set_importance(struct tipc_msg *m, u32 i) -{ - msg_set_user(m, i); -} - static inline u32 msg_hdr_sz(struct tipc_msg *m) { return msg_bits(m, 0, 21, 0xf) << 2; @@ -235,6 +240,15 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz) m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + +static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +{ + return (struct tipc_msg *)msg_data(m); +} /* * Word 1 @@ -336,6 +350,25 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) /* * Words 3-10 */ +static inline u32 msg_importance(struct tipc_msg *m) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + return msg_bits(m, 5, 13, 0x7); + if (likely(msg_isdata(m) && !msg_errcode(m))) + return msg_user(m); + return TIPC_SYSTEM_IMPORTANCE; +} + +static inline void msg_set_importance(struct tipc_msg *m, u32 i) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + msg_set_bits(m, 5, 13, 0x7, i); + else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) + msg_set_user(m, i); + else + pr_warn("Trying to set illegal importance in message\n"); +} + static inline u32 msg_prevnode(struct tipc_msg *m) { return msg_word(m, 3); @@ -348,6 +381,8 @@ static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) static inline u32 msg_origport(struct tipc_msg *m) { + if (msg_user(m) == MSG_FRAGMENTER) + m = msg_get_wrapped(m); return msg_word(m, 4); } @@ -443,35 +478,11 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) msg_set_word(m, 10, n); } -static inline unchar *msg_data(struct tipc_msg *m) -{ - return ((unchar *)m) + msg_hdr_sz(m); -} - -static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) -{ - return (struct tipc_msg *)msg_data(m); -} - /* * Constants and routines used to read and write TIPC internal message headers */ /* - * Internal message users - */ -#define BCAST_PROTOCOL 5 -#define MSG_BUNDLER 6 -#define LINK_PROTOCOL 7 -#define CONN_MANAGER 8 -#define ROUTE_DISTRIBUTOR 9 /* obsoleted */ -#define CHANGEOVER_PROTOCOL 10 -#define NAME_DISTRIBUTOR 11 -#define MSG_FRAGMENTER 12 -#define LINK_CONFIG 13 -#define SOCK_WAKEUP 14 /* pseudo user */ - -/* * Connection management protocol message types */ #define CONN_PROBE 0 @@ -501,8 +512,8 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) /* * Changeover tunnel message types */ -#define DUPLICATE_MSG 0 -#define ORIGINAL_MSG 1 +#define SYNCH_MSG 0 +#define FAILOVER_MSG 1 /* * Config protocol message types @@ -510,7 +521,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 - /* * Word 1 */ @@ -534,6 +544,24 @@ static inline void msg_set_node_sig(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 0, 0xffff, n); } +static inline u32 msg_node_capabilities(struct tipc_msg *m) +{ + return msg_bits(m, 1, 15, 0x1fff); +} + +static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 15, 0x1fff, n); +} + +static inline bool msg_dup(struct tipc_msg *m) +{ + if (likely(msg_user(m) != TUNNEL_PROTOCOL)) + return false; + if (msg_type(m) != SYNCH_MSG) + return false; + return true; +} /* * Word 2 @@ -688,7 +716,7 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) static inline char *msg_media_addr(struct tipc_msg *m) { - return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET]; + return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; } /* @@ -734,21 +762,8 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) -{ - if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); -} - -static inline u32 msg_tot_origport(struct tipc_msg *m) -{ - if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) - return msg_origport(msg_get_wrapped(m)); - return msg_origport(m); -} - struct sk_buff *tipc_buf_acquire(u32 size); +bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, @@ -757,9 +772,9 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); -bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); -bool tipc_msg_make_bundle(struct sk_buff_head *list, - struct sk_buff *skb, u32 mtu, u32 dnode); +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index fcb07915aaac..41e7b7e4dda0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -98,7 +98,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) continue; if (!tipc_node_active_links(node)) continue; - oskb = skb_copy(skb, GFP_ATOMIC); + oskb = pskb_copy(skb, GFP_ATOMIC); if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); @@ -244,6 +244,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_add_tail(&publ->nodesub_list, &node->publ_list); tipc_node_unlock(node); + tipc_node_put(node); } static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, @@ -258,6 +259,7 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_del_init(&publ->nodesub_list); tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 105ba7adf06f..ab0ac62a1287 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -811,8 +811,8 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq) sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { - tipc_nametbl_remove_publ(net, publ->type, publ->lower, - publ->node, publ->ref, publ->key); + tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node, + publ->ref, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); diff --git a/net/tipc/node.c b/net/tipc/node.c index 86152de8248d..22c059ad2999 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,7 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); +static void tipc_node_delete(struct tipc_node *node); struct tipc_sock_conn { u32 port; @@ -67,6 +68,23 @@ static unsigned int tipc_hashfn(u32 addr) return addr & (NODE_HTABLE_SIZE - 1); } +static void tipc_node_kref_release(struct kref *kref) +{ + struct tipc_node *node = container_of(kref, struct tipc_node, kref); + + tipc_node_delete(node); +} + +void tipc_node_put(struct tipc_node *node) +{ + kref_put(&node->kref, tipc_node_kref_release); +} + +static void tipc_node_get(struct tipc_node *node) +{ + kref_get(&node->kref); +} + /* * tipc_node_find - locate specified node object, if it exists */ @@ -82,6 +100,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) { + tipc_node_get(node); rcu_read_unlock(); return node; } @@ -106,12 +125,13 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) } n_ptr->addr = addr; n_ptr->net = net; + kref_init(&n_ptr->kref); spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - __skb_queue_head_init(&n_ptr->bclink.deferred_queue); + __skb_queue_head_init(&n_ptr->bclink.deferdq); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -120,16 +140,17 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; + tipc_node_get(n_ptr); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_node *node) { - list_del_rcu(&n_ptr->list); - hlist_del_rcu(&n_ptr->hash); - kfree_rcu(n_ptr, rcu); + list_del_rcu(&node->list); + hlist_del_rcu(&node->hash); + kfree_rcu(node, rcu); } void tipc_node_stop(struct net *net) @@ -139,7 +160,7 @@ void tipc_node_stop(struct net *net) spin_lock_bh(&tn->node_list_lock); list_for_each_entry_safe(node, t_node, &tn->node_list, list) - tipc_node_delete(tn, node); + tipc_node_put(node); spin_unlock_bh(&tn->node_list_lock); } @@ -147,6 +168,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; + int err = 0; if (in_own_node(net, dnode)) return 0; @@ -157,8 +179,10 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) return -EHOSTUNREACH; } conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) - return -EHOSTUNREACH; + if (!conn) { + err = -EHOSTUNREACH; + goto exit; + } conn->peer_node = dnode; conn->port = port; conn->peer_port = peer_port; @@ -166,7 +190,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) tipc_node_lock(node); list_add_tail(&conn->list, &node->conn_sks); tipc_node_unlock(node); - return 0; +exit: + tipc_node_put(node); + return err; } void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) @@ -189,6 +215,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) kfree(conn); } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -227,8 +254,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) active[0] = active[1] = l_ptr; exit: /* Leave room for changeover header when returning 'mtu' to users: */ - n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; } /** @@ -292,11 +319,10 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) /* Leave room for changeover header when returning 'mtu' to users: */ if (active[0]) { - n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; return; } - /* Loopback link went down? No fragmentation needed from now on. */ if (n_ptr->addr == tn->own_addr) { n_ptr->act_mtus[0] = MAX_MSG_SIZE; @@ -354,7 +380,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferred_queue); + __skb_queue_purge(&n_ptr->bclink.deferdq); if (n_ptr->bclink.reasm_buf) { kfree_skb(n_ptr->bclink.reasm_buf); @@ -367,18 +393,17 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->bclink.recv_permitted = false; } - /* Abort link changeover */ + /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { struct tipc_link *l_ptr = n_ptr->links[i]; if (!l_ptr) continue; - l_ptr->reset_checkpoint = l_ptr->next_in_no; - l_ptr->exp_msg_count = 0; + l_ptr->flags &= ~LINK_FAILINGOVER; + l_ptr->failover_checkpt = 0; + l_ptr->failover_pkts = 0; + kfree_skb(l_ptr->failover_skb); + l_ptr->failover_skb = NULL; tipc_link_reset_fragments(l_ptr); - - /* Link marked for deletion after failover? => do it now */ - if (l_ptr->flags & LINK_STOPPED) - tipc_link_delete(l_ptr); } n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; @@ -417,19 +442,25 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) { struct tipc_link *link; + int err = -EINVAL; struct tipc_node *node = tipc_node_find(net, addr); - if ((bearer_id >= MAX_BEARERS) || !node) - return -EINVAL; + if (!node) + return err; + + if (bearer_id >= MAX_BEARERS) + goto exit; + tipc_node_lock(node); link = node->links[bearer_id]; if (link) { strncpy(linkname, link->name, len); - tipc_node_unlock(node); - return 0; + err = 0; } +exit: tipc_node_unlock(node); - return -EINVAL; + tipc_node_put(node); + return err; } void tipc_node_unlock(struct tipc_node *node) @@ -459,7 +490,7 @@ void tipc_node_unlock(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_NAMED_MSG_EVT); + TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); spin_unlock_bh(&node->lock); @@ -488,6 +519,9 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_BCAST_MSG_EVT) tipc_bclink_input(net); + + if (flags & TIPC_BCAST_RESET) + tipc_link_reset_all(node); } /* Caller should hold node lock for the passed node */ @@ -542,17 +576,21 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - - if (last_addr && !tipc_node_find(net, last_addr)) { - rcu_read_unlock(); - /* We never set seq or call nl_dump_check_consistent() this - * means that setting prev_seq here will cause the consistence - * check to fail in the netlink callback handler. Resulting in - * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if - * the node state changed while we released the lock. - */ - cb->prev_seq = 1; - return -EPIPE; + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); } list_for_each_entry_rcu(node, &tn->node_list, list) { diff --git a/net/tipc/node.h b/net/tipc/node.h index 3d18c66b7f78..02d5c20dc551 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,8 @@ enum { TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), TIPC_NAMED_MSG_EVT = (1 << 8), - TIPC_BCAST_MSG_EVT = (1 << 9) + TIPC_BCAST_MSG_EVT = (1 << 9), + TIPC_BCAST_RESET = (1 << 10) }; /** @@ -84,7 +85,7 @@ struct tipc_node_bclink { u32 last_sent; u32 oos_state; u32 deferred_size; - struct sk_buff_head deferred_queue; + struct sk_buff_head deferdq; struct sk_buff *reasm_buf; int inputq_map; bool recv_permitted; @@ -93,6 +94,7 @@ struct tipc_node_bclink { /** * struct tipc_node - TIPC node structure * @addr: network address of node + * @ref: reference counter to node object * @lock: spinlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain @@ -106,6 +108,7 @@ struct tipc_node_bclink { * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities * @signature: node instance identifier * @link_id: local and remote bearer ids of changing link, if any * @publ_list: list of publications @@ -113,6 +116,7 @@ struct tipc_node_bclink { */ struct tipc_node { u32 addr; + struct kref kref; spinlock_t lock; struct net *net; struct hlist_node hash; @@ -125,7 +129,8 @@ struct tipc_node { struct tipc_node_bclink bclink; struct list_head list; int link_cnt; - int working_links; + u16 working_links; + u16 capabilities; u32 signature; u32 link_id; struct list_head publ_list; @@ -134,6 +139,7 @@ struct tipc_node { }; struct tipc_node *tipc_node_find(struct net *net, u32 addr); +void tipc_node_put(struct tipc_node *node); struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); @@ -168,10 +174,12 @@ static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) node = tipc_node_find(net, addr); - if (likely(node)) + if (likely(node)) { mtu = node->act_mtus[selector & 1]; - else + tipc_node_put(node); + } else { mtu = MAX_MSG_SIZE; + } return mtu; } diff --git a/net/tipc/server.c b/net/tipc/server.c index eadd4ed45905..ab6183cdb121 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -37,11 +37,13 @@ #include "core.h" #include "socket.h" #include <net/sock.h> +#include <linux/module.h> /* Number of messages to send before rescheduling */ #define MAX_SEND_MSG_COUNT 25 #define MAX_RECV_MSG_COUNT 25 #define CF_CONNECTED 1 +#define CF_SERVER 2 #define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) @@ -88,9 +90,19 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct sockaddr_tipc *saddr = con->server->saddr; + struct socket *sock = con->sock; + struct sock *sk; - if (con->sock) { - tipc_sock_release_local(con->sock); + if (sock) { + sk = sock->sk; + if (test_bit(CF_SERVER, &con->flags)) { + __module_get(sock->ops->owner); + __module_get(sk->sk_prot_creator->owner); + } + saddr->scope = -TIPC_NODE_SCOPE; + kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + sk_release_kernel(sk); con->sock = NULL; } @@ -281,7 +293,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) struct tipc_conn *newcon; int ret; - ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); + ret = kernel_accept(sock, &newsock, O_NONBLOCK); if (ret < 0) return ret; @@ -309,9 +321,12 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = tipc_sock_create_local(s->net, s->type, &sock); + ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); if (ret < 0) return NULL; + + sk_change_net(sock->sk, s->net); + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, (char *)&s->imp, sizeof(s->imp)); if (ret < 0) @@ -337,11 +352,31 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) pr_err("Unknown socket type %d\n", s->type); goto create_err; } + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(sock->ops->owner); + module_put(sock->sk->sk_prot_creator->owner); + set_bit(CF_SERVER, &con->flags); + return sock; create_err: - sock_release(sock); - con->sock = NULL; + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); return NULL; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b4d4467d0bb0..ee90d74d7516 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,7 +35,6 @@ */ #include <linux/rhashtable.h> -#include <linux/jhash.h> #include "core.h" #include "name_table.h" #include "node.h" @@ -74,6 +73,7 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @remote: 'connected' peer for dgram/rdm * @node: hash table node * @rcu: rcu struct for tipc_sock */ @@ -96,6 +96,7 @@ struct tipc_sock { bool link_cong; uint sent_unacked; uint rcv_unacked; + struct sockaddr_tipc remote; struct rhash_head node; struct rcu_head rcu; }; @@ -114,13 +115,14 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, + size_t dsz); +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; - static struct proto tipc_proto; -static struct proto tipc_proto_kern; static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, @@ -130,6 +132,8 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG } }; +static const struct rhashtable_params tsk_rht_params; + /* * Revised TIPC socket locking policy: * @@ -338,11 +342,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } /* Allocate socket's protocol area */ - if (!kern) - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); - else - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); - + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); if (sk == NULL) return -ENOMEM; @@ -380,75 +380,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return 0; } -/** - * tipc_sock_create_local - create TIPC socket from inside TIPC module - * @type: socket type - SOCK_RDM or SOCK_SEQPACKET - * - * We cannot use sock_creat_kern here because it bumps module user count. - * Since socket owner and creator is the same module we must make sure - * that module count remains zero for module local sockets, otherwise - * we cannot do rmmod. - * - * Returns 0 on success, errno otherwise - */ -int tipc_sock_create_local(struct net *net, int type, struct socket **res) -{ - int rc; - - rc = sock_create_lite(AF_TIPC, type, 0, res); - if (rc < 0) { - pr_err("Failed to create kernel socket\n"); - return rc; - } - tipc_sk_create(net, *res, 0, 1); - - return 0; -} - -/** - * tipc_sock_release_local - release socket created by tipc_sock_create_local - * @sock: the socket to be released. - * - * Module reference count is not incremented when such sockets are created, - * so we must keep it from being decremented when they are released. - */ -void tipc_sock_release_local(struct socket *sock) -{ - tipc_release(sock); - sock->ops = NULL; - sock_release(sock); -} - -/** - * tipc_sock_accept_local - accept a connection on a socket created - * with tipc_sock_create_local. Use this function to avoid that - * module reference count is inadvertently incremented. - * - * @sock: the accepting socket - * @newsock: reference to the new socket to be created - * @flags: socket flags - */ - -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags) -{ - struct sock *sk = sock->sk; - int ret; - - ret = sock_create_lite(sk->sk_family, sk->sk_type, - sk->sk_protocol, newsock); - if (ret < 0) - return ret; - - ret = tipc_accept(sock, *newsock, flags); - if (ret < 0) { - sock_release(*newsock); - return ret; - } - (*newsock)->ops = sock->ops; - return ret; -} - static void tipc_sk_callback(struct rcu_head *head) { struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); @@ -892,7 +823,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) /** * tipc_sendmsg - send message in connectionless manner - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: amount of user data to be sent @@ -904,9 +834,21 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, +static int tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) { + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_sendmsg(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) +{ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -915,49 +857,40 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, u32 dnode, dport; struct sk_buff_head *pktchain = &sk->sk_write_queue; struct sk_buff *skb; - struct tipc_name_seq *seq = &dest->addr.nameseq; + struct tipc_name_seq *seq; struct iov_iter save; u32 mtu; long timeo; int rc; - if (unlikely(!dest)) - return -EDESTADDRREQ; - - if (unlikely((m->msg_namelen < sizeof(*dest)) || - (dest->family != AF_TIPC))) - return -EINVAL; - if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - - if (iocb) - lock_sock(sk); - + if (unlikely(!dest)) { + if (tsk->connected && sock->state == SS_READY) + dest = &tsk->remote; + else + return -EDESTADDRREQ; + } else if (unlikely(m->msg_namelen < sizeof(*dest)) || + dest->family != AF_TIPC) { + return -EINVAL; + } if (unlikely(sock->state != SS_READY)) { - if (sock->state == SS_LISTENING) { - rc = -EPIPE; - goto exit; - } - if (sock->state != SS_UNCONNECTED) { - rc = -EISCONN; - goto exit; - } - if (tsk->published) { - rc = -EOPNOTSUPP; - goto exit; - } + if (sock->state == SS_LISTENING) + return -EPIPE; + if (sock->state != SS_UNCONNECTED) + return -EISCONN; + if (tsk->published) + return -EOPNOTSUPP; if (dest->addrtype == TIPC_ADDR_NAME) { tsk->conn_type = dest->addr.name.name.type; tsk->conn_instance = dest->addr.name.name.instance; } } - + seq = &dest->addr.nameseq; timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); if (dest->addrtype == TIPC_ADDR_MCAST) { - rc = tipc_sendmcast(sock, seq, m, dsz, timeo); - goto exit; + return tipc_sendmcast(sock, seq, m, dsz, timeo); } else if (dest->addrtype == TIPC_ADDR_NAME) { u32 type = dest->addr.name.name.type; u32 inst = dest->addr.name.name.instance; @@ -972,10 +905,8 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); - if (unlikely(!dport && !dnode)) { - rc = -EHOSTUNREACH; - goto exit; - } + if (unlikely(!dport && !dnode)) + return -EHOSTUNREACH; } else if (dest->addrtype == TIPC_ADDR_ID) { dnode = dest->addr.id.node; msg_set_type(mhdr, TIPC_DIRECT_MSG); @@ -990,7 +921,7 @@ new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); if (rc < 0) - goto exit; + return rc; do { skb = skb_peek(pktchain); @@ -1013,9 +944,6 @@ new_mtu: if (rc) __skb_queue_purge(pktchain); } while (!rc); -exit: - if (iocb) - release_sock(sk); return rc; } @@ -1052,7 +980,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) /** * tipc_send_stream - send stream-oriented data - * @iocb: (unused) * @sock: socket structure * @m: data to send * @dsz: total length of data to be transmitted @@ -1062,8 +989,19 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success (or partial success), * or errno if no data sent */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_send_stream(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); @@ -1080,7 +1018,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, /* Handle implied connection establishment */ if (unlikely(dest)) { - rc = tipc_sendmsg(iocb, sock, m, dsz); + rc = __tipc_sendmsg(sock, m, dsz); if (dsz && (dsz == rc)) tsk->sent_unacked = 1; return rc; @@ -1088,15 +1026,11 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, if (dsz > (uint)INT_MAX) return -EMSGSIZE; - if (iocb) - lock_sock(sk); - if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) - rc = -EPIPE; + return -EPIPE; else - rc = -ENOTCONN; - goto exit; + return -ENOTCONN; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); @@ -1108,7 +1042,7 @@ next: send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); if (unlikely(rc < 0)) - goto exit; + return rc; do { if (likely(!tsk_conn_cong(tsk))) { rc = tipc_link_xmit(net, pktchain, dnode, portid); @@ -1133,15 +1067,12 @@ next: if (rc) __skb_queue_purge(pktchain); } while (!rc); -exit: - if (iocb) - release_sock(sk); + return sent ? sent : rc; } /** * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: length of data to be transmitted @@ -1150,13 +1081,12 @@ exit: * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - return tipc_send_stream(iocb, sock, m, dsz); + return tipc_send_stream(sock, m, dsz); } /* tipc_sk_finish_conn - complete the setup of a connection @@ -1317,12 +1247,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; err = -EAGAIN; if (!timeo) break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; } finish_wait(sk_sleep(sk), &wait); *timeop = timeo; @@ -1331,7 +1261,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) /** * tipc_recvmsg - receive packet-oriented message - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1341,8 +1270,8 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) * * Returns size of returned message data, errno otherwise */ -static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, + int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -1426,7 +1355,6 @@ exit: /** * tipc_recv_stream - receive stream-oriented data - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1436,8 +1364,8 @@ exit: * * Returns size of returned message data, errno otherwise */ -static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recv_stream(struct socket *sock, struct msghdr *m, + size_t buf_len, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -1909,17 +1837,26 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int destlen, int flags) { struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; - long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout; + long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout; socket_state previous; - int res; + int res = 0; lock_sock(sk); - /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */ + /* DGRAM/RDM connect(), just save the destaddr */ if (sock->state == SS_READY) { - res = -EOPNOTSUPP; + if (dst->family == AF_UNSPEC) { + memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); + tsk->connected = 0; + } else if (destlen != sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + } else { + memcpy(&tsk->remote, dest, destlen); + tsk->connected = 1; + } goto exit; } @@ -1947,7 +1884,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (!timeout) m.msg_flags = MSG_DONTWAIT; - res = tipc_sendmsg(NULL, sock, &m, 0); + res = __tipc_sendmsg(sock, &m, 0); if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; @@ -2027,12 +1964,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) err = -EINVAL; if (sock->state != SS_LISTENING) break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; err = -EAGAIN; if (!timeo) break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; } finish_wait(sk_sleep(sk), &wait); return err; @@ -2103,7 +2040,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) struct msghdr m = {NULL,}; tsk_advance_rx_queue(sk); - tipc_send_packet(NULL, new_sock, &m, 0); + __tipc_send_stream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); @@ -2154,7 +2091,6 @@ restart: TIPC_CONN_SHUTDOWN)) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); - tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); @@ -2312,7 +2248,7 @@ static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) struct tipc_sock *tsk; rcu_read_lock(); - tsk = rhashtable_lookup(&tn->sk_rht, &portid); + tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params); if (tsk) sock_hold(&tsk->sk); rcu_read_unlock(); @@ -2334,7 +2270,8 @@ static int tipc_sk_insert(struct tipc_sock *tsk) portid = TIPC_MIN_PORT; tsk->portid = portid; sock_hold(&tsk->sk); - if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) + if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, + tsk_rht_params)) return 0; sock_put(&tsk->sk); } @@ -2347,26 +2284,27 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct sock *sk = &tsk->sk; struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } +static const struct rhashtable_params tsk_rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, +}; + int tipc_sk_rht_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct rhashtable_params rht_params = { - .nelem_hint = 192, - .head_offset = offsetof(struct tipc_sock, node), - .key_offset = offsetof(struct tipc_sock, portid), - .key_len = sizeof(u32), /* portid */ - .hashfn = jhash, - .max_shift = 20, /* 1M */ - .min_shift = 8, /* 256 */ - }; - return rhashtable_init(&tn->sk_rht, &rht_params); + return rhashtable_init(&tn->sk_rht, &tsk_rht_params); } void tipc_sk_rht_destroy(struct net *net) @@ -2609,12 +2547,6 @@ static struct proto tipc_proto = { .sysctl_rmem = sysctl_tipc_rmem }; -static struct proto tipc_proto_kern = { - .name = "TIPC", - .obj_size = sizeof(struct tipc_sock), - .sysctl_rmem = sysctl_tipc_rmem -}; - /** * tipc_socket_init - initialize TIPC socket interface * diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 238f1b7bd9bd..bf6551389522 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -44,10 +44,6 @@ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) int tipc_socket_init(void); void tipc_socket_stop(void); -int tipc_sock_create_local(struct net *net, int type, struct socket **res); -void tipc_sock_release_local(struct socket *sock); -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags); int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 72c339e432aa..1c147c869c2e 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -162,19 +162,6 @@ static void subscr_del(struct tipc_subscription *sub) atomic_dec(&tn->subscription_count); } -/** - * subscr_terminate - terminate communication with a subscriber - * - * Note: Must call it in process context since it might sleep. - */ -static void subscr_terminate(struct tipc_subscription *sub) -{ - struct tipc_subscriber *subscriber = sub->subscriber; - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - - tipc_conn_terminate(tn->topsrv, subscriber->conid); -} - static void subscr_release(struct tipc_subscriber *subscriber) { struct tipc_subscription *sub; @@ -312,16 +299,14 @@ static void subscr_conn_msg_event(struct net *net, int conid, { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); spin_lock_bh(&subscriber->lock); - if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, - &sub) < 0) { - spin_unlock_bh(&subscriber->lock); - subscr_terminate(sub); - return; - } + subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); if (sub) tipc_nametbl_subscribe(sub); + else + tipc_conn_terminate(tn->topsrv, subscriber->conid); spin_unlock_bh(&subscriber->lock); } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c new file mode 100644 index 000000000000..66deebc66aa1 --- /dev/null +++ b/net/tipc/udp_media.c @@ -0,0 +1,448 @@ +/* net/tipc/udp_media.c: IP bearer support for TIPC + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/socket.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/inet.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/list.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/udp_tunnel.h> +#include <net/addrconf.h> +#include <linux/tipc_netlink.h> +#include "core.h" +#include "bearer.h" + +/* IANA assigned UDP port */ +#define UDP_PORT_DEFAULT 6118 + +static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { + [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, + [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, + [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, +}; + +/** + * struct udp_media_addr - IP/UDP addressing information + * + * This is the bearer level originating address used in neighbor discovery + * messages, and all fields should be in network byte order + */ +struct udp_media_addr { + __be16 proto; + __be16 udp_port; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; +}; + +/** + * struct udp_bearer - ip/udp bearer data structure + * @bearer: associated generic tipc bearer + * @ubsock: bearer associated socket + * @ifindex: local address scope + * @work: used to schedule deferred work on a bearer + */ +struct udp_bearer { + struct tipc_bearer __rcu *bearer; + struct socket *ubsock; + u32 ifindex; + struct work_struct work; +}; + +/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ +static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, + struct udp_media_addr *ua) +{ + memset(addr, 0, sizeof(struct tipc_media_addr)); + addr->media_id = TIPC_MEDIA_TYPE_UDP; + memcpy(addr->value, ua, sizeof(struct udp_media_addr)); + if (ntohs(ua->proto) == ETH_P_IP) { + if (ipv4_is_multicast(ua->ipv4.s_addr)) + addr->broadcast = 1; + } else if (ntohs(ua->proto) == ETH_P_IPV6) { + if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) + addr->broadcast = 1; + } else { + pr_err("Invalid UDP media address\n"); + } +} + +/* tipc_udp_addr2str - convert ip/udp address to string */ +static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) +{ + struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; + + if (ntohs(ua->proto) == ETH_P_IP) + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port)); + else if (ntohs(ua->proto) == ETH_P_IPV6) + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port)); + else + pr_err("Invalid UDP media address\n"); + return 0; +} + +/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ +static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, + char *msg) +{ + struct udp_media_addr *ua; + + ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); + if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; + tipc_udp_media_addr_set(a, ua); + return 0; +} + +/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ +static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, + sizeof(struct udp_media_addr)); + return 0; +} + +/* tipc_send_msg - enqueue a send request */ +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dest) +{ + int ttl, err = 0; + struct udp_bearer *ub; + struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct sk_buff *clone; + struct rtable *rt; + + clone = skb_clone(skb, GFP_ATOMIC); + skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto tx_error; + } + if (dst->proto == htons(ETH_P_IP)) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = clone->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + ttl = ip4_dst_hoplimit(&rt->dst); + err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, + src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, + src->udp_port, dst->udp_port, + false, true); + if (err < 0) { + ip_rt_put(rt); + goto tx_error; + } +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct dst_entry *ndst; + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + err = ipv6_stub->ipv6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); + if (err) + goto tx_error; + ttl = ip6_dst_hoplimit(ndst); + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, + ndst->dev, &src->ipv6, + &dst->ipv6, 0, ttl, src->udp_port, + dst->udp_port, false); +#endif + } + return err; + +tx_error: + kfree_skb(clone); + return err; +} + +/* tipc_udp_recv - read data from bearer socket */ +static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_bearer *ub; + struct tipc_bearer *b; + + ub = rcu_dereference_sk_user_data(sk); + if (!ub) { + pr_err_ratelimited("Failed to get UDP bearer reference"); + kfree_skb(skb); + return 0; + } + + skb_pull(skb, sizeof(struct udphdr)); + rcu_read_lock(); + b = rcu_dereference_rtnl(ub->bearer); + + if (b) { + tipc_rcv(sock_net(sk), skb, b); + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + kfree_skb(skb); + return 0; +} + +static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) +{ + int err = 0; + struct ip_mreqn mreqn; + struct sock *sk = ub->ubsock->sk; + + if (ntohs(remote->proto) == ETH_P_IP) { + if (!ipv4_is_multicast(remote->ipv4.s_addr)) + return 0; + mreqn.imr_multiaddr = remote->ipv4; + mreqn.imr_ifindex = ub->ifindex; + err = ip_mc_join_group(sk, &mreqn); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (!ipv6_addr_is_multicast(&remote->ipv6)) + return 0; + err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, + &remote->ipv6); +#endif + } + return err; +} + +/** + * parse_options - build local/remote addresses from configuration + * @attrs: netlink config data + * @ub: UDP bearer instance + * @local: local bearer IP address/port + * @remote: peer or multicast IP/port + */ +static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, + struct udp_media_addr *local, + struct udp_media_addr *remote) +{ + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct sockaddr_storage *sa_local, *sa_remote; + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { + sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]); + sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]); + } else { +err: + pr_err("Invalid UDP bearer configuration"); + return -EINVAL; + } + if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) { + struct sockaddr_in *ip4; + + ip4 = (struct sockaddr_in *)sa_local; + local->proto = htons(ETH_P_IP); + local->udp_port = ip4->sin_port; + local->ipv4.s_addr = ip4->sin_addr.s_addr; + + ip4 = (struct sockaddr_in *)sa_remote; + remote->proto = htons(ETH_P_IP); + remote->udp_port = ip4->sin_port; + remote->ipv4.s_addr = ip4->sin_addr.s_addr; + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) { + struct sockaddr_in6 *ip6; + + ip6 = (struct sockaddr_in6 *)sa_local; + local->proto = htons(ETH_P_IPV6); + local->udp_port = ip6->sin6_port; + local->ipv6 = ip6->sin6_addr; + ub->ifindex = ip6->sin6_scope_id; + + ip6 = (struct sockaddr_in6 *)sa_remote; + remote->proto = htons(ETH_P_IPV6); + remote->udp_port = ip6->sin6_port; + remote->ipv6 = ip6->sin6_addr; + return 0; +#endif + } + return -EADDRNOTAVAIL; +} + +/** + * tipc_udp_enable - callback to create a new udp bearer instance + * @net: network namespace + * @b: pointer to generic tipc_bearer + * @attrs: netlink bearer configuration + * + * validate the bearer parameters and initialize the udp bearer + * rtnl_lock should be held + */ +static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]) +{ + int err = -EINVAL; + struct udp_bearer *ub; + struct udp_media_addr *remote; + struct udp_media_addr local = {0}; + struct udp_port_cfg udp_conf = {0}; + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + + ub = kzalloc(sizeof(*ub), GFP_ATOMIC); + if (!ub) + return -ENOMEM; + + remote = (struct udp_media_addr *)&b->bcast_addr.value; + memset(remote, 0, sizeof(struct udp_media_addr)); + err = parse_options(attrs, ub, &local, remote); + if (err) + goto err; + + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; + b->bcast_addr.broadcast = 1; + rcu_assign_pointer(b->media_ptr, ub); + rcu_assign_pointer(ub->bearer, b); + tipc_udp_media_addr_set(&b->addr, &local); + if (local.proto == htons(ETH_P_IP)) { + struct net_device *dev; + + dev = __ip_dev_find(net, local.ipv4.s_addr, false); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; + b->mtu = dev->mtu - sizeof(struct iphdr) + - sizeof(struct udphdr); +#if IS_ENABLED(CONFIG_IPV6) + } else if (local.proto == htons(ETH_P_IPV6)) { + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; + udp_conf.local_ip6 = in6addr_any; + b->mtu = 1280; +#endif + } else { + err = -EAFNOSUPPORT; + goto err; + } + udp_conf.local_udp_port = local.udp_port; + err = udp_sock_create(net, &udp_conf, &ub->ubsock); + if (err) + goto err; + tuncfg.sk_user_data = ub; + tuncfg.encap_type = 1; + tuncfg.encap_rcv = tipc_udp_recv; + tuncfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); + + if (enable_mcast(ub, remote)) + goto err; + return 0; +err: + kfree(ub); + return err; +} + +/* cleanup_bearer - break the socket/bearer association */ +static void cleanup_bearer(struct work_struct *work) +{ + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); + synchronize_net(); + kfree(ub); +} + +/* tipc_udp_disable - detach bearer from socket */ +static void tipc_udp_disable(struct tipc_bearer *b) +{ + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err("UDP bearer instance not found\n"); + return; + } + if (ub->ubsock) + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + RCU_INIT_POINTER(b->media_ptr, NULL); + RCU_INIT_POINTER(ub->bearer, NULL); + + /* sock_release need to be done outside of rtnl lock */ + INIT_WORK(&ub->work, cleanup_bearer); + schedule_work(&ub->work); +} + +struct tipc_media udp_media_info = { + .send_msg = tipc_udp_send_msg, + .enable_media = tipc_udp_enable, + .disable_media = tipc_udp_disable, + .addr2str = tipc_udp_addr2str, + .addr2msg = tipc_udp_addr2msg, + .msg2addr = tipc_udp_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_UDP, + .hwaddr_len = 0, + .name = "udp" +}; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 526b6edab018..433f287ee548 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -516,20 +516,15 @@ static unsigned int unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); -static int unix_stream_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_stream_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); -static int unix_dgram_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_dgram_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); +static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); -static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, + int); static int unix_set_peek_off(struct sock *sk, int val) { @@ -1442,8 +1437,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, * Send AF_UNIX data. */ -static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); @@ -1622,8 +1617,8 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) -static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct sock *other = NULL; @@ -1725,8 +1720,8 @@ out_err: return sent ? : err; } -static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk = sock->sk; @@ -1741,19 +1736,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_namelen) msg->msg_namelen = 0; - return unix_dgram_sendmsg(kiocb, sock, msg, len); + return unix_dgram_sendmsg(sock, msg, len); } -static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; - return unix_dgram_recvmsg(iocb, sock, msg, size, flags); + return unix_dgram_recvmsg(sock, msg, size, flags); } static void unix_copy_addr(struct msghdr *msg, struct sock *sk) @@ -1766,9 +1760,8 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) } } -static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; @@ -1900,9 +1893,8 @@ static unsigned int unix_skb_len(const struct sk_buff *skb) return skb->len - UNIXCB(skb).consumed; } -static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 1d0e39c9a3e2..2ec86e652a19 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -949,8 +949,8 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, return mask; } -static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk; @@ -1062,11 +1062,10 @@ out: return err; } -static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { - return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, - flags); + return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags); } static const struct proto_ops vsock_dgram_ops = { @@ -1505,8 +1504,8 @@ static int vsock_stream_getsockopt(struct socket *sock, return 0; } -static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk; struct vsock_sock *vsk; @@ -1644,9 +1643,8 @@ out: static int -vsock_stream_recvmsg(struct kiocb *kiocb, - struct socket *sock, - struct msghdr *msg, size_t len, int flags) +vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7f3255084a6c..c294da095461 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1730,8 +1730,7 @@ static int vmci_transport_dgram_enqueue( return err - sizeof(*dg); } -static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, - struct vsock_sock *vsk, +static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags) { diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 29c8675f9a11..4f5543dd2524 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -175,13 +175,21 @@ config CFG80211_INTERNAL_REGDB Most distributions have a CRDA package. So if unsure, say N. config CFG80211_WEXT - bool "cfg80211 wireless extensions compatibility" + bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT depends on CFG80211 select WEXT_CORE + default y if CFG80211_WEXT_EXPORT help Enable this option if you need old userspace for wireless extensions with cfg80211-based drivers. +config CFG80211_WEXT_EXPORT + bool + depends on CFG80211 + help + Drivers should select this option if they require cfg80211's + wext compatibility symbols to be exported. + config LIB80211 tristate default n diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index e24fc585c883..4c55fab9b4e4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -30,7 +30,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, return; bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, NULL, 0, - WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS); + IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY_ANY); if (WARN_ON(!bss)) return; @@ -533,7 +533,7 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, else if (wdev->wext.ibss.bssid) memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); else - memset(ap_addr->sa_data, 0, ETH_ALEN); + eth_zero_addr(ap_addr->sa_data); wdev_unlock(wdev); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 2c52b59e43f3..7aae329e2b4e 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -229,7 +229,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, return -EALREADY; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, - WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + IEEE80211_BSS_TYPE_ESS, + IEEE80211_PRIVACY_ANY); if (!req.bss) return -ENOENT; @@ -296,7 +297,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, rdev->wiphy.vht_capa_mod_mask); req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, - WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + IEEE80211_BSS_TYPE_ESS, + IEEE80211_PRIVACY_ANY); if (!req->bss) return -ENOENT; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b6f84f6a2a09..dd78445c7d50 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -399,6 +399,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, + [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -1098,8 +1099,6 @@ static int nl80211_send_wowlan(struct sk_buff *msg, if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; - /* TODO: send wowlan net detect */ - nla_nest_end(msg, nl_wowlan); return 0; @@ -2668,7 +2667,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), - type, err ? NULL : &flags, ¶ms); + NET_NAME_USER, type, err ? NULL : &flags, + ¶ms); if (WARN_ON(!wdev)) { nlmsg_free(msg); return -EPROTO; @@ -4968,7 +4968,10 @@ static int parse_reg_rule(struct nlattr *tb[], static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { char *data = NULL; + bool is_indoor; enum nl80211_user_reg_hint_type user_reg_hint_type; + u32 owner_nlportid; + /* * You should only get this when cfg80211 hasn't yet initialized @@ -4994,7 +4997,15 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); return regulatory_hint_user(data, user_reg_hint_type); case NL80211_USER_REG_HINT_INDOOR: - return regulatory_hint_indoor_user(); + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) { + owner_nlportid = info->snd_portid; + is_indoor = !!info->attrs[NL80211_ATTR_REG_INDOOR]; + } else { + owner_nlportid = 0; + is_indoor = true; + } + + return regulatory_hint_indoor(is_indoor, owner_nlportid); default: return -EINVAL; } @@ -5275,7 +5286,7 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff, mask, NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32); if (mask_out) @@ -5653,7 +5664,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - r = set_regdom(rd); + r = set_regdom(rd, REGD_SOURCE_CRDA); /* set_regdom took ownership */ rd = NULL; @@ -5693,8 +5704,8 @@ static int nl80211_parse_random_mac(struct nlattr **attrs, int i; if (!attrs[NL80211_ATTR_MAC] && !attrs[NL80211_ATTR_MAC_MASK]) { - memset(mac_addr, 0, ETH_ALEN); - memset(mac_addr_mask, 0, ETH_ALEN); + eth_zero_addr(mac_addr); + eth_zero_addr(mac_addr_mask); mac_addr[0] = 0x2; mac_addr_mask[0] = 0x3; @@ -7275,8 +7286,18 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: - if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS) - break; + if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + return -EINVAL; + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + return -EINVAL; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_VHT_IBSS)) + return -EINVAL; + break; default: return -EINVAL; } @@ -7389,8 +7410,8 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) static struct sk_buff * __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 portid, u32 seq, - enum nl80211_commands cmd, + struct wireless_dev *wdev, int approxlen, + u32 portid, u32 seq, enum nl80211_commands cmd, enum nl80211_attrs attr, const struct nl80211_vendor_cmd_info *info, gfp_t gfp) @@ -7421,6 +7442,16 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, goto nla_put_failure; } + if (wdev) { + if (nla_put_u64(skb, NL80211_ATTR_WDEV, + wdev_id(wdev))) + goto nla_put_failure; + if (wdev->netdev && + nla_put_u32(skb, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) + goto nla_put_failure; + } + data = nla_nest_start(skb, attr); ((void **)skb->cb)[0] = rdev; @@ -7435,6 +7466,7 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, } struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_commands cmd, enum nl80211_attrs attr, int vendor_event_idx, @@ -7460,7 +7492,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, return NULL; } - return __cfg80211_alloc_vendor_skb(rdev, approxlen, 0, 0, + return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, 0, 0, cmd, attr, info, gfp); } EXPORT_SYMBOL(__cfg80211_alloc_event_skb); @@ -8761,8 +8793,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, if (!nl_tcp) return -ENOBUFS; - if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || - nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + if (nla_put_in_addr(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_in_addr(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || @@ -8808,6 +8840,9 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval)) return -ENOBUFS; + if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay)) + return -ENOBUFS; + freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!freqs) return -ENOBUFS; @@ -8993,8 +9028,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, cfg = kzalloc(size, GFP_KERNEL); if (!cfg) return -ENOMEM; - cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); - cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + cfg->src = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]); memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), ETH_ALEN); if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) @@ -9094,6 +9129,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan; int err, i; bool prev_enabled = rdev->wiphy.wowlan_config; + bool regular = false; if (!wowlan) return -EOPNOTSUPP; @@ -9121,12 +9157,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT)) return -EINVAL; new_triggers.disconnect = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) { if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT)) return -EINVAL; new_triggers.magic_pkt = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED]) @@ -9136,24 +9174,28 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (!(wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE)) return -EINVAL; new_triggers.gtk_rekey_failure = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST]) { if (!(wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ)) return -EINVAL; new_triggers.eap_identity_req = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE]) { if (!(wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE)) return -EINVAL; new_triggers.four_way_handshake = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_RFKILL_RELEASE]) { if (!(wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE)) return -EINVAL; new_triggers.rfkill_release = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { @@ -9162,6 +9204,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; + regular = true; + nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) n_patterns++; @@ -9223,6 +9267,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + regular = true; err = nl80211_parse_wowlan_tcp( rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], &new_triggers); @@ -9231,6 +9276,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } if (tb[NL80211_WOWLAN_TRIG_NET_DETECT]) { + regular = true; err = nl80211_parse_wowlan_nd( rdev, wowlan, tb[NL80211_WOWLAN_TRIG_NET_DETECT], &new_triggers); @@ -9238,6 +9284,17 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; } + /* The 'any' trigger means the device continues operating more or less + * as in its normal operation mode and wakes up the host on most of the + * normal interrupts (like packet RX, ...) + * It therefore makes little sense to combine with the more constrained + * wakeup trigger modes. + */ + if (new_triggers.any && regular) { + err = -EINVAL; + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -9906,7 +9963,7 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, if (WARN_ON(!rdev->cur_cmd_info)) return NULL; - return __cfg80211_alloc_vendor_skb(rdev, approxlen, + return __cfg80211_alloc_vendor_skb(rdev, NULL, approxlen, rdev->cur_cmd_info->snd_portid, rdev->cur_cmd_info->snd_seq, cmd, attr, NULL, GFP_KERNEL); @@ -12775,6 +12832,11 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_unlock(); + /* + * It is possible that the user space process that is controlling the + * indoor setting disappeared, so notify the regulatory core. + */ + regulatory_netlink_notify(notify->portid); return NOTIFY_OK; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 35cfb7134bdb..c6e83a7468c0 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -35,13 +35,14 @@ static inline void rdev_set_wakeup(struct cfg80211_registered_device *rdev, static inline struct wireless_dev *rdev_add_virtual_intf(struct cfg80211_registered_device *rdev, char *name, + unsigned char name_assign_type, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { struct wireless_dev *ret; trace_rdev_add_virtual_intf(&rdev->wiphy, name, type); - ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, type, flags, - params); + ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, name_assign_type, + type, flags, params); trace_rdev_return_wdev(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 48dfc7b4e981..0e347f888fe9 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -82,17 +82,12 @@ * be intersected with the current one. * @REG_REQ_ALREADY_SET: the regulatory request will not change the current * regulatory settings, and no further processing is required. - * @REG_REQ_USER_HINT_HANDLED: a non alpha2 user hint was handled and no - * further processing is required, i.e., not need to update last_request - * etc. This should be used for user hints that do not provide an alpha2 - * but some other type of regulatory hint, i.e., indoor operation. */ enum reg_request_treatment { REG_REQ_OK, REG_REQ_IGNORE, REG_REQ_INTERSECT, REG_REQ_ALREADY_SET, - REG_REQ_USER_HINT_HANDLED, }; static struct regulatory_request core_request_world = { @@ -133,9 +128,17 @@ static int reg_num_devs_support_basehint; * State variable indicating if the platform on which the devices * are attached is operating in an indoor environment. The state variable * is relevant for all registered devices. - * (protected by RTNL) */ static bool reg_is_indoor; +static spinlock_t reg_indoor_lock; + +/* Used to track the userspace process controlling the indoor setting */ +static u32 reg_is_indoor_portid; + +/* Max number of consecutive attempts to communicate with CRDA */ +#define REG_MAX_CRDA_TIMEOUTS 10 + +static u32 reg_crda_timeouts; static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { @@ -487,7 +490,7 @@ static void reg_regdb_search(struct work_struct *work) mutex_unlock(®_regdb_search_mutex); if (!IS_ERR_OR_NULL(regdom)) - set_regdom(regdom); + set_regdom(regdom, REGD_SOURCE_INTERNAL_DB); rtnl_unlock(); } @@ -537,15 +540,20 @@ static int call_crda(const char *alpha2) snprintf(country, sizeof(country), "COUNTRY=%c%c", alpha2[0], alpha2[1]); + /* query internal regulatory database (if it exists) */ + reg_regdb_query(alpha2); + + if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) { + pr_info("Exceeded CRDA call max attempts. Not calling CRDA\n"); + return -EINVAL; + } + if (!is_world_regdom((char *) alpha2)) pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); else pr_info("Calling CRDA to update world regulatory domain\n"); - /* query internal regulatory database (if it exists) */ - reg_regdb_query(alpha2); - return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, env); } @@ -554,6 +562,9 @@ reg_call_crda(struct regulatory_request *request) { if (call_crda(request->alpha2)) return REG_REQ_IGNORE; + + queue_delayed_work(system_power_efficient_wq, + ®_timeout, msecs_to_jiffies(3142)); return REG_REQ_OK; } @@ -1248,13 +1259,6 @@ static bool reg_request_cell_base(struct regulatory_request *request) return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } -static bool reg_request_indoor(struct regulatory_request *request) -{ - if (request->initiator != NL80211_REGDOM_SET_BY_USER) - return false; - return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR; -} - bool reg_last_request_cell_base(void) { return reg_request_cell_base(get_last_request()); @@ -1800,8 +1804,7 @@ static void reg_set_request_processed(void) need_more_processing = true; spin_unlock(®_requests_lock); - if (lr->initiator == NL80211_REGDOM_SET_BY_USER) - cancel_delayed_work(®_timeout); + cancel_delayed_work(®_timeout); if (need_more_processing) schedule_work(®_work); @@ -1833,11 +1836,6 @@ __reg_process_hint_user(struct regulatory_request *user_request) { struct regulatory_request *lr = get_last_request(); - if (reg_request_indoor(user_request)) { - reg_is_indoor = true; - return REG_REQ_USER_HINT_HANDLED; - } - if (reg_request_cell_base(user_request)) return reg_ignore_cell_hint(user_request); @@ -1885,8 +1883,7 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET || - treatment == REG_REQ_USER_HINT_HANDLED) { + treatment == REG_REQ_ALREADY_SET) { reg_free_request(user_request); return treatment; } @@ -1947,7 +1944,6 @@ reg_process_hint_driver(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: - case REG_REQ_USER_HINT_HANDLED: reg_free_request(driver_request); return treatment; case REG_REQ_INTERSECT: @@ -2047,7 +2043,6 @@ reg_process_hint_country_ie(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: - case REG_REQ_USER_HINT_HANDLED: /* fall through */ case REG_REQ_ALREADY_SET: reg_free_request(country_ie_request); @@ -2086,11 +2081,8 @@ static void reg_process_hint(struct regulatory_request *reg_request) case NL80211_REGDOM_SET_BY_USER: treatment = reg_process_hint_user(reg_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET || - treatment == REG_REQ_USER_HINT_HANDLED) + treatment == REG_REQ_ALREADY_SET) return; - queue_delayed_work(system_power_efficient_wq, - ®_timeout, msecs_to_jiffies(3142)); return; case NL80211_REGDOM_SET_BY_DRIVER: if (!wiphy) @@ -2177,6 +2169,13 @@ static void reg_process_pending_hints(void) } reg_process_hint(reg_request); + + lr = get_last_request(); + + spin_lock(®_requests_lock); + if (!list_empty(®_requests_list) && lr && lr->processed) + schedule_work(®_work); + spin_unlock(®_requests_lock); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -2304,27 +2303,58 @@ int regulatory_hint_user(const char *alpha2, request->initiator = NL80211_REGDOM_SET_BY_USER; request->user_reg_hint_type = user_reg_hint_type; + /* Allow calling CRDA again */ + reg_crda_timeouts = 0; + queue_regulatory_request(request); return 0; } -int regulatory_hint_indoor_user(void) +int regulatory_hint_indoor(bool is_indoor, u32 portid) { - struct regulatory_request *request; + spin_lock(®_indoor_lock); - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); - if (!request) - return -ENOMEM; + /* It is possible that more than one user space process is trying to + * configure the indoor setting. To handle such cases, clear the indoor + * setting in case that some process does not think that the device + * is operating in an indoor environment. In addition, if a user space + * process indicates that it is controlling the indoor setting, save its + * portid, i.e., make it the owner. + */ + reg_is_indoor = is_indoor; + if (reg_is_indoor) { + if (!reg_is_indoor_portid) + reg_is_indoor_portid = portid; + } else { + reg_is_indoor_portid = 0; + } - request->wiphy_idx = WIPHY_IDX_INVALID; - request->initiator = NL80211_REGDOM_SET_BY_USER; - request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR; - queue_regulatory_request(request); + spin_unlock(®_indoor_lock); + + if (!is_indoor) + reg_check_channels(); return 0; } +void regulatory_netlink_notify(u32 portid) +{ + spin_lock(®_indoor_lock); + + if (reg_is_indoor_portid != portid) { + spin_unlock(®_indoor_lock); + return; + } + + reg_is_indoor = false; + reg_is_indoor_portid = 0; + + spin_unlock(®_indoor_lock); + + reg_check_channels(); +} + /* Driver hints */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { @@ -2345,6 +2375,9 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_DRIVER; + /* Allow calling CRDA again */ + reg_crda_timeouts = 0; + queue_regulatory_request(request); return 0; @@ -2398,6 +2431,9 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band, request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; request->country_ie_env = env; + /* Allow calling CRDA again */ + reg_crda_timeouts = 0; + queue_regulatory_request(request); request = NULL; out: @@ -2486,13 +2522,22 @@ static void restore_regulatory_settings(bool reset_user) char alpha2[2]; char world_alpha2[2]; struct reg_beacon *reg_beacon, *btmp; - struct regulatory_request *reg_request, *tmp; LIST_HEAD(tmp_reg_req_list); struct cfg80211_registered_device *rdev; ASSERT_RTNL(); - reg_is_indoor = false; + /* + * Clear the indoor setting in case that it is not controlled by user + * space, as otherwise there is no guarantee that the device is still + * operating in an indoor environment. + */ + spin_lock(®_indoor_lock); + if (reg_is_indoor && !reg_is_indoor_portid) { + reg_is_indoor = false; + reg_check_channels(); + } + spin_unlock(®_indoor_lock); reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -2504,11 +2549,7 @@ static void restore_regulatory_settings(bool reset_user) * settings. */ spin_lock(®_requests_lock); - list_for_each_entry_safe(reg_request, tmp, ®_requests_list, list) { - if (reg_request->initiator != NL80211_REGDOM_SET_BY_USER) - continue; - list_move_tail(®_request->list, &tmp_reg_req_list); - } + list_splice_tail_init(®_requests_list, &tmp_reg_req_list); spin_unlock(®_requests_lock); /* Clear beacon hints */ @@ -2871,7 +2912,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd, * multiple drivers can be ironed out later. Caller must've already * kmalloc'd the rd structure. */ -int set_regdom(const struct ieee80211_regdomain *rd) +int set_regdom(const struct ieee80211_regdomain *rd, + enum ieee80211_regd_source regd_src) { struct regulatory_request *lr; bool user_reset = false; @@ -2882,6 +2924,9 @@ int set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; } + if (regd_src == REGD_SOURCE_CRDA) + reg_crda_timeouts = 0; + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ @@ -3041,6 +3086,7 @@ static void reg_timeout_work(struct work_struct *work) { REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); rtnl_lock(); + reg_crda_timeouts++; restore_regulatory_settings(true); rtnl_unlock(); } @@ -3089,6 +3135,7 @@ int __init regulatory_init(void) spin_lock_init(®_requests_lock); spin_lock_init(®_pending_beacons_lock); + spin_lock_init(®_indoor_lock); reg_regdb_size_check(); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 4b45d6e61d24..9f495d76eca0 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -16,6 +16,11 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +enum ieee80211_regd_source { + REGD_SOURCE_INTERNAL_DB, + REGD_SOURCE_CRDA, +}; + extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; bool reg_is_valid_request(const char *alpha2); @@ -25,7 +30,20 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); -int regulatory_hint_indoor_user(void); + +/** + * regulatory_hint_indoor - hint operation in indoor env. or not + * @is_indoor: if true indicates that user space thinks that the + * device is operating in an indoor environment. + * @portid: the netlink port ID on which the hint was given. + */ +int regulatory_hint_indoor(bool is_indoor, u32 portid); + +/** + * regulatory_netlink_notify - notify on released netlink socket + * @portid: the netlink socket port ID + */ +void regulatory_netlink_notify(u32 portid); void wiphy_regulatory_register(struct wiphy *wiphy); void wiphy_regulatory_deregister(struct wiphy *wiphy); @@ -33,7 +51,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy); int __init regulatory_init(void); void regulatory_exit(void); -int set_regdom(const struct ieee80211_regdomain *rd); +int set_regdom(const struct ieee80211_regdomain *rd, + enum ieee80211_regd_source regd_src); + unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, const struct ieee80211_reg_rule *rule); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index c705c3e2b751..3a50aa2553bf 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -531,24 +531,78 @@ static int cmp_bss(struct cfg80211_bss *a, } } +static bool cfg80211_bss_type_match(u16 capability, + enum ieee80211_band band, + enum ieee80211_bss_type bss_type) +{ + bool ret = true; + u16 mask, val; + + if (bss_type == IEEE80211_BSS_TYPE_ANY) + return ret; + + if (band == IEEE80211_BAND_60GHZ) { + mask = WLAN_CAPABILITY_DMG_TYPE_MASK; + switch (bss_type) { + case IEEE80211_BSS_TYPE_ESS: + val = WLAN_CAPABILITY_DMG_TYPE_AP; + break; + case IEEE80211_BSS_TYPE_PBSS: + val = WLAN_CAPABILITY_DMG_TYPE_PBSS; + break; + case IEEE80211_BSS_TYPE_IBSS: + val = WLAN_CAPABILITY_DMG_TYPE_IBSS; + break; + default: + return false; + } + } else { + mask = WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS; + switch (bss_type) { + case IEEE80211_BSS_TYPE_ESS: + val = WLAN_CAPABILITY_ESS; + break; + case IEEE80211_BSS_TYPE_IBSS: + val = WLAN_CAPABILITY_IBSS; + break; + case IEEE80211_BSS_TYPE_MBSS: + val = 0; + break; + default: + return false; + } + } + + ret = ((capability & mask) == val); + return ret; +} + /* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, const u8 *ssid, size_t ssid_len, - u16 capa_mask, u16 capa_val) + enum ieee80211_bss_type bss_type, + enum ieee80211_privacy privacy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; unsigned long now = jiffies; + int bss_privacy; - trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, capa_mask, - capa_val); + trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, bss_type, + privacy); spin_lock_bh(&rdev->bss_lock); list_for_each_entry(bss, &rdev->bss_list, list) { - if ((bss->pub.capability & capa_mask) != capa_val) + if (!cfg80211_bss_type_match(bss->pub.capability, + bss->pub.channel->band, bss_type)) + continue; + + bss_privacy = (bss->pub.capability & WLAN_CAPABILITY_PRIVACY); + if ((privacy == IEEE80211_PRIVACY_ON && !bss_privacy) || + (privacy == IEEE80211_PRIVACY_OFF && bss_privacy)) continue; if (channel && bss->pub.channel != channel) continue; @@ -896,6 +950,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; struct cfg80211_internal_bss tmp = {}, *res; + int bss_type; bool signal_valid; if (WARN_ON(!wiphy)) @@ -950,8 +1005,15 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, if (!res) return NULL; - if (res->pub.capability & WLAN_CAPABILITY_ESS) - regulatory_hint_found_beacon(wiphy, channel, gfp); + if (channel->band == IEEE80211_BAND_60GHZ) { + bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; + if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || + bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + } else { + if (res->pub.capability & WLAN_CAPABILITY_ESS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + } trace_cfg80211_return_bss(&res->pub); /* cfg80211_bss_update gives us a referenced result */ @@ -973,6 +1035,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, bool signal_valid; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); + int bss_type; BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); @@ -1025,8 +1088,15 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, if (!res) return NULL; - if (res->pub.capability & WLAN_CAPABILITY_ESS) - regulatory_hint_found_beacon(wiphy, channel, gfp); + if (channel->band == IEEE80211_BAND_60GHZ) { + bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; + if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || + bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + } else { + if (res->pub.capability & WLAN_CAPABILITY_ESS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + } trace_cfg80211_return_bss(&res->pub); /* cfg80211_bss_update gives us a referenced result */ @@ -1237,17 +1307,17 @@ int cfg80211_wext_siwscan(struct net_device *dev, kfree(creq); return err; } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwscan); +EXPORT_WEXT_HANDLER(cfg80211_wext_siwscan); -static void ieee80211_scan_add_ies(struct iw_request_info *info, - const struct cfg80211_bss_ies *ies, - char **current_ev, char *end_buf) +static char *ieee80211_scan_add_ies(struct iw_request_info *info, + const struct cfg80211_bss_ies *ies, + char *current_ev, char *end_buf) { const u8 *pos, *end, *next; struct iw_event iwe; if (!ies) - return; + return current_ev; /* * If needed, fragment the IEs buffer (at IE boundaries) into short @@ -1264,10 +1334,11 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVGENIE; iwe.u.data.length = next - pos; - *current_ev = iwe_stream_add_point(info, *current_ev, - end_buf, &iwe, - (void *)pos); - + current_ev = iwe_stream_add_point_check(info, current_ev, + end_buf, &iwe, + (void *)pos); + if (IS_ERR(current_ev)) + return current_ev; pos = next; } @@ -1275,10 +1346,14 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVGENIE; iwe.u.data.length = end - pos; - *current_ev = iwe_stream_add_point(info, *current_ev, - end_buf, &iwe, - (void *)pos); + current_ev = iwe_stream_add_point_check(info, current_ev, + end_buf, &iwe, + (void *)pos); + if (IS_ERR(current_ev)) + return current_ev; } + + return current_ev; } static char * @@ -1289,7 +1364,8 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, const struct cfg80211_bss_ies *ies; struct iw_event iwe; const u8 *ie; - u8 *buf, *cfg, *p; + u8 buf[50]; + u8 *cfg, *p, *tmp; int rem, i, sig; bool ismesh = false; @@ -1297,22 +1373,28 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, iwe.cmd = SIOCGIWAP; iwe.u.ap_addr.sa_family = ARPHRD_ETHER; memcpy(iwe.u.ap_addr.sa_data, bss->pub.bssid, ETH_ALEN); - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_ADDR_LEN); + current_ev = iwe_stream_add_event_check(info, current_ev, end_buf, &iwe, + IW_EV_ADDR_LEN); + if (IS_ERR(current_ev)) + return current_ev; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; iwe.u.freq.m = ieee80211_frequency_to_channel(bss->pub.channel->center_freq); iwe.u.freq.e = 0; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); + current_ev = iwe_stream_add_event_check(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + if (IS_ERR(current_ev)) + return current_ev; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; iwe.u.freq.m = bss->pub.channel->center_freq; iwe.u.freq.e = 6; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); + current_ev = iwe_stream_add_event_check(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + if (IS_ERR(current_ev)) + return current_ev; if (wiphy->signal_type != CFG80211_SIGNAL_TYPE_NONE) { memset(&iwe, 0, sizeof(iwe)); @@ -1341,8 +1423,11 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, /* not reached */ break; } - current_ev = iwe_stream_add_event(info, current_ev, end_buf, - &iwe, IW_EV_QUAL_LEN); + current_ev = iwe_stream_add_event_check(info, current_ev, + end_buf, &iwe, + IW_EV_QUAL_LEN); + if (IS_ERR(current_ev)) + return current_ev; } memset(&iwe, 0, sizeof(iwe)); @@ -1352,8 +1437,10 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, else iwe.u.data.flags = IW_ENCODE_DISABLED; iwe.u.data.length = 0; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, ""); + current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, + &iwe, ""); + if (IS_ERR(current_ev)) + return current_ev; rcu_read_lock(); ies = rcu_dereference(bss->pub.ies); @@ -1371,66 +1458,91 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, iwe.cmd = SIOCGIWESSID; iwe.u.data.length = ie[1]; iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, (u8 *)ie + 2); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, &iwe, + (u8 *)ie + 2); + if (IS_ERR(current_ev)) + goto unlock; break; case WLAN_EID_MESH_ID: memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWESSID; iwe.u.data.length = ie[1]; iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, (u8 *)ie + 2); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, &iwe, + (u8 *)ie + 2); + if (IS_ERR(current_ev)) + goto unlock; break; case WLAN_EID_MESH_CONFIG: ismesh = true; if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) break; - buf = kmalloc(50, GFP_ATOMIC); - if (!buf) - break; cfg = (u8 *)ie + 2; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; sprintf(buf, "Mesh Network Path Selection Protocol ID: " "0x%02X", cfg[0]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; sprintf(buf, "Path Selection Metric ID: 0x%02X", cfg[1]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; sprintf(buf, "Congestion Control Mode ID: 0x%02X", cfg[2]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; sprintf(buf, "Synchronization ID: 0x%02X", cfg[3]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; sprintf(buf, "Authentication ID: 0x%02X", cfg[4]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; sprintf(buf, "Formation Info: 0x%02X", cfg[5]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; sprintf(buf, "Capabilities: 0x%02X", cfg[6]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - kfree(buf); + current_ev = iwe_stream_add_point_check(info, + current_ev, + end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; break; case WLAN_EID_SUPP_RATES: case WLAN_EID_EXT_SUPP_RATES: @@ -1445,8 +1557,14 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, for (i = 0; i < ie[1]; i++) { iwe.u.bitrate.value = ((ie[i + 2] & 0x7f) * 500000); + tmp = p; p = iwe_stream_add_value(info, current_ev, p, - end_buf, &iwe, IW_EV_PARAM_LEN); + end_buf, &iwe, + IW_EV_PARAM_LEN); + if (p == tmp) { + current_ev = ERR_PTR(-E2BIG); + goto unlock; + } } current_ev = p; break; @@ -1465,31 +1583,35 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, iwe.u.mode = IW_MODE_MASTER; else iwe.u.mode = IW_MODE_ADHOC; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, - &iwe, IW_EV_UINT_LEN); - } - - buf = kmalloc(31, GFP_ATOMIC); - if (buf) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf)); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, buf); - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, " Last beacon: %ums ago", - elapsed_jiffies_msecs(bss->ts)); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, &iwe, buf); - kfree(buf); + current_ev = iwe_stream_add_event_check(info, current_ev, + end_buf, &iwe, + IW_EV_UINT_LEN); + if (IS_ERR(current_ev)) + goto unlock; } - ieee80211_scan_add_ies(info, ies, ¤t_ev, end_buf); + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, + &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, " Last beacon: %ums ago", + elapsed_jiffies_msecs(bss->ts)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point_check(info, current_ev, + end_buf, &iwe, buf); + if (IS_ERR(current_ev)) + goto unlock; + + current_ev = ieee80211_scan_add_ies(info, ies, current_ev, end_buf); + + unlock: rcu_read_unlock(); - return current_ev; } @@ -1501,19 +1623,27 @@ static int ieee80211_scan_results(struct cfg80211_registered_device *rdev, char *current_ev = buf; char *end_buf = buf + len; struct cfg80211_internal_bss *bss; + int err = 0; spin_lock_bh(&rdev->bss_lock); cfg80211_bss_expire(rdev); list_for_each_entry(bss, &rdev->bss_list, list) { if (buf + len - current_ev <= IW_EV_ADDR_LEN) { - spin_unlock_bh(&rdev->bss_lock); - return -E2BIG; + err = -E2BIG; + break; } current_ev = ieee80211_bss(&rdev->wiphy, info, bss, current_ev, end_buf); + if (IS_ERR(current_ev)) { + err = PTR_ERR(current_ev); + break; + } } spin_unlock_bh(&rdev->bss_lock); + + if (err) + return err; return current_ev - buf; } @@ -1545,5 +1675,5 @@ int cfg80211_wext_giwscan(struct net_device *dev, return res; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwscan); +EXPORT_WEXT_HANDLER(cfg80211_wext_giwscan); #endif diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 0ab3711c79a0..d11454f87bac 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -42,7 +42,7 @@ struct cfg80211_conn { CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; - u8 *ie; + const u8 *ie; size_t ie_len; bool auto_auth, prev_bssid_valid; }; @@ -257,19 +257,15 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; - u16 capa = WLAN_CAPABILITY_ESS; ASSERT_WDEV_LOCK(wdev); - if (wdev->conn->params.privacy) - capa |= WLAN_CAPABILITY_PRIVACY; - bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel, wdev->conn->params.bssid, wdev->conn->params.ssid, wdev->conn->params.ssid_len, - WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY, - capa); + IEEE80211_BSS_TYPE_ESS, + IEEE80211_PRIVACY(wdev->conn->params.privacy)); if (!bss) return NULL; @@ -427,6 +423,62 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, + const u8 *ies, size_t ies_len, + const u8 **out_ies, size_t *out_ies_len) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + u8 *buf; + size_t offs; + + if (!rdev->wiphy.extended_capabilities_len || + (ies && cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY, ies, ies_len))) { + *out_ies = kmemdup(ies, ies_len, GFP_KERNEL); + if (!*out_ies) + return -ENOMEM; + *out_ies_len = ies_len; + return 0; + } + + buf = kmalloc(ies_len + rdev->wiphy.extended_capabilities_len + 2, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (ies_len) { + static const u8 before_extcapa[] = { + /* not listing IEs expected to be created by driver */ + WLAN_EID_RSN, + WLAN_EID_QOS_CAPA, + WLAN_EID_RRM_ENABLED_CAPABILITIES, + WLAN_EID_MOBILITY_DOMAIN, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + WLAN_EID_BSS_COEX_2040, + }; + + offs = ieee80211_ie_split(ies, ies_len, before_extcapa, + ARRAY_SIZE(before_extcapa), 0); + memcpy(buf, ies, offs); + /* leave a whole for extended capabilities IE */ + memcpy(buf + offs + rdev->wiphy.extended_capabilities_len + 2, + ies + offs, ies_len - offs); + } else { + offs = 0; + } + + /* place extended capabilities IE (with only driver capabilities) */ + buf[offs] = WLAN_EID_EXT_CAPABILITY; + buf[offs + 1] = rdev->wiphy.extended_capabilities_len; + memcpy(buf + offs + 2, + rdev->wiphy.extended_capabilities, + rdev->wiphy.extended_capabilities_len); + + *out_ies = buf; + *out_ies_len = ies_len + rdev->wiphy.extended_capabilities_len + 2; + + return 0; +} + static int cfg80211_sme_connect(struct wireless_dev *wdev, struct cfg80211_connect_params *connect, const u8 *prev_bssid) @@ -457,16 +509,14 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); } - if (connect->ie) { - wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, - GFP_KERNEL); - wdev->conn->params.ie = wdev->conn->ie; - if (!wdev->conn->ie) { - kfree(wdev->conn); - wdev->conn = NULL; - return -ENOMEM; - } + if (cfg80211_sme_get_conn_ies(wdev, connect->ie, connect->ie_len, + &wdev->conn->ie, + &wdev->conn->params.ie_len)) { + kfree(wdev->conn); + wdev->conn = NULL; + return -ENOMEM; } + wdev->conn->params.ie = wdev->conn->ie; if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { wdev->conn->auto_auth = true; @@ -637,8 +687,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); + IEEE80211_BSS_TYPE_ESS, + IEEE80211_PRIVACY_ANY); if (bss) cfg80211_hold_bss(bss_from_pub(bss)); } @@ -795,8 +845,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_bss *bss; bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, - wdev->ssid_len, WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); + wdev->ssid_len, + IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY); if (WARN_ON(!bss)) return; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b17b3692f8c2..af3617c9879e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -7,6 +7,7 @@ #include <linux/tracepoint.h> #include <linux/rtnetlink.h> +#include <linux/etherdevice.h> #include <net/cfg80211.h> #include "core.h" @@ -15,7 +16,7 @@ if (given_mac) \ memcpy(__entry->entry_mac, given_mac, ETH_ALEN); \ else \ - memset(__entry->entry_mac, 0, ETH_ALEN); \ + eth_zero_addr(__entry->entry_mac); \ } while (0) #define MAC_PR_FMT "%pM" #define MAC_PR_ARG(entry_mac) (__entry->entry_mac) @@ -627,6 +628,7 @@ DECLARE_EVENT_CLASS(station_add_change, __field(u8, plink_state) __field(u8, uapsd_queues) __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap)) + __array(char, vlan, IFNAMSIZ) ), TP_fast_assign( WIPHY_ASSIGN; @@ -644,16 +646,19 @@ DECLARE_EVENT_CLASS(station_add_change, if (params->ht_capa) memcpy(__entry->ht_capa, params->ht_capa, sizeof(struct ieee80211_ht_cap)); + memset(__entry->vlan, 0, sizeof(__entry->vlan)); + if (params->vlan) + memcpy(__entry->vlan, params->vlan->name, IFNAMSIZ); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT ", station flags mask: %u, station flags set: %u, " "station modify mask: %u, listen interval: %d, aid: %u, " - "plink action: %u, plink state: %u, uapsd queues: %u", + "plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac), __entry->sta_flags_mask, __entry->sta_flags_set, __entry->sta_modify_mask, __entry->listen_interval, __entry->aid, __entry->plink_action, __entry->plink_state, - __entry->uapsd_queues) + __entry->uapsd_queues, __entry->vlan) ); DEFINE_EVENT(station_add_change, rdev_add_station, @@ -1077,7 +1082,7 @@ TRACE_EVENT(rdev_auth, if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else - memset(__entry->bssid, 0, ETH_ALEN); + eth_zero_addr(__entry->bssid); __entry->auth_type = req->auth_type; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", auth type: %d, bssid: " MAC_PR_FMT, @@ -1103,7 +1108,7 @@ TRACE_EVENT(rdev_assoc, if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else - memset(__entry->bssid, 0, ETH_ALEN); + eth_zero_addr(__entry->bssid); MAC_ASSIGN(prev_bssid, req->prev_bssid); __entry->use_mfp = req->use_mfp; __entry->flags = req->flags; @@ -1153,7 +1158,7 @@ TRACE_EVENT(rdev_disassoc, if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else - memset(__entry->bssid, 0, ETH_ALEN); + eth_zero_addr(__entry->bssid); __entry->reason_code = req->reason_code; __entry->local_state_change = req->local_state_change; ), @@ -2636,28 +2641,30 @@ DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_stopped, TRACE_EVENT(cfg80211_get_bss, TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, const u8 *ssid, size_t ssid_len, - u16 capa_mask, u16 capa_val), - TP_ARGS(wiphy, channel, bssid, ssid, ssid_len, capa_mask, capa_val), + enum ieee80211_bss_type bss_type, + enum ieee80211_privacy privacy), + TP_ARGS(wiphy, channel, bssid, ssid, ssid_len, bss_type, privacy), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY MAC_ENTRY(bssid) __dynamic_array(u8, ssid, ssid_len) - __field(u16, capa_mask) - __field(u16, capa_val) + __field(enum ieee80211_bss_type, bss_type) + __field(enum ieee80211_privacy, privacy) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(channel); MAC_ASSIGN(bssid, bssid); memcpy(__get_dynamic_array(ssid), ssid, ssid_len); - __entry->capa_mask = capa_mask; - __entry->capa_val = capa_val; - ), - TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", " MAC_PR_FMT ", buf: %#.2x, " - "capa_mask: %d, capa_val: %u", WIPHY_PR_ARG, CHAN_PR_ARG, - MAC_PR_ARG(bssid), ((u8 *)__get_dynamic_array(ssid))[0], - __entry->capa_mask, __entry->capa_val) + __entry->bss_type = bss_type; + __entry->privacy = privacy; + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", " MAC_PR_FMT + ", buf: %#.2x, bss_type: %d, privacy: %d", + WIPHY_PR_ARG, CHAN_PR_ARG, MAC_PR_ARG(bssid), + ((u8 *)__get_dynamic_array(ssid))[0], __entry->bss_type, + __entry->privacy) ); TRACE_EVENT(cfg80211_inform_bss_width_frame, diff --git a/net/wireless/util.c b/net/wireless/util.c index 6903dbdcb8c1..70051ab52f4f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1290,12 +1290,54 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); +static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) +{ + int i; + + for (i = 0; i < n_ids; i++) + if (ids[i] == id) + return true; + return false; +} + +size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, + const u8 *after_ric, int n_after_ric, + size_t offset) +{ + size_t pos = offset; + + while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { + if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { + pos += 2 + ies[pos + 1]; + + while (pos < ielen && + !ieee80211_id_in_list(after_ric, n_after_ric, + ies[pos])) + pos += 2 + ies[pos + 1]; + } else { + pos += 2 + ies[pos + 1]; + } + } + + return pos; +} +EXPORT_SYMBOL(ieee80211_ie_split_ric); + +size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset) +{ + return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); +} +EXPORT_SYMBOL(ieee80211_ie_split); + bool ieee80211_operating_class_to_band(u8 operating_class, enum ieee80211_band *band) { switch (operating_class) { case 112: case 115 ... 127: + case 128 ... 130: *band = IEEE80211_BAND_5GHZ; return true; case 81: @@ -1313,6 +1355,135 @@ bool ieee80211_operating_class_to_band(u8 operating_class, } EXPORT_SYMBOL(ieee80211_operating_class_to_band); +bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, + u8 *op_class) +{ + u8 vht_opclass; + u16 freq = chandef->center_freq1; + + if (freq >= 2412 && freq <= 2472) { + if (chandef->width > NL80211_CHAN_WIDTH_40) + return false; + + /* 2.407 GHz, channels 1..13 */ + if (chandef->width == NL80211_CHAN_WIDTH_40) { + if (freq > chandef->chan->center_freq) + *op_class = 83; /* HT40+ */ + else + *op_class = 84; /* HT40- */ + } else { + *op_class = 81; + } + + return true; + } + + if (freq == 2484) { + if (chandef->width > NL80211_CHAN_WIDTH_40) + return false; + + *op_class = 82; /* channel 14 */ + return true; + } + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_80: + vht_opclass = 128; + break; + case NL80211_CHAN_WIDTH_160: + vht_opclass = 129; + break; + case NL80211_CHAN_WIDTH_80P80: + vht_opclass = 130; + break; + case NL80211_CHAN_WIDTH_10: + case NL80211_CHAN_WIDTH_5: + return false; /* unsupported for now */ + default: + vht_opclass = 0; + break; + } + + /* 5 GHz, channels 36..48 */ + if (freq >= 5180 && freq <= 5240) { + if (vht_opclass) { + *op_class = vht_opclass; + } else if (chandef->width == NL80211_CHAN_WIDTH_40) { + if (freq > chandef->chan->center_freq) + *op_class = 116; + else + *op_class = 117; + } else { + *op_class = 115; + } + + return true; + } + + /* 5 GHz, channels 52..64 */ + if (freq >= 5260 && freq <= 5320) { + if (vht_opclass) { + *op_class = vht_opclass; + } else if (chandef->width == NL80211_CHAN_WIDTH_40) { + if (freq > chandef->chan->center_freq) + *op_class = 119; + else + *op_class = 120; + } else { + *op_class = 118; + } + + return true; + } + + /* 5 GHz, channels 100..144 */ + if (freq >= 5500 && freq <= 5720) { + if (vht_opclass) { + *op_class = vht_opclass; + } else if (chandef->width == NL80211_CHAN_WIDTH_40) { + if (freq > chandef->chan->center_freq) + *op_class = 122; + else + *op_class = 123; + } else { + *op_class = 121; + } + + return true; + } + + /* 5 GHz, channels 149..169 */ + if (freq >= 5745 && freq <= 5845) { + if (vht_opclass) { + *op_class = vht_opclass; + } else if (chandef->width == NL80211_CHAN_WIDTH_40) { + if (freq > chandef->chan->center_freq) + *op_class = 126; + else + *op_class = 127; + } else if (freq <= 5805) { + *op_class = 124; + } else { + *op_class = 125; + } + + return true; + } + + /* 56.16 GHz, channel 1..4 */ + if (freq >= 56160 + 2160 * 1 && freq <= 56160 + 2160 * 4) { + if (chandef->width >= NL80211_CHAN_WIDTH_40) + return false; + + *op_class = 180; + return true; + } + + /* not supported yet */ + return false; +} +EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); + int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, u32 beacon_int) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5b24d39d7903..fff1bef6ed6d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -63,7 +63,7 @@ int cfg80211_wext_giwname(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwname); +EXPORT_WEXT_HANDLER(cfg80211_wext_giwname); int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, u32 *mode, char *extra) @@ -99,7 +99,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwmode); +EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode); int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, u32 *mode, char *extra) @@ -134,7 +134,7 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, } return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwmode); +EXPORT_WEXT_HANDLER(cfg80211_wext_giwmode); int cfg80211_wext_giwrange(struct net_device *dev, @@ -248,7 +248,7 @@ int cfg80211_wext_giwrange(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwrange); +EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange); /** @@ -303,7 +303,7 @@ int cfg80211_wext_siwrts(struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwrts); +EXPORT_WEXT_HANDLER(cfg80211_wext_siwrts); int cfg80211_wext_giwrts(struct net_device *dev, struct iw_request_info *info, @@ -317,7 +317,7 @@ int cfg80211_wext_giwrts(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwrts); +EXPORT_WEXT_HANDLER(cfg80211_wext_giwrts); int cfg80211_wext_siwfrag(struct net_device *dev, struct iw_request_info *info, @@ -343,7 +343,7 @@ int cfg80211_wext_siwfrag(struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwfrag); +EXPORT_WEXT_HANDLER(cfg80211_wext_siwfrag); int cfg80211_wext_giwfrag(struct net_device *dev, struct iw_request_info *info, @@ -357,7 +357,7 @@ int cfg80211_wext_giwfrag(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwfrag); +EXPORT_WEXT_HANDLER(cfg80211_wext_giwfrag); static int cfg80211_wext_siwretry(struct net_device *dev, struct iw_request_info *info, @@ -427,7 +427,7 @@ int cfg80211_wext_giwretry(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry); +EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry); static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, struct net_device *dev, bool pairwise, diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index ebcacca2f731..94c7405a5413 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -4,6 +4,12 @@ #include <net/iw_handler.h> #include <linux/wireless.h> +#ifdef CONFIG_CFG80211_WEXT_EXPORT +#define EXPORT_WEXT_HANDLER(h) EXPORT_SYMBOL_GPL(h) +#else +#define EXPORT_WEXT_HANDLER(h) +#endif /* CONFIG_CFG80211_WEXT_EXPORT */ + int cfg80211_ibss_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 368611c05739..a4e8af3321d2 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -322,7 +322,7 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, if (wdev->current_bss) memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); else - memset(ap_addr->sa_data, 0, ETH_ALEN); + eth_zero_addr(ap_addr->sa_data); wdev_unlock(wdev); return 0; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d9149b68b9bc..c3ab230e4493 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1077,8 +1077,7 @@ out_clear_request: goto out; } -static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); @@ -1252,8 +1251,7 @@ out_kfree_skb: } -static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, +static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 85d1d4764612..526c4feb3b50 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -238,11 +238,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; - if (xfrm_tunnel_check(skb, x, family)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - spin_lock(&x->lock); if (unlikely(x->km.state == XFRM_STATE_ACQ)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -271,6 +266,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); XFRM_SKB_CB(skb)->seq.input.low = seq; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 7c532856b398..fbcedbe33190 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -19,7 +19,7 @@ #include <net/dst.h> #include <net/xfrm.h> -static int xfrm_output2(struct sk_buff *skb); +static int xfrm_output2(struct sock *sk, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -130,7 +130,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) return dst_output(skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, skb, + NF_INET_POST_ROUTING, skb->sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -144,12 +144,12 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output_resume); -static int xfrm_output2(struct sk_buff *skb) +static int xfrm_output2(struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(skb, 1); } -static int xfrm_output_gso(struct sk_buff *skb) +static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; @@ -165,7 +165,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int err; segs->next = NULL; - err = xfrm_output2(segs); + err = xfrm_output2(sk, segs); if (unlikely(err)) { kfree_skb_list(nskb); @@ -178,13 +178,13 @@ static int xfrm_output_gso(struct sk_buff *skb) return 0; } -int xfrm_output(struct sk_buff *skb) +int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); int err; if (skb_is_gso(skb)) - return xfrm_output_gso(skb); + return xfrm_output_gso(sk, skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); @@ -195,7 +195,7 @@ int xfrm_output(struct sk_buff *skb) } } - return xfrm_output2(skb); + return xfrm_output2(sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index de971b6d38c5..f5e39e35d73a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1043,12 +1043,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, break; case AF_INET6: - *(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr; - *(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr; + x->sel.daddr.in6 = daddr->in6; + x->sel.saddr.in6 = saddr->in6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; - *(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr; - *(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr; + x->props.saddr.in6 = saddr->in6; + x->id.daddr.in6 = daddr->in6; break; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7de2ed9ec46d..2091664295ba 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2423,6 +2423,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct xfrm_link *link; int type, err; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + return -ENOTSUPP; +#endif + type = nlh->nlmsg_type; if (type > XFRM_MSG_MAX) return -EINVAL; |