diff options
Diffstat (limited to 'net')
134 files changed, 3792 insertions, 2964 deletions
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 78c8a495b571..346b5c1a9185 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -49,36 +49,178 @@ #include <linux/bitops.h> #include <linux/if_arp.h> #include <linux/netdevice.h> + #include <net/6lowpan.h> #include <net/ipv6.h> -#include <net/af_ieee802154.h> + +/* special link-layer handling */ +#include <net/mac802154.h> #include "nhc.h" +/* Values of fields within the IPHC encoding first byte */ +#define LOWPAN_IPHC_TF_MASK 0x18 +#define LOWPAN_IPHC_TF_00 0x00 +#define LOWPAN_IPHC_TF_01 0x08 +#define LOWPAN_IPHC_TF_10 0x10 +#define LOWPAN_IPHC_TF_11 0x18 + +#define LOWPAN_IPHC_NH 0x04 + +#define LOWPAN_IPHC_HLIM_MASK 0x03 +#define LOWPAN_IPHC_HLIM_00 0x00 +#define LOWPAN_IPHC_HLIM_01 0x01 +#define LOWPAN_IPHC_HLIM_10 0x02 +#define LOWPAN_IPHC_HLIM_11 0x03 + +/* Values of fields within the IPHC encoding second byte */ +#define LOWPAN_IPHC_CID 0x80 + +#define LOWPAN_IPHC_SAC 0x40 + +#define LOWPAN_IPHC_SAM_MASK 0x30 +#define LOWPAN_IPHC_SAM_00 0x00 +#define LOWPAN_IPHC_SAM_01 0x10 +#define LOWPAN_IPHC_SAM_10 0x20 +#define LOWPAN_IPHC_SAM_11 0x30 + +#define LOWPAN_IPHC_M 0x08 + +#define LOWPAN_IPHC_DAC 0x04 + +#define LOWPAN_IPHC_DAM_MASK 0x03 +#define LOWPAN_IPHC_DAM_00 0x00 +#define LOWPAN_IPHC_DAM_01 0x01 +#define LOWPAN_IPHC_DAM_10 0x02 +#define LOWPAN_IPHC_DAM_11 0x03 + +/* ipv6 address based on mac + * second bit-flip (Universe/Local) is done according RFC2464 + */ +#define is_addr_mac_addr_based(a, m) \ + ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \ + (((a)->s6_addr[9]) == (m)[1]) && \ + (((a)->s6_addr[10]) == (m)[2]) && \ + (((a)->s6_addr[11]) == (m)[3]) && \ + (((a)->s6_addr[12]) == (m)[4]) && \ + (((a)->s6_addr[13]) == (m)[5]) && \ + (((a)->s6_addr[14]) == (m)[6]) && \ + (((a)->s6_addr[15]) == (m)[7])) + +/* check whether we can compress the IID to 16 bits, + * it's possible for unicast addresses with first 49 bits are zero only. + */ +#define lowpan_is_iid_16_bit_compressable(a) \ + ((((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr[10]) == 0) && \ + (((a)->s6_addr[11]) == 0xff) && \ + (((a)->s6_addr[12]) == 0xfe) && \ + (((a)->s6_addr[13]) == 0)) + +/* check whether the 112-bit gid of the multicast address is mappable to: */ + +/* 48 bits, FFXX::00XX:XXXX:XXXX */ +#define lowpan_is_mcast_addr_compressable48(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr[10]) == 0)) + +/* 32 bits, FFXX::00XX:XXXX */ +#define lowpan_is_mcast_addr_compressable32(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr[12]) == 0)) + +/* 8 bits, FF02::00XX */ +#define lowpan_is_mcast_addr_compressable8(a) \ + ((((a)->s6_addr[1]) == 2) && \ + (((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0)) + +static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + /* fe:80::XXXX:XXXX:XXXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN); + /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + ipaddr->s6_addr[8] ^= 0x02; +} + +static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + const struct ieee802154_addr *addr = lladdr; + u8 eui64[EUI64_ADDR_LEN] = { }; + + switch (addr->mode) { + case IEEE802154_ADDR_LONG: + ieee802154_le64_to_be64(eui64, &addr->extended_addr); + iphc_uncompress_eui64_lladdr(ipaddr, eui64); + break; + case IEEE802154_ADDR_SHORT: + /* fe:80::ff:fe00:XXXX + * \__/ + * short_addr + * + * Universe/Local bit is zero. + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + ieee802154_le16_to_be16(&ipaddr->s6_addr16[7], + &addr->short_addr); + break; + default: + /* should never handled and filtered by 802154 6lowpan */ + WARN_ON_ONCE(1); + break; + } +} + /* Uncompress address function for source and * destination address(non-multicast). * - * address_mode is sam value or dam value. + * address_mode is the masked value for sam or dam value */ -static int uncompress_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, const u8 address_mode, - const u8 *lladdr, const u8 addr_type, - const u8 addr_len) +static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, + struct in6_addr *ipaddr, u8 address_mode, + const void *lladdr) { bool fail; switch (address_mode) { - case LOWPAN_IPHC_ADDR_00: + /* SAM and DAM are the same here */ + case LOWPAN_IPHC_DAM_00: /* for global link addresses */ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); break; - case LOWPAN_IPHC_ADDR_01: + case LOWPAN_IPHC_SAM_01: + case LOWPAN_IPHC_DAM_01: /* fe:80::XXXX:XXXX:XXXX:XXXX */ ipaddr->s6_addr[0] = 0xFE; ipaddr->s6_addr[1] = 0x80; fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8); break; - case LOWPAN_IPHC_ADDR_02: + case LOWPAN_IPHC_SAM_10: + case LOWPAN_IPHC_DAM_10: /* fe:80::ff:fe00:XXXX */ ipaddr->s6_addr[0] = 0xFE; ipaddr->s6_addr[1] = 0x80; @@ -86,38 +228,16 @@ static int uncompress_addr(struct sk_buff *skb, ipaddr->s6_addr[12] = 0xFE; fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2); break; - case LOWPAN_IPHC_ADDR_03: + case LOWPAN_IPHC_SAM_11: + case LOWPAN_IPHC_DAM_11: fail = false; - switch (addr_type) { - case IEEE802154_ADDR_LONG: - /* fe:80::XXXX:XXXX:XXXX:XXXX - * \_________________/ - * hwaddr - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - memcpy(&ipaddr->s6_addr[8], lladdr, addr_len); - /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - ipaddr->s6_addr[8] ^= 0x02; - break; - case IEEE802154_ADDR_SHORT: - /* fe:80::ff:fe00:XXXX - * \__/ - * short_addr - * - * Universe/Local bit is zero. - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - ipaddr->s6_addr[11] = 0xFF; - ipaddr->s6_addr[12] = 0xFE; - ipaddr->s6_addr16[7] = htons(*((u16 *)lladdr)); + switch (lowpan_priv(dev)->lltype) { + case LOWPAN_LLTYPE_IEEE802154: + iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - pr_debug("Invalid addr_type set\n"); - return -EINVAL; + iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + break; } break; default: @@ -141,24 +261,25 @@ static int uncompress_addr(struct sk_buff *skb, */ static int uncompress_context_based_src_addr(struct sk_buff *skb, struct in6_addr *ipaddr, - const u8 sam) + u8 address_mode) { - switch (sam) { - case LOWPAN_IPHC_ADDR_00: + switch (address_mode) { + case LOWPAN_IPHC_SAM_00: /* unspec address :: * Do nothing, address is already :: */ break; - case LOWPAN_IPHC_ADDR_01: + case LOWPAN_IPHC_SAM_01: /* TODO */ - case LOWPAN_IPHC_ADDR_02: + case LOWPAN_IPHC_SAM_10: /* TODO */ - case LOWPAN_IPHC_ADDR_03: + case LOWPAN_IPHC_SAM_11: /* TODO */ - netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam); + netdev_warn(skb->dev, "SAM value 0x%x not supported\n", + address_mode); return -EINVAL; default: - pr_debug("Invalid sam value: 0x%x\n", sam); + pr_debug("Invalid sam value: 0x%x\n", address_mode); return -EINVAL; } @@ -174,11 +295,11 @@ static int uncompress_context_based_src_addr(struct sk_buff *skb, */ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, struct in6_addr *ipaddr, - const u8 dam) + u8 address_mode) { bool fail; - switch (dam) { + switch (address_mode) { case LOWPAN_IPHC_DAM_00: /* 00: 128 bits. The full address * is carried in-line. @@ -210,7 +331,7 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1); break; default: - pr_debug("DAM value has a wrong value: 0x%x\n", dam); + pr_debug("DAM value has a wrong value: 0x%x\n", address_mode); return -EINVAL; } @@ -225,77 +346,142 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, return 0; } -/* TTL uncompression values */ -static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; - -int -lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, - const u8 *saddr, const u8 saddr_type, - const u8 saddr_len, const u8 *daddr, - const u8 daddr_type, const u8 daddr_len, - u8 iphc0, u8 iphc1) +/* get the ecn values from iphc tf format and set it to ipv6hdr */ +static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf) { - struct ipv6hdr hdr = {}; - u8 tmp, num_context = 0; - int err; + /* get the two higher bits which is ecn */ + u8 ecn = tf[0] & 0xc0; - raw_dump_table(__func__, "raw skb data dump uncompressed", - skb->data, skb->len); + /* ECN takes 0x30 in hdr->flow_lbl[0] */ + hdr->flow_lbl[0] |= (ecn >> 2); +} - /* another if the CID flag is set */ - if (iphc1 & LOWPAN_IPHC_CID) { - pr_debug("CID flag is set, increase header with one\n"); - if (lowpan_fetch_skb(skb, &num_context, sizeof(num_context))) - return -EINVAL; - } +/* get the dscp values from iphc tf format and set it to ipv6hdr */ +static inline void lowpan_iphc_tf_set_dscp(struct ipv6hdr *hdr, const u8 *tf) +{ + /* DSCP is at place after ECN */ + u8 dscp = tf[0] & 0x3f; - hdr.version = 6; + /* The four highest bits need to be set at hdr->priority */ + hdr->priority |= ((dscp & 0x3c) >> 2); + /* The two lower bits is part of hdr->flow_lbl[0] */ + hdr->flow_lbl[0] |= ((dscp & 0x03) << 6); +} - /* Traffic Class and Flow Label */ - switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { - /* Traffic Class and FLow Label carried in-line - * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) +/* get the flow label values from iphc tf format and set it to ipv6hdr */ +static inline void lowpan_iphc_tf_set_lbl(struct ipv6hdr *hdr, const u8 *lbl) +{ + /* flow label is always some array started with lower nibble of + * flow_lbl[0] and followed with two bytes afterwards. Inside inline + * data the flow_lbl position can be different, which will be handled + * by lbl pointer. E.g. case "01" vs "00" the traffic class is 8 bit + * shifted, the different lbl pointer will handle that. + * + * The flow label will started at lower nibble of flow_lbl[0], the + * higher nibbles are part of DSCP + ECN. */ - case 0: /* 00b */ - if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) + hdr->flow_lbl[0] |= lbl[0] & 0x0f; + memcpy(&hdr->flow_lbl[1], &lbl[1], 2); +} + +/* lowpan_iphc_tf_decompress - decompress the traffic class. + * This function will return zero on success, a value lower than zero if + * failed. + */ +static int lowpan_iphc_tf_decompress(struct sk_buff *skb, struct ipv6hdr *hdr, + u8 val) +{ + u8 tf[4]; + + /* Traffic Class and Flow Label */ + switch (val) { + case LOWPAN_IPHC_TF_00: + /* ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) */ + if (lowpan_fetch_skb(skb, tf, 4)) return -EINVAL; - memcpy(&hdr.flow_lbl, &skb->data[0], 3); - skb_pull(skb, 3); - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | - (hdr.flow_lbl[0] & 0x0f); + /* 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN| DSCP | rsv | Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + lowpan_iphc_tf_set_ecn(hdr, tf); + lowpan_iphc_tf_set_dscp(hdr, tf); + lowpan_iphc_tf_set_lbl(hdr, &tf[1]); break; - /* Traffic class carried in-line - * ECN + DSCP (1 byte), Flow Label is elided - */ - case 2: /* 10b */ - if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) + case LOWPAN_IPHC_TF_01: + /* ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided. */ + if (lowpan_fetch_skb(skb, tf, 3)) return -EINVAL; - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); + /* 1 2 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN|rsv| Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + lowpan_iphc_tf_set_ecn(hdr, tf); + lowpan_iphc_tf_set_lbl(hdr, &tf[0]); break; - /* Flow Label carried in-line - * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided - */ - case 1: /* 01b */ - if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) + case LOWPAN_IPHC_TF_10: + /* ECN + DSCP (1 byte), Flow Label is elided. */ + if (lowpan_fetch_skb(skb, tf, 1)) return -EINVAL; - hdr.flow_lbl[0] = (tmp & 0x0F) | ((tmp >> 2) & 0x30); - memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); - skb_pull(skb, 2); + /* 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+ + * |ECN| DSCP | + * +-+-+-+-+-+-+-+-+ + */ + lowpan_iphc_tf_set_ecn(hdr, tf); + lowpan_iphc_tf_set_dscp(hdr, tf); break; - /* Traffic Class and Flow Label are elided */ - case 3: /* 11b */ + case LOWPAN_IPHC_TF_11: + /* Traffic Class and Flow Label are elided */ break; default: - break; + WARN_ON_ONCE(1); + return -EINVAL; } + return 0; +} + +/* TTL uncompression values */ +static const u8 lowpan_ttl_values[] = { + [LOWPAN_IPHC_HLIM_01] = 1, + [LOWPAN_IPHC_HLIM_10] = 64, + [LOWPAN_IPHC_HLIM_11] = 255, +}; + +int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, + const void *daddr, const void *saddr) +{ + struct ipv6hdr hdr = {}; + u8 iphc0, iphc1; + int err; + + raw_dump_table(__func__, "raw skb data dump uncompressed", + skb->data, skb->len); + + if (lowpan_fetch_skb(skb, &iphc0, sizeof(iphc0)) || + lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1))) + return -EINVAL; + + /* another if the CID flag is set */ + if (iphc1 & LOWPAN_IPHC_CID) + return -ENOTSUPP; + + hdr.version = 6; + + err = lowpan_iphc_tf_decompress(skb, &hdr, + iphc0 & LOWPAN_IPHC_TF_MASK); + if (err < 0) + return err; + /* Next Header */ - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + if (!(iphc0 & LOWPAN_IPHC_NH)) { /* Next header is carried inline */ if (lowpan_fetch_skb(skb, &hdr.nexthdr, sizeof(hdr.nexthdr))) return -EINVAL; @@ -305,35 +491,30 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, } /* Hop Limit */ - if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) { - hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; + if ((iphc0 & LOWPAN_IPHC_HLIM_MASK) != LOWPAN_IPHC_HLIM_00) { + hdr.hop_limit = lowpan_ttl_values[iphc0 & LOWPAN_IPHC_HLIM_MASK]; } else { if (lowpan_fetch_skb(skb, &hdr.hop_limit, sizeof(hdr.hop_limit))) return -EINVAL; } - /* Extract SAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; - if (iphc1 & LOWPAN_IPHC_SAC) { /* Source address context based uncompression */ pr_debug("SAC bit is set. Handle context based source address.\n"); - err = uncompress_context_based_src_addr(skb, &hdr.saddr, tmp); + err = uncompress_context_based_src_addr(skb, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK); } else { /* Source address uncompression */ pr_debug("source address stateless compression\n"); - err = uncompress_addr(skb, &hdr.saddr, tmp, saddr, - saddr_type, saddr_len); + err = uncompress_addr(skb, dev, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); } /* Check on error of previous branch */ if (err) return -EINVAL; - /* Extract DAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; - /* check for Multicast Compression */ if (iphc1 & LOWPAN_IPHC_M) { if (iphc1 & LOWPAN_IPHC_DAC) { @@ -341,22 +522,22 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, /* TODO: implement this */ } else { err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr, - tmp); + iphc1 & LOWPAN_IPHC_DAM_MASK); if (err) return -EINVAL; } } else { - err = uncompress_addr(skb, &hdr.daddr, tmp, daddr, - daddr_type, daddr_len); + err = uncompress_addr(skb, dev, &hdr.daddr, + iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); pr_debug("dest: stateless compression mode %d dest %pI6c\n", - tmp, &hdr.daddr); + iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr); if (err) return -EINVAL; } /* Next header data uncompression */ - if (iphc0 & LOWPAN_IPHC_NH_C) { + if (iphc0 & LOWPAN_IPHC_NH) { err = lowpan_nhc_do_uncompression(skb, dev, &hdr); if (err < 0) return err; @@ -397,42 +578,176 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(lowpan_header_decompress); -static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift, - const struct in6_addr *ipaddr, - const unsigned char *lladdr) +static const u8 lowpan_iphc_dam_to_sam_value[] = { + [LOWPAN_IPHC_DAM_00] = LOWPAN_IPHC_SAM_00, + [LOWPAN_IPHC_DAM_01] = LOWPAN_IPHC_SAM_01, + [LOWPAN_IPHC_DAM_10] = LOWPAN_IPHC_SAM_10, + [LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11, +}; + +static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr, + const unsigned char *lladdr, bool sam) { - u8 val = 0; + u8 dam = LOWPAN_IPHC_DAM_00; if (is_addr_mac_addr_based(ipaddr, lladdr)) { - val = 3; /* 0-bits */ + dam = LOWPAN_IPHC_DAM_11; /* 0-bits */ pr_debug("address compression 0 bits\n"); } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { /* compress IID to 16 bits xxxx::XXXX */ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2); - val = 2; /* 16-bits */ + dam = LOWPAN_IPHC_DAM_10; /* 16-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)", *hc_ptr - 2, 2); } else { /* do not compress IID => xxxx::IID */ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8); - val = 1; /* 64-bits */ + dam = LOWPAN_IPHC_DAM_01; /* 64-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)", *hc_ptr - 8, 8); } - return rol8(val, shift); + if (sam) + return lowpan_iphc_dam_to_sam_value[dam]; + else + return dam; } -int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) +/* lowpan_iphc_get_tc - get the ECN + DCSP fields in hc format */ +static inline u8 lowpan_iphc_get_tc(const struct ipv6hdr *hdr) { - u8 tmp, iphc0, iphc1, *hc_ptr; + u8 dscp, ecn; + + /* hdr->priority contains the higher bits of dscp, lower are part of + * flow_lbl[0]. Note ECN, DCSP is swapped in ipv6 hdr. + */ + dscp = (hdr->priority << 2) | ((hdr->flow_lbl[0] & 0xc0) >> 6); + /* ECN is at the two lower bits from first nibble of flow_lbl[0] */ + ecn = (hdr->flow_lbl[0] & 0x30); + /* for pretty debug output, also shift ecn to get the ecn value */ + pr_debug("ecn 0x%02x dscp 0x%02x\n", ecn >> 4, dscp); + /* ECN is at 0x30 now, shift it to have ECN + DCSP */ + return (ecn << 2) | dscp; +} + +/* lowpan_iphc_is_flow_lbl_zero - check if flow label is zero */ +static inline bool lowpan_iphc_is_flow_lbl_zero(const struct ipv6hdr *hdr) +{ + return ((!(hdr->flow_lbl[0] & 0x0f)) && + !hdr->flow_lbl[1] && !hdr->flow_lbl[2]); +} + +/* lowpan_iphc_tf_compress - compress the traffic class which is set by + * ipv6hdr. Return the corresponding format identifier which is used. + */ +static u8 lowpan_iphc_tf_compress(u8 **hc_ptr, const struct ipv6hdr *hdr) +{ + /* get ecn dscp data in a byteformat as: ECN(hi) + DSCP(lo) */ + u8 tc = lowpan_iphc_get_tc(hdr), tf[4], val; + + /* printout the traffic class in hc format */ + pr_debug("tc 0x%02x\n", tc); + + if (lowpan_iphc_is_flow_lbl_zero(hdr)) { + if (!tc) { + /* 11: Traffic Class and Flow Label are elided. */ + val = LOWPAN_IPHC_TF_11; + } else { + /* 10: ECN + DSCP (1 byte), Flow Label is elided. + * + * 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+ + * |ECN| DSCP | + * +-+-+-+-+-+-+-+-+ + */ + lowpan_push_hc_data(hc_ptr, &tc, sizeof(tc)); + val = LOWPAN_IPHC_TF_10; + } + } else { + /* check if dscp is zero, it's after the first two bit */ + if (!(tc & 0x3f)) { + /* 01: ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + * + * 1 2 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN|rsv| Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + memcpy(&tf[0], &hdr->flow_lbl[0], 3); + /* zero the highest 4-bits, contains DCSP + ECN */ + tf[0] &= ~0xf0; + /* set ECN */ + tf[0] |= (tc & 0xc0); + + lowpan_push_hc_data(hc_ptr, tf, 3); + val = LOWPAN_IPHC_TF_01; + } else { + /* 00: ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) + * + * 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN| DSCP | rsv | Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + memcpy(&tf[0], &tc, sizeof(tc)); + /* highest nibble of flow_lbl[0] is part of DSCP + ECN + * which will be the 4-bit pad and will be filled with + * zeros afterwards. + */ + memcpy(&tf[1], &hdr->flow_lbl[0], 3); + /* zero the 4-bit pad, which is reserved */ + tf[1] &= ~0xf0; + + lowpan_push_hc_data(hc_ptr, tf, 4); + val = LOWPAN_IPHC_TF_00; + } + } + + return val; +} + +static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr, + const struct in6_addr *ipaddr) +{ + u8 val; + + if (lowpan_is_mcast_addr_compressable8(ipaddr)) { + pr_debug("compressed to 1 octet\n"); + /* use last byte */ + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[15], 1); + val = LOWPAN_IPHC_DAM_11; + } else if (lowpan_is_mcast_addr_compressable32(ipaddr)) { + pr_debug("compressed to 4 octets\n"); + /* second byte + the last three */ + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1); + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[13], 3); + val = LOWPAN_IPHC_DAM_10; + } else if (lowpan_is_mcast_addr_compressable48(ipaddr)) { + pr_debug("compressed to 6 octets\n"); + /* second byte + the last five */ + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1); + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[11], 5); + val = LOWPAN_IPHC_DAM_01; + } else { + pr_debug("using full address\n"); + lowpan_push_hc_data(hc_ptr, ipaddr->s6_addr, 16); + val = LOWPAN_IPHC_DAM_00; + } + + return val; +} + +int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, + const void *daddr, const void *saddr) +{ + u8 iphc0, iphc1, *hc_ptr; struct ipv6hdr *hdr; - u8 head[100] = {}; + u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {}; int ret, addr_type; - if (type != ETH_P_IPV6) + if (skb->protocol != htons(ETH_P_IPV6)) return -EINVAL; hdr = ipv6_hdr(skb); @@ -456,63 +771,26 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, /* TODO: context lookup */ - raw_dump_inline(__func__, "saddr", - (unsigned char *)_saddr, IEEE802154_ADDR_LEN); - raw_dump_inline(__func__, "daddr", - (unsigned char *)_daddr, IEEE802154_ADDR_LEN); + raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN); + raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN); raw_dump_table(__func__, "sending raw skb network uncompressed packet", skb->data, skb->len); - /* Traffic class, flow label - * If flow label is 0, compress it. If traffic class is 0, compress it - * We have to process both in the same time as the offset of traffic - * class depends on the presence of version and flow label - */ - - /* hc format of TC is ECN | DSCP , original one is DSCP | ECN */ - tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); - tmp = ((tmp & 0x03) << 6) | (tmp >> 2); - - if (((hdr->flow_lbl[0] & 0x0F) == 0) && - (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { - /* flow label can be compressed */ - iphc0 |= LOWPAN_IPHC_FL_C; - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress (elide) all */ - iphc0 |= LOWPAN_IPHC_TC_C; - } else { - /* compress only the flow label */ - *hc_ptr = tmp; - hc_ptr += 1; - } - } else { - /* Flow label cannot be compressed */ - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress only traffic class */ - iphc0 |= LOWPAN_IPHC_TC_C; - *hc_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); - memcpy(hc_ptr + 1, &hdr->flow_lbl[1], 2); - hc_ptr += 3; - } else { - /* compress nothing */ - memcpy(hc_ptr, hdr, 4); - /* replace the top byte with new ECN | DSCP format */ - *hc_ptr = tmp; - hc_ptr += 4; - } - } + /* Traffic Class, Flow Label compression */ + iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr); /* NOTE: payload length is always compressed */ /* Check if we provide the nhc format for nexthdr and compression * functionality. If not nexthdr is handled inline and not compressed. */ - ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr, &iphc0); - if (ret < 0) - return ret; + ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr); + if (ret == -ENOENT) + lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr, + sizeof(hdr->nexthdr)); + else + iphc0 |= LOWPAN_IPHC_NH; /* Hop limit * if 1: compress, encoding is 01 @@ -522,13 +800,13 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, */ switch (hdr->hop_limit) { case 1: - iphc0 |= LOWPAN_IPHC_TTL_1; + iphc0 |= LOWPAN_IPHC_HLIM_01; break; case 64: - iphc0 |= LOWPAN_IPHC_TTL_64; + iphc0 |= LOWPAN_IPHC_HLIM_10; break; case 255: - iphc0 |= LOWPAN_IPHC_TTL_255; + iphc0 |= LOWPAN_IPHC_HLIM_11; break; default: lowpan_push_hc_data(&hc_ptr, &hdr->hop_limit, @@ -542,9 +820,8 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, iphc1 |= LOWPAN_IPHC_SAC; } else { if (addr_type & IPV6_ADDR_LINKLOCAL) { - iphc1 |= lowpan_compress_addr_64(&hc_ptr, - LOWPAN_IPHC_SAM_BIT, - &hdr->saddr, _saddr); + iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr, + saddr, true); pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n", &hdr->saddr, iphc1); } else { @@ -558,38 +835,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, if (addr_type & IPV6_ADDR_MULTICAST) { pr_debug("destination address is multicast: "); iphc1 |= LOWPAN_IPHC_M; - if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { - pr_debug("compressed to 1 octet\n"); - iphc1 |= LOWPAN_IPHC_DAM_11; - /* use last byte */ - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[15], 1); - } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { - pr_debug("compressed to 4 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_10; - /* second byte + the last three */ - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[1], 1); - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[13], 3); - } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { - pr_debug("compressed to 6 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_01; - /* second byte + the last five */ - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[1], 1); - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[11], 5); - } else { - pr_debug("using full address\n"); - iphc1 |= LOWPAN_IPHC_DAM_00; - lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16); - } + iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr, &hdr->daddr); } else { if (addr_type & IPV6_ADDR_LINKLOCAL) { /* TODO: context lookup */ - iphc1 |= lowpan_compress_addr_64(&hc_ptr, - LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr); + iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr, + daddr, false); pr_debug("dest address unicast link-local %pI6c " "iphc1 0x%02x\n", &hdr->daddr, iphc1); } else { @@ -599,7 +850,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, } /* next header compression */ - if (iphc0 & LOWPAN_IPHC_NH_C) { + if (iphc0 & LOWPAN_IPHC_NH) { ret = lowpan_nhc_do_compression(skb, hdr, &hc_ptr); if (ret < 0) return ret; diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c index fd20fc51a7c4..7008d53e455c 100644 --- a/net/6lowpan/nhc.c +++ b/net/6lowpan/nhc.c @@ -95,23 +95,20 @@ static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb) } int lowpan_nhc_check_compression(struct sk_buff *skb, - const struct ipv6hdr *hdr, u8 **hc_ptr, - u8 *iphc0) + const struct ipv6hdr *hdr, u8 **hc_ptr) { struct lowpan_nhc *nhc; + int ret = 0; spin_lock_bh(&lowpan_nhc_lock); nhc = lowpan_nexthdr_nhcs[hdr->nexthdr]; - if (nhc && nhc->compress) - *iphc0 |= LOWPAN_IPHC_NH_C; - else - lowpan_push_hc_data(hc_ptr, &hdr->nexthdr, - sizeof(hdr->nexthdr)); + if (!(nhc && nhc->compress)) + ret = -ENOENT; spin_unlock_bh(&lowpan_nhc_lock); - return 0; + return ret; } int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, @@ -157,7 +154,8 @@ out: return ret; } -int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, +int lowpan_nhc_do_uncompression(struct sk_buff *skb, + const struct net_device *dev, struct ipv6hdr *hdr) { struct lowpan_nhc *nhc; diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h index c249f17fa37b..803041400136 100644 --- a/net/6lowpan/nhc.h +++ b/net/6lowpan/nhc.h @@ -86,19 +86,16 @@ struct lowpan_nhc *lowpan_nhc_by_nexthdr(u8 nexthdr); /** * lowpan_nhc_check_compression - checks if we support compression format. If - * we support the nhc by nexthdr field, the 6LoWPAN iphc NHC bit will be - * set. If we don't support nexthdr will be added as inline data to the - * 6LoWPAN header. + * we support the nhc by nexthdr field, the function will return 0. If we + * don't support the nhc by nexthdr this function will return -ENOENT. * * @skb: skb of 6LoWPAN header to read nhc and replace header. * @hdr: ipv6hdr to check the nexthdr value * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of * replaced header. - * @iphc0: iphc0 pointer to set the 6LoWPAN NHC bit */ int lowpan_nhc_check_compression(struct sk_buff *skb, - const struct ipv6hdr *hdr, u8 **hc_ptr, - u8 *iphc0); + const struct ipv6hdr *hdr, u8 **hc_ptr); /** * lowpan_nhc_do_compression - calling compress callback for nhc @@ -119,7 +116,8 @@ int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, * @dev: netdevice for print logging information. * @hdr: ipv6hdr for setting nexthdr value. */ -int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, +int lowpan_nhc_do_uncompression(struct sk_buff *skb, + const struct net_device *dev, struct ipv6hdr *hdr); /** diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c index 72d0b57eb6e5..69537a2eaab1 100644 --- a/net/6lowpan/nhc_udp.c +++ b/net/6lowpan/nhc_udp.c @@ -17,7 +17,27 @@ #include "nhc.h" -#define LOWPAN_NHC_UDP_IDLEN 1 +#define LOWPAN_NHC_UDP_MASK 0xF8 +#define LOWPAN_NHC_UDP_ID 0xF0 +#define LOWPAN_NHC_UDP_IDLEN 1 + +#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0 +#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0 +#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000 +#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00 + +/* values for port compression, _with checksum_ ie bit 5 set to 0 */ + +/* all inline */ +#define LOWPAN_NHC_UDP_CS_P_00 0xF0 +/* source 16bit inline, dest = 0xF0 + 8 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_01 0xF1 +/* source = 0xF0 + 8bit inline, dest = 16 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_10 0xF2 +/* source & dest = 0xF0B + 4bit inline */ +#define LOWPAN_NHC_UDP_CS_P_11 0xF3 +/* checksum elided */ +#define LOWPAN_NHC_UDP_CS_C 0x04 static int udp_uncompress(struct sk_buff *skb, size_t needed) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 61bf2a06e85d..496b27588493 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -206,7 +206,10 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, return -ENOMEM; if (vlan_hw_filter_capable(dev, vid_info)) { - err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); + if (netif_device_present(dev)) + err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); + else + err = -ENODEV; if (err) { kfree(vid_info); return err; @@ -264,7 +267,10 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, int err; if (vlan_hw_filter_capable(dev, vid_info)) { - err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); + if (netif_device_present(dev)) + err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); + else + err = -ENODEV; if (err) { pr_warn("failed to kill vid %04x/%d for device %s\n", proto, vid, dev->name); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index db73b8a1433f..9e9cca3689a0 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -21,8 +21,6 @@ #include <net/ip6_route.h> #include <net/addrconf.h> -#include <net/af_ieee802154.h> /* to get the address type */ - #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> @@ -265,14 +263,13 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev) if (!skb_cp) return NET_RX_DROP; - return netif_rx(skb_cp); + return netif_rx_ni(skb_cp); } static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct l2cap_chan *chan) { const u8 *saddr, *daddr; - u8 iphc0, iphc1; struct lowpan_dev *dev; struct lowpan_peer *peer; @@ -287,22 +284,7 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, saddr = peer->eui64_addr; daddr = dev->netdev->dev_addr; - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - return lowpan_header_decompress(skb, netdev, - saddr, IEEE802154_ADDR_LONG, - EUI64_ADDR_LEN, daddr, - IEEE802154_ADDR_LONG, EUI64_ADDR_LEN, - iphc0, iphc1); - + return lowpan_header_decompress(skb, netdev, daddr, saddr); } static int recv_pkt(struct sk_buff *skb, struct net_device *dev, @@ -314,15 +296,17 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, if (!netif_running(dev)) goto drop; - if (dev->type != ARPHRD_6LOWPAN) + if (dev->type != ARPHRD_6LOWPAN || !skb->len) goto drop; + skb_reset_network_header(skb); + skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto drop; /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + if (lowpan_is_ipv6(*skb_network_header(skb))) { /* Copy the packet so that the IPv6 header is * properly aligned. */ @@ -334,7 +318,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; - skb_reset_network_header(local_skb); skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { @@ -347,38 +330,34 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, consume_skb(local_skb); consume_skb(skb); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - local_skb = skb_clone(skb, GFP_ATOMIC); - if (!local_skb) - goto drop; + } else if (lowpan_is_iphc(*skb_network_header(skb))) { + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; - ret = iphc_decompress(local_skb, dev, chan); - if (ret < 0) { - kfree_skb(local_skb); - goto drop; - } + ret = iphc_decompress(local_skb, dev, chan); + if (ret < 0) { + kfree_skb(local_skb); + goto drop; + } - local_skb->protocol = htons(ETH_P_IPV6); - local_skb->pkt_type = PACKET_HOST; - local_skb->dev = dev; + local_skb->protocol = htons(ETH_P_IPV6); + local_skb->pkt_type = PACKET_HOST; + local_skb->dev = dev; - if (give_skb_to_upper(local_skb, dev) - != NET_RX_SUCCESS) { - kfree_skb(local_skb); - goto drop; - } + if (give_skb_to_upper(local_skb, dev) + != NET_RX_SUCCESS) { + kfree_skb(local_skb); + goto drop; + } - dev->stats.rx_bytes += skb->len; - dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; - consume_skb(local_skb); - consume_skb(skb); - break; - default: - break; - } + consume_skb(local_skb); + consume_skb(skb); + } else { + goto drop; } return NET_RX_SUCCESS; @@ -492,8 +471,7 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev, status = 1; } - lowpan_header_compress(skb, netdev, ETH_P_IPV6, daddr, - dev->netdev->dev_addr, skb->len); + lowpan_header_compress(skb, netdev, daddr, dev->netdev->dev_addr); err = dev_hard_header(skb, netdev, ETH_P_IPV6, NULL, NULL, 0); if (err < 0) @@ -1135,7 +1113,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, return -ENOENT; hci_dev_lock(hdev); - hcon = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); if (!hcon) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 70f9d945faf7..a3bffd1ec2b4 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -33,7 +33,7 @@ #include "selftest.h" -#define VERSION "2.20" +#define VERSION "2.21" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, BT_DBG("sock %p sk %p len %zu", sock, sk, len); - if (flags & (MSG_OOB)) + if (flags & MSG_OOB) return -EOPNOTSUPP; skb = skb_recv_datagram(sk, flags, noblock, &err); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2dda439c8cb8..85b82f7adbd2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -59,15 +59,11 @@ static const struct sco_param esco_param_msbc[] = { { EDR_ESCO_MASK | ESCO_EV3, 0x0008, 0x02 }, /* T1 */ }; -static void hci_le_create_connection_cancel(struct hci_conn *conn) -{ - hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); -} - /* This function requires the caller holds hdev->lock */ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) { struct hci_conn_params *params; + struct hci_dev *hdev = conn->hdev; struct smp_irk *irk; bdaddr_t *bdaddr; u8 bdaddr_type; @@ -76,14 +72,15 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) bdaddr_type = conn->dst_type; /* Check if we need to convert to identity address */ - irk = hci_get_irk(conn->hdev, bdaddr, bdaddr_type); + irk = hci_get_irk(hdev, bdaddr, bdaddr_type); if (irk) { bdaddr = &irk->bdaddr; bdaddr_type = irk->addr_type; } - params = hci_explicit_connect_lookup(conn->hdev, bdaddr, bdaddr_type); - if (!params) + params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr, + bdaddr_type); + if (!params || !params->explicit_connect) return; /* The connection attempt was doing scan for new RPA, and is @@ -97,21 +94,21 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) switch (params->auto_connect) { case HCI_AUTO_CONN_EXPLICIT: - hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); + hci_conn_params_del(hdev, bdaddr, bdaddr_type); /* return instead of break to avoid duplicate scan update */ return; case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: - list_add(¶ms->action, &conn->hdev->pend_le_conns); + list_add(¶ms->action, &hdev->pend_le_conns); break; case HCI_AUTO_CONN_REPORT: - list_add(¶ms->action, &conn->hdev->pend_le_reports); + list_add(¶ms->action, &hdev->pend_le_reports); break; default: break; } - hci_update_background_scan(conn->hdev); + hci_update_background_scan(hdev); } static void hci_conn_cleanup(struct hci_conn *conn) @@ -137,18 +134,51 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_conn_put(conn); } -/* This function requires the caller holds hdev->lock */ -static void hci_connect_le_scan_remove(struct hci_conn *conn) +static void le_scan_cleanup(struct work_struct *work) { - hci_connect_le_scan_cleanup(conn); + struct hci_conn *conn = container_of(work, struct hci_conn, + le_scan_cleanup); + struct hci_dev *hdev = conn->hdev; + struct hci_conn *c = NULL; - /* We can't call hci_conn_del here since that would deadlock - * with trying to call cancel_delayed_work_sync(&conn->disc_work). - * Instead, call just hci_conn_cleanup() which contains the bare - * minimum cleanup operations needed for a connection in this - * state. + BT_DBG("%s hcon %p", hdev->name, conn); + + hci_dev_lock(hdev); + + /* Check that the hci_conn is still around */ + rcu_read_lock(); + list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { + if (c == conn) + break; + } + rcu_read_unlock(); + + if (c == conn) { + hci_connect_le_scan_cleanup(conn); + hci_conn_cleanup(conn); + } + + hci_dev_unlock(hdev); + hci_dev_put(hdev); + hci_conn_put(conn); +} + +static void hci_connect_le_scan_remove(struct hci_conn *conn) +{ + BT_DBG("%s hcon %p", conn->hdev->name, conn); + + /* We can't call hci_conn_del/hci_conn_cleanup here since that + * could deadlock with another hci_conn_del() call that's holding + * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). + * Instead, grab temporary extra references to the hci_dev and + * hci_conn and perform the necessary cleanup in a separate work + * callback. */ - hci_conn_cleanup(conn); + + hci_dev_hold(conn->hdev); + hci_conn_get(conn); + + schedule_work(&conn->le_scan_cleanup); } static void hci_acl_create_connection(struct hci_conn *conn) @@ -194,33 +224,8 @@ static void hci_acl_create_connection(struct hci_conn *conn) hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); } -static void hci_acl_create_connection_cancel(struct hci_conn *conn) -{ - struct hci_cp_create_conn_cancel cp; - - BT_DBG("hcon %p", conn); - - if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) - return; - - bacpy(&cp.bdaddr, &conn->dst); - hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); -} - -static void hci_reject_sco(struct hci_conn *conn) -{ - struct hci_cp_reject_sync_conn_req cp; - - cp.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; - bacpy(&cp.bdaddr, &conn->dst); - - hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp); -} - int hci_disconnect(struct hci_conn *conn, __u8 reason) { - struct hci_cp_disconnect cp; - BT_DBG("hcon %p", conn); /* When we are master of an established connection and it enters @@ -228,7 +233,8 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason) * current clock offset. Processing of the result is done * within the event handling and hci_clock_offset_evt function. */ - if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER) { + if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER && + (conn->state == BT_CONNECTED || conn->state == BT_CONFIG)) { struct hci_dev *hdev = conn->hdev; struct hci_cp_read_clock_offset clkoff_cp; @@ -237,25 +243,7 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason) &clkoff_cp); } - conn->state = BT_DISCONN; - - cp.handle = cpu_to_le16(conn->handle); - cp.reason = reason; - return hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp); -} - -static void hci_amp_disconn(struct hci_conn *conn) -{ - struct hci_cp_disconn_phy_link cp; - - BT_DBG("hcon %p", conn); - - conn->state = BT_DISCONN; - - cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = hci_proto_disconn_ind(conn); - hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp); + return hci_abort_conn(conn, reason); } static void hci_add_sco(struct hci_conn *conn, __u16 handle) @@ -421,35 +409,14 @@ static void hci_conn_timeout(struct work_struct *work) if (refcnt > 0) return; - switch (conn->state) { - case BT_CONNECT: - case BT_CONNECT2: - if (conn->out) { - if (conn->type == ACL_LINK) - hci_acl_create_connection_cancel(conn); - else if (conn->type == LE_LINK) { - if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - hci_connect_le_scan_remove(conn); - else - hci_le_create_connection_cancel(conn); - } - } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { - hci_reject_sco(conn); - } - break; - case BT_CONFIG: - case BT_CONNECTED: - if (conn->type == AMP_LINK) { - hci_amp_disconn(conn); - } else { - __u8 reason = hci_proto_disconn_ind(conn); - hci_disconnect(conn, reason); - } - break; - default: - conn->state = BT_CLOSED; - break; + /* LE connections in scanning state need special handling */ + if (conn->state == BT_CONNECT && conn->type == LE_LINK && + test_bit(HCI_CONN_SCANNING, &conn->flags)) { + hci_connect_le_scan_remove(conn); + return; } + + hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } /* Enter sniff mode */ @@ -517,7 +484,7 @@ static void le_conn_timeout(struct work_struct *work) return; } - hci_le_create_connection_cancel(conn); + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); } struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, @@ -580,6 +547,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); + INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); @@ -835,7 +803,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * attempt, we simply update pending_sec_level and auth_type fields * and return the object found. */ - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); conn_unfinished = NULL; if (conn) { if (conn->state == BT_CONNECT && @@ -985,13 +953,10 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) { struct hci_conn *conn; - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + conn = hci_conn_hash_lookup_le(hdev, addr, type); if (!conn) return false; - if (conn->dst_type != type) - return false; - if (conn->state != BT_CONNECTED) return false; @@ -1064,7 +1029,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, * attempt, we simply update pending_sec_level and auth_type fields * and return the object found. */ - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); if (conn) { if (conn->pending_sec_level < sec_level) conn->pending_sec_level = sec_level; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e4e53bd663df..83a6aacfab31 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -65,13 +65,6 @@ static DEFINE_IDA(hci_index_ida); #define hci_req_lock(d) mutex_lock(&d->req_lock) #define hci_req_unlock(d) mutex_unlock(&d->req_lock) -/* ---- HCI notifications ---- */ - -static void hci_notify(struct hci_dev *hdev, int event) -{ - hci_sock_dev_event(hdev, event); -} - /* ---- HCI debugfs entries ---- */ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, @@ -162,6 +155,16 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, if (strtobool(buf, &enable)) return -EINVAL; + /* When the diagnostic flags are not persistent and the transport + * is not active, then there is no need for the vendor callback. + * + * Instead just store the desired value. If needed the setting + * will be programmed when the controller gets powered on. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + !test_bit(HCI_RUNNING, &hdev->flags)) + goto done; + hci_req_lock(hdev); err = hdev->set_diag(hdev, enable); hci_req_unlock(hdev); @@ -169,6 +172,7 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, if (err < 0) return err; +done: if (enable) hci_dev_set_flag(hdev, HCI_VENDOR_DIAG); else @@ -1444,12 +1448,14 @@ static int hci_dev_do_open(struct hci_dev *hdev) } set_bit(HCI_RUNNING, &hdev->flags); - hci_notify(hdev, HCI_DEV_OPEN); + hci_sock_dev_event(hdev, HCI_DEV_OPEN); atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); if (hci_dev_test_flag(hdev, HCI_SETUP)) { + hci_sock_dev_event(hdev, HCI_DEV_SETUP); + if (hdev->setup) ret = hdev->setup(hdev); @@ -1490,17 +1496,28 @@ static int hci_dev_do_open(struct hci_dev *hdev) if (!ret) { if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = __hci_init(hdev); + if (!ret && hdev->post_init) + ret = hdev->post_init(hdev); + } } + /* If the HCI Reset command is clearing all diagnostic settings, + * then they need to be reprogrammed after the init procedure + * completed. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) + ret = hdev->set_diag(hdev, true); + clear_bit(HCI_INIT, &hdev->flags); if (!ret) { hci_dev_hold(hdev); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); set_bit(HCI_UP, &hdev->flags); - hci_notify(hdev, HCI_DEV_UP); + hci_sock_dev_event(hdev, HCI_DEV_UP); if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && @@ -1528,7 +1545,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) } clear_bit(HCI_RUNNING, &hdev->flags); - hci_notify(hdev, HCI_DEV_CLOSE); + hci_sock_dev_event(hdev, HCI_DEV_CLOSE); hdev->close(hdev); hdev->flags &= BIT(HCI_RAW); @@ -1684,7 +1701,7 @@ int hci_dev_do_close(struct hci_dev *hdev) smp_unregister(hdev); - hci_notify(hdev, HCI_DEV_DOWN); + hci_sock_dev_event(hdev, HCI_DEV_DOWN); if (hdev->flush) hdev->flush(hdev); @@ -1715,7 +1732,7 @@ int hci_dev_do_close(struct hci_dev *hdev) } clear_bit(HCI_RUNNING, &hdev->flags); - hci_notify(hdev, HCI_DEV_CLOSE); + hci_sock_dev_event(hdev, HCI_DEV_CLOSE); /* After this point our queues are empty * and no tasks are scheduled. */ @@ -2917,23 +2934,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, } /* This function requires the caller holds hdev->lock */ -struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, - bdaddr_t *addr, - u8 addr_type) -{ - struct hci_conn_params *param; - - list_for_each_entry(param, &hdev->pend_le_conns, action) { - if (bacmp(¶m->addr, addr) == 0 && - param->addr_type == addr_type && - param->explicit_connect) - return param; - } - - return NULL; -} - -/* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { @@ -3407,7 +3407,7 @@ int hci_register_dev(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); - hci_notify(hdev, HCI_DEV_REG); + hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); queue_work(hdev->req_workqueue, &hdev->power_on); @@ -3455,7 +3455,7 @@ void hci_unregister_dev(struct hci_dev *hdev) * pending list */ BUG_ON(!list_empty(&hdev->mgmt_pending)); - hci_notify(hdev, HCI_DEV_UNREG); + hci_sock_dev_event(hdev, HCI_DEV_UNREG); if (hdev->rfkill) { rfkill_unregister(hdev->rfkill); @@ -3492,7 +3492,7 @@ EXPORT_SYMBOL(hci_unregister_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { - hci_notify(hdev, HCI_DEV_SUSPEND); + hci_sock_dev_event(hdev, HCI_DEV_SUSPEND); return 0; } EXPORT_SYMBOL(hci_suspend_dev); @@ -3500,7 +3500,7 @@ EXPORT_SYMBOL(hci_suspend_dev); /* Resume HCI device */ int hci_resume_dev(struct hci_dev *hdev) { - hci_notify(hdev, HCI_DEV_RESUME); + hci_sock_dev_event(hdev, HCI_DEV_RESUME); return 0; } EXPORT_SYMBOL(hci_resume_dev); @@ -3555,14 +3555,15 @@ EXPORT_SYMBOL(hci_recv_frame); /* Receive diagnostic message from HCI drivers */ int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb) { + /* Mark as diagnostic packet */ + bt_cb(skb)->pkt_type = HCI_DIAG_PKT; + /* Time stamp */ __net_timestamp(skb); - /* Mark as diagnostic packet and send to monitor */ - bt_cb(skb)->pkt_type = HCI_DIAG_PKT; - hci_send_to_monitor(hdev, skb); + skb_queue_tail(&hdev->rx_q, skb); + queue_work(hdev->workqueue, &hdev->rx_work); - kfree_skb(skb); return 0; } EXPORT_SYMBOL(hci_recv_diag); @@ -3642,7 +3643,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->req.start = true; + bt_cb(skb)->hci.req_start = true; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -4339,7 +4340,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev) if (!skb) return true; - return bt_cb(skb)->req.start; + return bt_cb(skb)->hci.req_start; } static void hci_resend_last(struct hci_dev *hdev) @@ -4399,26 +4400,26 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->req.complete) { - *req_complete = bt_cb(hdev->sent_cmd)->req.complete; + if (bt_cb(hdev->sent_cmd)->hci.req_complete) { + *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; return; } - if (bt_cb(hdev->sent_cmd)->req.complete_skb) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->req.complete_skb; + if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; return; } /* Remove all pending commands belonging to this request */ spin_lock_irqsave(&hdev->cmd_q.lock, flags); while ((skb = __skb_dequeue(&hdev->cmd_q))) { - if (bt_cb(skb)->req.start) { + if (bt_cb(skb)->hci.req_start) { __skb_queue_head(&hdev->cmd_q, skb); break; } - *req_complete = bt_cb(skb)->req.complete; - *req_complete_skb = bt_cb(skb)->req.complete_skb; + *req_complete = bt_cb(skb)->hci.req_complete; + *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; kfree_skb(skb); } spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b4571d84cafa..d57c11c1c6b5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1915,7 +1915,8 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + conn = hci_conn_hash_lookup_le(hdev, &cp->peer_addr, + cp->peer_addr_type); if (!conn) goto unlock; @@ -3137,7 +3138,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, * complete event). */ if (ev->status || - (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) + (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->hci.req_event)) hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); @@ -5208,7 +5209,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) u8 status = 0, event = hdr->evt, req_evt = 0; u16 opcode = HCI_OP_NOP; - if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) { struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; opcode = __le16_to_cpu(cmd_hdr->opcode); hci_req_cmd_complete(hdev, opcode, status, &req_complete, diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b7369220c9ef..981f8a202c27 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -56,8 +56,8 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete, return -ENODATA; skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; - bt_cb(skb)->req.complete_skb = complete_skb; + bt_cb(skb)->hci.req_complete = complete; + bt_cb(skb)->hci.req_complete_skb = complete_skb; spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); @@ -99,7 +99,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, BT_DBG("skb len %d", skb->len); bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->opcode = opcode; + bt_cb(skb)->hci.opcode = opcode; return skb; } @@ -128,9 +128,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, } if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; + bt_cb(skb)->hci.req_start = true; - bt_cb(skb)->req.event = event; + bt_cb(skb)->hci.req_event = event; skb_queue_tail(&req->cmd_q, skb); } @@ -564,3 +564,96 @@ void hci_update_background_scan(struct hci_dev *hdev) if (err && err != -ENODATA) BT_ERR("Failed to run HCI request: err %d", err); } + +void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, + u8 reason) +{ + switch (conn->state) { + case BT_CONNECTED: + case BT_CONFIG: + if (conn->type == AMP_LINK) { + struct hci_cp_disconn_phy_link cp; + + cp.phy_handle = HCI_PHY_HANDLE(conn->handle); + cp.reason = reason; + hci_req_add(req, HCI_OP_DISCONN_PHY_LINK, sizeof(cp), + &cp); + } else { + struct hci_cp_disconnect dc; + + dc.handle = cpu_to_le16(conn->handle); + dc.reason = reason; + hci_req_add(req, HCI_OP_DISCONNECT, sizeof(dc), &dc); + } + + conn->state = BT_DISCONN; + + break; + case BT_CONNECT: + if (conn->type == LE_LINK) { + if (test_bit(HCI_CONN_SCANNING, &conn->flags)) + break; + hci_req_add(req, HCI_OP_LE_CREATE_CONN_CANCEL, + 0, NULL); + } else if (conn->type == ACL_LINK) { + if (req->hdev->hci_ver < BLUETOOTH_VER_1_2) + break; + hci_req_add(req, HCI_OP_CREATE_CONN_CANCEL, + 6, &conn->dst); + } + break; + case BT_CONNECT2: + if (conn->type == ACL_LINK) { + struct hci_cp_reject_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + rej.reason = reason; + + hci_req_add(req, HCI_OP_REJECT_CONN_REQ, + sizeof(rej), &rej); + } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { + struct hci_cp_reject_sync_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + + /* SCO rejection has its own limited set of + * allowed error values (0x0D-0x0F) which isn't + * compatible with most values passed to this + * function. To be safe hard-code one of the + * values that's suitable for SCO. + */ + rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES; + + hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, + sizeof(rej), &rej); + } + break; + default: + conn->state = BT_CLOSED; + break; + } +} + +static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + if (status) + BT_DBG("Failed to abort connection: status 0x%2.2x", status); +} + +int hci_abort_conn(struct hci_conn *conn, u8 reason) +{ + struct hci_request req; + int err; + + hci_req_init(&req, conn->hdev); + + __hci_abort_conn(&req, conn, reason); + + err = hci_req_run(&req, abort_conn_complete); + if (err && err != -ENODATA) { + BT_ERR("Failed to run HCI request: err %d", err); + return err; + } + + return 0; +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index bf6df92f42db..25c7f1305dcb 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,3 +55,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, void hci_update_background_scan(struct hci_dev *hdev); void __hci_update_background_scan(struct hci_request *req); + +int hci_abort_conn(struct hci_conn *conn, u8 reason); +void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, + u8 reason); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 9a100c1fd7b5..b1eb8c09a660 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -120,10 +120,7 @@ static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) /* Apply filter */ flt = &hci_pi(sk)->filter; - if (bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) - flt_type = 0; - else - flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS; + flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS; if (!test_bit(flt_type, &flt->type_mask)) return true; @@ -173,6 +170,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { + if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && + bt_cb(skb)->pkt_type != HCI_EVENT_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) + continue; if (is_filtered_packet(sk, skb)) continue; } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { @@ -333,6 +335,12 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) opcode = cpu_to_le16(HCI_MON_DEL_INDEX); break; + case HCI_DEV_SETUP: + if (hdev->manufacturer == 0xffff) + return NULL; + + /* fall through */ + case HCI_DEV_UP: skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC); if (!skb) @@ -401,15 +409,17 @@ static void send_monitor_replay(struct sock *sk) if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); - if (!test_bit(HCI_UP, &hdev->flags)) - continue; - - skb = create_monitor_event(hdev, HCI_DEV_UP); - if (!skb) - continue; + if (test_bit(HCI_UP, &hdev->flags)) + skb = create_monitor_event(hdev, HCI_DEV_UP); + else if (hci_dev_test_flag(hdev, HCI_SETUP)) + skb = create_monitor_event(hdev, HCI_DEV_SETUP); + else + skb = NULL; - if (sock_queue_rcv_skb(sk, skb)) - kfree_skb(skb); + if (skb) { + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); + } } read_unlock(&hci_dev_list_lock); @@ -991,7 +1001,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, BT_DBG("sock %p, sk %p", sock, sk); - if (flags & (MSG_OOB)) + if (flags & MSG_OOB) return -EOPNOTSUPP; if (sk->sk_state == BT_CLOSED) @@ -1239,7 +1249,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->req.start = true; + bt_cb(skb)->hci.req_start = true; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -1250,6 +1260,12 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } + if (bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + err = -EINVAL; + goto drop; + } + skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index f1a117f8cad2..0bec4588c3c8 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; + /* The HIDP user-space API only contains calls to add and remove + * devices. There is no way to forward events of any kind. Therefore, + * we have to forcefully disconnect a device on idle-timeouts. This is + * unfortunate and weird API design, but it is spec-compliant and + * required for backwards-compatibility. Hence, on idle-timeout, we + * signal driver-detach events, so poll() will be woken up with an + * error-condition on both sockets. + */ + + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + wake_up_interruptible(sk_sleep(session->intr_sock->sk)); + wake_up_interruptible(sk_sleep(session->ctrl_sock->sk)); + hidp_session_terminate(session); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 586b3d580cfc..1bb551527044 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1111,53 +1111,76 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) if (!sk) return 0; + lock_sock(sk); + + if (sk->sk_shutdown) + goto shutdown_already; + + BT_DBG("Handling sock shutdown"); + /* prevent sk structure from being freed whilst unlocked */ sock_hold(sk); chan = l2cap_pi(sk)->chan; /* prevent chan structure from being freed whilst unlocked */ l2cap_chan_hold(chan); - conn = chan->conn; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); + if (chan->mode == L2CAP_MODE_ERTM && + chan->unacked_frames > 0 && + chan->state == BT_CONNECTED) { + err = __l2cap_wait_ack(sk, chan); + + /* After waiting for ACKs, check whether shutdown + * has already been actioned to close the L2CAP + * link such as by l2cap_disconnection_req(). + */ + if (sk->sk_shutdown) + goto has_shutdown; + } + + sk->sk_shutdown = SHUTDOWN_MASK; + release_sock(sk); + + l2cap_chan_lock(chan); + conn = chan->conn; + if (conn) + /* prevent conn structure from being freed */ + l2cap_conn_get(conn); + l2cap_chan_unlock(chan); + if (conn) + /* mutex lock must be taken before l2cap_chan_lock() */ mutex_lock(&conn->chan_lock); l2cap_chan_lock(chan); - lock_sock(sk); + l2cap_chan_close(chan, 0); + l2cap_chan_unlock(chan); - if (!sk->sk_shutdown) { - if (chan->mode == L2CAP_MODE_ERTM && - chan->unacked_frames > 0 && - chan->state == BT_CONNECTED) - err = __l2cap_wait_ack(sk, chan); + if (conn) { + mutex_unlock(&conn->chan_lock); + l2cap_conn_put(conn); + } - sk->sk_shutdown = SHUTDOWN_MASK; + lock_sock(sk); - release_sock(sk); - l2cap_chan_close(chan, 0); - lock_sock(sk); + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && - !(current->flags & PF_EXITING)) - err = bt_sock_wait_state(sk, BT_CLOSED, - sk->sk_lingertime); - } +has_shutdown: + l2cap_chan_put(chan); + sock_put(sk); +shutdown_already: if (!err && sk->sk_err) err = -sk->sk_err; release_sock(sk); - l2cap_chan_unlock(chan); - - if (conn) - mutex_unlock(&conn->chan_lock); - - l2cap_chan_put(chan); - sock_put(sk); - BT_DBG("err: %d", err); + BT_DBG("Sock shutdown complete err: %d", err); return err; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c4fe2fee753f..7f22119276f3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -268,6 +268,14 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, HCI_SOCK_TRUSTED, skip_sk); } +static u8 le_addr_type(u8 mgmt_addr_type) +{ + if (mgmt_addr_type == BDADDR_LE_PUBLIC) + return ADDR_LE_DEV_PUBLIC; + else + return ADDR_LE_DEV_RANDOM; +} + static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -1631,35 +1639,8 @@ static int clean_up_hci_state(struct hci_dev *hdev) discov_stopped = hci_stop_discovery(&req); list_for_each_entry(conn, &hdev->conn_hash.list, list) { - struct hci_cp_disconnect dc; - struct hci_cp_reject_conn_req rej; - - switch (conn->state) { - case BT_CONNECTED: - case BT_CONFIG: - dc.handle = cpu_to_le16(conn->handle); - dc.reason = 0x15; /* Terminated due to Power Off */ - hci_req_add(&req, HCI_OP_DISCONNECT, sizeof(dc), &dc); - break; - case BT_CONNECT: - if (conn->type == LE_LINK) - hci_req_add(&req, HCI_OP_LE_CREATE_CONN_CANCEL, - 0, NULL); - else if (conn->type == ACL_LINK) - hci_req_add(&req, HCI_OP_CREATE_CONN_CANCEL, - 6, &conn->dst); - break; - case BT_CONNECT2: - bacpy(&rej.bdaddr, &conn->dst); - rej.reason = 0x15; /* Terminated due to Power Off */ - if (conn->type == ACL_LINK) - hci_req_add(&req, HCI_OP_REJECT_CONN_REQ, - sizeof(rej), &rej); - else if (conn->type == SCO_LINK) - hci_req_add(&req, HCI_OP_REJECT_SYNC_CONN_REQ, - sizeof(rej), &rej); - break; - } + /* 0x15 == Terminated due to Power Off */ + __hci_abort_conn(&req, conn, 0x15); } err = hci_req_run(&req, clean_up_hci_complete); @@ -3044,9 +3025,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_unpair_device *cp = data; struct mgmt_rp_unpair_device rp; - struct hci_cp_disconnect dc; + struct hci_conn_params *params; struct mgmt_pending_cmd *cmd; struct hci_conn *conn; + u8 addr_type; int err; memset(&rp, 0, sizeof(rp)); @@ -3087,36 +3069,23 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = NULL; err = hci_remove_link_key(hdev, &cp->addr.bdaddr); - } else { - u8 addr_type; - - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, - &cp->addr.bdaddr); - if (conn) { - /* Defer clearing up the connection parameters - * until closing to give a chance of keeping - * them if a repairing happens. - */ - set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); - - /* If disconnection is not requested, then - * clear the connection variable so that the - * link is not terminated. - */ - if (!cp->disconnect) - conn = NULL; + if (err < 0) { + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_NOT_PAIRED, &rp, + sizeof(rp)); + goto unlock; } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + goto done; + } - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); + /* LE address type */ + addr_type = le_addr_type(cp->addr.type); - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); - } + hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); + err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -3124,6 +3093,36 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, addr_type); + if (!conn) { + hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type); + goto done; + } + + /* Abort any ongoing SMP pairing */ + smp_cancel_pairing(conn); + + /* Defer clearing up the connection parameters until closing to + * give a chance of keeping them if a repairing happens. + */ + set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); + + /* Disable auto-connection parameters if present */ + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); + if (params) { + if (params->explicit_connect) + params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + else + params->auto_connect = HCI_AUTO_CONN_DISABLED; + } + + /* If disconnection is not requested, then clear the connection + * variable so that the link is not terminated. + */ + if (!cp->disconnect) + conn = NULL; + +done: /* If the connection variable is set, then termination of the * link is requested. */ @@ -3143,9 +3142,7 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, cmd->cmd_complete = addr_cmd_complete; - dc.handle = cpu_to_le16(conn->handle); - dc.reason = 0x13; /* Remote User Terminated Connection */ - err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); + err = hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); if (err < 0) mgmt_pending_remove(cmd); @@ -3193,7 +3190,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); else - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); + conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, @@ -3544,16 +3542,9 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, auth_type); } else { - u8 addr_type; + u8 addr_type = le_addr_type(cp->addr.type); struct hci_conn_params *p; - /* Convert from L2CAP channel address type to HCI address type - */ - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; - /* When pairing a new device, it is expected to remember * this device for future connections. Adding the connection * parameter information ahead of time allows tracking @@ -3697,7 +3688,8 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, if (addr->type == BDADDR_BREDR) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr); else - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr); + conn = hci_conn_hash_lookup_le(hdev, &addr->bdaddr, + le_addr_type(addr->type)); if (!conn) { err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, @@ -5600,14 +5592,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; - u8 addr_type; - - if (irk->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; - hci_add_irk(hdev, &irk->addr.bdaddr, addr_type, irk->val, + hci_add_irk(hdev, &irk->addr.bdaddr, + le_addr_type(irk->addr.type), irk->val, BDADDR_ANY); } @@ -5687,12 +5674,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; - u8 type, addr_type, authenticated; - - if (key->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + u8 type, authenticated; switch (key->type) { case MGMT_LTK_UNAUTHENTICATED: @@ -5718,9 +5700,9 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } - hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type, - authenticated, key->val, key->enc_size, key->ediv, - key->rand); + hci_add_ltk(hdev, &key->addr.bdaddr, + le_addr_type(key->addr.type), type, authenticated, + key->val, key->enc_size, key->ediv, key->rand); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, @@ -6232,10 +6214,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, goto added; } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + addr_type = le_addr_type(cp->addr.type); if (cp->action == 0x02) auto_conn = HCI_AUTO_CONN_ALWAYS; @@ -6364,10 +6343,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto complete; } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + addr_type = le_addr_type(cp->addr.type); /* Kernel internally uses conn_params with resolvable private * address, but Remove Device allows only identity addresses. @@ -7873,27 +7849,13 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL); } -void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk) +void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) { struct mgmt_ev_new_irk ev; memset(&ev, 0, sizeof(ev)); - /* For identity resolving keys from devices that are already - * using a public address or static random address, do not - * ask for storing this key. The identity resolving key really - * is only mandatory for devices using resolvable random - * addresses. - * - * Storing all identity resolving keys has the downside that - * they will be also loaded on next boot of they system. More - * identity resolving keys, means more time during scanning is - * needed to actually resolve these addresses. - */ - if (bacmp(&irk->rpa, BDADDR_ANY)) - ev.store_hint = 0x01; - else - ev.store_hint = 0x00; + ev.store_hint = persistent; bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f315c8d0e43b..fe129663bd3f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -74,7 +74,7 @@ struct sco_pinfo { static void sco_sock_timeout(unsigned long arg) { - struct sock *sk = (struct sock *) arg; + struct sock *sk = (struct sock *)arg; BT_DBG("sock %p state %d", sk, sk->sk_state); @@ -170,18 +170,21 @@ static void sco_conn_del(struct hci_conn *hcon, int err) sco_conn_unlock(conn); if (sk) { + sock_hold(sk); bh_lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); bh_unlock_sock(sk); sco_sock_kill(sk); + sock_put(sk); } hcon->sco_data = NULL; kfree(conn); } -static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent) +static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, + struct sock *parent) { BT_DBG("conn %p", conn); @@ -414,8 +417,10 @@ static void __sco_sock_close(struct sock *sk) if (sco_pi(sk)->conn->hcon) { sk->sk_state = BT_DISCONN; sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); + sco_conn_lock(sco_pi(sk)->conn); hci_conn_drop(sco_pi(sk)->conn->hcon); sco_pi(sk)->conn->hcon = NULL; + sco_conn_unlock(sco_pi(sk)->conn); } else sco_chan_del(sk, ECONNRESET); break; @@ -459,7 +464,8 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, + int proto, gfp_t prio, int kern) { struct sock *sk; @@ -508,7 +514,8 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol, return 0; } -static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, + int addr_len) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; @@ -615,7 +622,8 @@ done: return err; } -static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags) +static int sco_sock_accept(struct socket *sock, struct socket *newsock, + int flags) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; @@ -669,7 +677,8 @@ done: return err; } -static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, + int *len, int peer) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; @@ -779,7 +788,8 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, return bt_sock_recvmsg(sock, msg, len, flags); } -static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +static int sco_sock_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; int len, err = 0; @@ -819,7 +829,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char voice.setting = sco_pi(sk)->setting; len = min_t(unsigned int, sizeof(voice), optlen); - if (copy_from_user((char *) &voice, optval, len)) { + if (copy_from_user((char *)&voice, optval, len)) { err = -EFAULT; break; } @@ -843,7 +853,8 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char return err; } -static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) +static int sco_sock_getsockopt_old(struct socket *sock, int optname, + char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct sco_options opts; @@ -903,7 +914,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user return err; } -static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +static int sco_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; int len, err = 0; @@ -928,7 +940,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char } if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags), - (u32 __user *) optval)) + (u32 __user *)optval)) err = -EFAULT; break; @@ -961,7 +973,9 @@ static int sco_sock_shutdown(struct socket *sock, int how) if (!sk) return 0; + sock_hold(sk); lock_sock(sk); + if (!sk->sk_shutdown) { sk->sk_shutdown = SHUTDOWN_MASK; sco_sock_clear_timer(sk); @@ -972,7 +986,10 @@ static int sco_sock_shutdown(struct socket *sock, int how) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } + release_sock(sk); + sock_put(sk); + return err; } @@ -1016,6 +1033,11 @@ static void sco_conn_ready(struct sco_conn *conn) } else { sco_conn_lock(conn); + if (!conn->hcon) { + sco_conn_unlock(conn); + return; + } + parent = sco_get_sock_listen(&conn->hcon->src); if (!parent) { sco_conn_unlock(conn); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 25644e1bc479..c91353841e40 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -811,7 +811,6 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason) smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason), &reason); - clear_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags); mgmt_auth_failed(hcon, HCI_ERROR_AUTH_FAILURE); if (chan->data) @@ -1046,8 +1045,24 @@ static void smp_notify_keys(struct l2cap_conn *conn) struct smp_cmd_pairing *rsp = (void *) &smp->prsp[1]; bool persistent; + if (hcon->type == ACL_LINK) { + if (hcon->key_type == HCI_LK_DEBUG_COMBINATION) + persistent = false; + else + persistent = !test_bit(HCI_CONN_FLUSH_KEY, + &hcon->flags); + } else { + /* The LTKs, IRKs and CSRKs should be persistent only if + * both sides had the bonding bit set in their + * authentication requests. + */ + persistent = !!((req->auth_req & rsp->auth_req) & + SMP_AUTH_BONDING); + } + if (smp->remote_irk) { - mgmt_new_irk(hdev, smp->remote_irk); + mgmt_new_irk(hdev, smp->remote_irk, persistent); + /* Now that user space can be considered to know the * identity address track the connection based on it * from now on (assuming this is an LE link). @@ -1075,21 +1090,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) } } - if (hcon->type == ACL_LINK) { - if (hcon->key_type == HCI_LK_DEBUG_COMBINATION) - persistent = false; - else - persistent = !test_bit(HCI_CONN_FLUSH_KEY, - &hcon->flags); - } else { - /* The LTKs and CSRKs should be persistent only if both sides - * had the bonding bit set in their authentication requests. - */ - persistent = !!((req->auth_req & rsp->auth_req) & - SMP_AUTH_BONDING); - } - - if (smp->csrk) { smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); @@ -2380,6 +2380,32 @@ unlock: return ret; } +void smp_cancel_pairing(struct hci_conn *hcon) +{ + struct l2cap_conn *conn = hcon->l2cap_data; + struct l2cap_chan *chan; + struct smp_chan *smp; + + if (!conn) + return; + + chan = conn->smp; + if (!chan) + return; + + l2cap_chan_lock(chan); + + smp = chan->data; + if (smp) { + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) + smp_failure(conn, 0); + else + smp_failure(conn, SMP_UNSPECIFIED); + } + + l2cap_chan_unlock(chan); +} + static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_encrypt_info *rp = (void *) skb->data; diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 6cf872563ea7..ffcc70b6b199 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -180,6 +180,7 @@ enum smp_key_pref { }; /* SMP Commands */ +void smp_cancel_pairing(struct hci_conn *hcon); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index c88bd8e8937e..a642bb829d09 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -495,7 +495,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, const unsigned char *addr, - __u16 vid) + __u16 vid, + unsigned char is_local, + unsigned char is_static) { struct net_bridge_fdb_entry *fdb; @@ -504,8 +506,8 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, memcpy(fdb->addr.addr, addr, ETH_ALEN); fdb->dst = source; fdb->vlan_id = vid; - fdb->is_local = 0; - fdb->is_static = 0; + fdb->is_local = is_local; + fdb->is_static = is_static; fdb->added_by_user = 0; fdb->added_by_external_learn = 0; fdb->updated = fdb->used = jiffies; @@ -536,11 +538,10 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr, vid); + fdb = fdb_create(head, source, addr, vid, 1, 1); if (!fdb) return -ENOMEM; - fdb->is_local = fdb->is_static = 1; fdb_add_hw_addr(br, addr); fdb_notify(br, fdb, RTM_NEWNEIGH); return 0; @@ -597,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } else { spin_lock(&br->hash_lock); if (likely(!fdb_find(head, addr, vid))) { - fdb = fdb_create(head, source, addr, vid); + fdb = fdb_create(head, source, addr, vid, 0, 0); if (fdb) { if (unlikely(added_by_user)) fdb->added_by_user = 1; @@ -774,7 +775,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr, vid); + fdb = fdb_create(head, source, addr, vid, 0, 0); if (!fdb) return -ENOMEM; @@ -1099,7 +1100,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, head = &br->hash[br_mac_hash(addr, vid)]; fdb = fdb_find(head, addr, vid); if (!fdb) { - fdb = fdb_create(head, p, addr, vid); + fdb = fdb_create(head, p, addr, vid, 0, 0); if (!fdb) { err = -ENOMEM; goto err_unlock; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index a9d424e20229..fcdb86dd5a23 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -141,7 +141,7 @@ EXPORT_SYMBOL_GPL(br_deliver); /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { - if (should_deliver(to, skb)) { + if (to && should_deliver(to, skb)) { if (skb0) deliver_clone(to, skb, __br_forward); else diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 5f0d0cc4744f..1394da63614a 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -180,11 +180,11 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv) * devices. There are four possible calls to this function in terms of the * vlan entry type: * 1. vlan is being added on a port (no master flags, global entry exists) - * 2. vlan is being added on a bridge (both master and brvlan flags) + * 2. vlan is being added on a bridge (both master and brentry flags) * 3. vlan is being added on a port, but a global entry didn't exist which - * is being created right now (master flag set, brvlan flag unset), the + * is being created right now (master flag set, brentry flag unset), the * global entry is used for global per-vlan features, but not for filtering - * 4. same as 3 but with both master and brvlan flags set so the entry + * 4. same as 3 but with both master and brentry flags set so the entry * will be used for filtering in both the port and the bridge */ static int __vlan_add(struct net_bridge_vlan *v, u16 flags) @@ -482,7 +482,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) if (!br->vlan_enabled) return true; - vg = nbp_vlan_group(p); + vg = nbp_vlan_group_rcu(p); if (!vg || !vg->num_vlans) return false; @@ -914,6 +914,8 @@ out: return ret; err_vlan_add: + RCU_INIT_POINTER(p->vlgrp, NULL); + synchronize_rcu(); rhashtable_destroy(&vg->vlan_hash); err_rhtbl: kfree(vg); diff --git a/net/core/dev.c b/net/core/dev.c index 1225b4be8ed6..8ce3f74cd6b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -99,6 +99,7 @@ #include <linux/rtnetlink.h> #include <linux/stat.h> #include <net/dst.h> +#include <net/dst_metadata.h> #include <net/pkt_sched.h> #include <net/checksum.h> #include <net/xfrm.h> @@ -682,6 +683,32 @@ int dev_get_iflink(const struct net_device *dev) EXPORT_SYMBOL(dev_get_iflink); /** + * dev_fill_metadata_dst - Retrieve tunnel egress information. + * @dev: targeted interface + * @skb: The packet. + * + * For better visibility of tunnel traffic OVS needs to retrieve + * egress tunnel information for a packet. Following API allows + * user to get this info. + */ +int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info; + + if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst) + return -EINVAL; + + info = skb_tunnel_info_unclone(skb); + if (!info) + return -ENOMEM; + if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX))) + return -EINVAL; + + return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb); +} +EXPORT_SYMBOL_GPL(dev_fill_metadata_dst); + +/** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace * @name: name to find @@ -6261,6 +6288,48 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } +static netdev_features_t netdev_sync_upper_features(struct net_device *lower, + struct net_device *upper, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(upper->wanted_features & feature) + && (features & feature)) { + netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n", + &feature, upper->name); + features &= ~feature; + } + } + + return features; +} + +static void netdev_sync_lower_features(struct net_device *upper, + struct net_device *lower, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(features & feature) && (lower->features & feature)) { + netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", + &feature, lower->name); + lower->wanted_features &= ~feature; + netdev_update_features(lower); + + if (unlikely(lower->features & feature)) + netdev_WARN(upper, "failed to disable %pNF on %s!\n", + &feature, lower->name); + } + } +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -6330,7 +6399,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, int __netdev_update_features(struct net_device *dev) { + struct net_device *upper, *lower; netdev_features_t features; + struct list_head *iter; int err = 0; ASSERT_RTNL(); @@ -6343,6 +6414,10 @@ int __netdev_update_features(struct net_device *dev) /* driver might be less strict about feature dependencies */ features = netdev_fix_features(dev, features); + /* some features can't be enabled if they're off an an upper device */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) + features = netdev_sync_upper_features(dev, upper, features); + if (dev->features == features) return 0; @@ -6359,6 +6434,12 @@ int __netdev_update_features(struct net_device *dev) return -1; } + /* some features must be disabled on lower devices when disabled + * on an upper device (think: bonding master or bridge) + */ + netdev_for_each_lower_dev(dev, lower, iter) + netdev_sync_lower_features(dev, lower, features); + if (!err) dev->features = features; diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 4eab4a94a59d..703cf76aa7c2 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -58,7 +58,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * or #0xc0 ; PTP_CLASS_VLAN|PTP_CLASS_L2 * ret a ; return PTP class * * ; PTP over UDP over IPv4 over 802.1Q over Ethernet @@ -73,7 +73,7 @@ * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? * ldh [x + 26] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * or #0x90 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 * ret a ; return PTP class * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE * @@ -86,7 +86,7 @@ * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? * ldh [66] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 + * or #0xa0 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * @@ -98,7 +98,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [14] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x30 ; PTP_CLASS_L2 + * or #0x40 ; PTP_CLASS_L2 * ret a ; return PTP class * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE */ @@ -150,7 +150,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000070 }, + { 0x44, 0, 0, 0x000000c0 }, { 0x16, 0, 0, 0x00000000 }, { 0x15, 0, 12, 0x00000800 }, { 0x30, 0, 0, 0x0000001b }, @@ -162,7 +162,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x48, 0, 0, 0x0000001a }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000050 }, + { 0x44, 0, 0, 0x00000090 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 8, 0x000086dd }, @@ -172,7 +172,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x28, 0, 0, 0x00000042 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000060 }, + { 0x44, 0, 0, 0x000000a0 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, @@ -181,7 +181,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x00000000 }, { 0x28, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000030 }, + { 0x44, 0, 0, 0x00000040 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7c78b5aca944..504bd17b7456 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -838,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_BROADCAST */ nla_total_size(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ - nla_total_size(sizeof(__u64))); + nla_total_size(sizeof(__u64)) + + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else return 0; @@ -1161,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_link_state vf_linkstate; struct ifla_vf_rss_query_en vf_rss_query_en; struct ifla_vf_stats vf_stats; + struct ifla_vf_trust vf_trust; /* * Not all SR-IOV capable drivers support the @@ -1170,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; ivi.rss_query_en = -1; + ivi.trusted = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero @@ -1183,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = - vf_rss_query_en.vf = ivi.vf; + vf_rss_query_en.vf = + vf_trust.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; @@ -1194,6 +1198,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; + vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -1211,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, &vf_linkstate) || nla_put(skb, IFLA_VF_RSS_QUERY_EN, sizeof(vf_rss_query_en), - &vf_rss_query_en)) + &vf_rss_query_en) || + nla_put(skb, IFLA_VF_TRUST, + sizeof(vf_trust), &vf_trust)) goto nla_put_failure; memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) @@ -1348,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, [IFLA_VF_STATS] = { .type = NLA_NESTED }, + [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, }; static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { @@ -1587,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } + if (tb[IFLA_VF_TRUST]) { + struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_trust) + err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); + if (err < 0) + return err; + } + return err; } diff --git a/net/core/sock.c b/net/core/sock.c index dcc7d62654d5..7529eb9463be 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -422,13 +422,25 @@ static void sock_warn_obsolete_bsdism(const char *name) } } +static bool sock_needs_netstamp(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_UNSPEC: + case AF_UNIX: + return false; + default: + return true; + } +} + #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { if (sk->sk_flags & flags) { sk->sk_flags &= ~flags; - if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(sk->sk_flags & SK_FLAGS_TIMESTAMP)) net_disable_timestamp(); } } @@ -1582,7 +1594,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); - if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) + if (sock_needs_netstamp(sk) && + newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); } out: @@ -1643,6 +1656,28 @@ void sock_wfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_wfree); +void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; +#ifdef CONFIG_INET + if (unlikely(!sk_fullsock(sk))) { + skb->destructor = sock_edemux; + sock_hold(sk); + return; + } +#endif + skb->destructor = sock_wfree; + skb_set_hash_from_sk(skb, sk); + /* + * We used to take a refcount on sk, but following operation + * is enough to guarantee sk_free() wont free this sock until + * all in-flight packets are completed + */ + atomic_add(skb->truesize, &sk->sk_wmem_alloc); +} +EXPORT_SYMBOL(skb_set_owner_w); + void skb_orphan_partial(struct sk_buff *skb) { /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, @@ -2510,7 +2545,8 @@ void sock_enable_timestamp(struct sock *sk, int flag) * time stamping, but time stamping might have been on * already because of the other one */ - if (!(previous_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(previous_flags & SK_FLAGS_TIMESTAMP)) net_enable_timestamp(); } } diff --git a/net/core/tso.c b/net/core/tso.c index 630b30b4fb53..5dca7ce8ee9f 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,4 +1,5 @@ #include <linux/export.h> +#include <linux/if_vlan.h> #include <net/ip.h> #include <net/tso.h> #include <asm/unaligned.h> @@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs); void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { - struct iphdr *iph; struct tcphdr *tcph; int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); int mac_hdr_len = skb_network_offset(skb); memcpy(hdr, skb->data, hdr_len); - iph = (struct iphdr *)(hdr + mac_hdr_len); - iph->id = htons(tso->ip_id); - iph->tot_len = htons(size + hdr_len - mac_hdr_len); + if (!tso->ipv6) { + struct iphdr *iph = (void *)(hdr + mac_hdr_len); + + iph->id = htons(tso->ip_id); + iph->tot_len = htons(size + hdr_len - mac_hdr_len); + tso->ip_id++; + } else { + struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len); + + iph->payload_len = htons(size + tcp_hdrlen(skb)); + } tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); put_unaligned_be32(tso->tcp_seq, &tcph->seq); - tso->ip_id++; if (!is_last) { /* Clear all special flags for not last packet */ @@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) tso->ip_id = ntohs(ip_hdr(skb)->id); tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); tso->next_frag_idx = 0; + tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6); /* Build first data */ tso->size = skb_headlen(skb) - hdr_len; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 923f5a180134..b0e28d24e1a7 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -278,7 +278,9 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst); + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req); struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 59bc180b02d8..5684e14932bd 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -393,7 +393,9 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; struct inet_sock *newinet; @@ -426,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - __inet_hash_nolisten(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); return newsk; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d9cc731f2619..db5fc2440a23 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -380,7 +380,9 @@ drop: static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp; @@ -393,7 +395,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, /* * v6 mapped */ - newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); + newsk = dccp_v4_request_recv_sock(sk, skb, req, dst, + req_unhash, own_req); if (newsk == NULL) return NULL; @@ -474,15 +477,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; - /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq->pktopts != NULL) { - newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); - consume_skb(ireq->pktopts); - ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); - } newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -511,7 +506,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_done(newsk); goto out; } - __inet_hash(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + /* Clone pktoptions received with SYN, if we own the req */ + if (*own_req && ireq->pktopts) { + newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; + if (newnp->pktoptions) + skb_set_owner_r(newnp->pktoptions, newsk); + } return newsk; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index d10aace43672..1994f8af646b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -143,6 +143,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, { struct sock *child = NULL; struct dccp_request_sock *dreq = dccp_rsk(req); + bool own_req; /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -182,14 +183,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, dreq, skb)) goto drop; - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, + req, &own_req); + if (!child) goto listen_overflow; - inet_csk_reqsk_queue_drop(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); -out: - return child; + return inet_csk_complete_hashdance(sk, child, req, own_req); + listen_overflow: dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -198,7 +198,7 @@ drop: req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - goto out; + return NULL; } EXPORT_SYMBOL_GPL(dccp_check_req); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index d8346d0eadeb..3d3fda05b32d 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kfifo.h> #include <linux/vmalloc.h> +#include <linux/time64.h> #include <linux/gfp.h> #include <net/net_namespace.h> @@ -47,20 +48,20 @@ static struct { struct kfifo fifo; spinlock_t lock; wait_queue_head_t wait; - struct timespec tstart; + struct timespec64 tstart; } dccpw; static void printl(const char *fmt, ...) { va_list args; int len; - struct timespec now; + struct timespec64 now; char tbuf[256]; va_start(args, fmt); - getnstimeofday(&now); + getnstimeofday64(&now); - now = timespec_sub(now, dccpw.tstart); + now = timespec64_sub(now, dccpw.tstart); len = sprintf(tbuf, "%lu.%06lu ", (unsigned long) now.tv_sec, @@ -110,7 +111,7 @@ static struct jprobe dccp_send_probe = { static int dccpprobe_open(struct inode *inode, struct file *file) { kfifo_reset(&dccpw.fifo); - getnstimeofday(&dccpw.tstart); + getnstimeofday64(&dccpw.tstart); return 0; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index b0b8da0f5af8..7bc787b095c8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -247,11 +247,10 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - u16 vid; int err; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set) + if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) return -EOPNOTSUPP; /* If the requested port doesn't belong to the same bridge as @@ -262,16 +261,14 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, vlan->vid_end); if (err) return err; + + err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); + if (err) + return err; } else { - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - err = ds->drv->port_vlan_add(ds, p->port, vid, - vlan->flags & - BRIDGE_VLAN_INFO_UNTAGGED); - if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID) - err = ds->drv->port_pvid_set(ds, p->port, vid); - if (err) - return err; - } + err = ds->drv->port_vlan_add(ds, p->port, vlan, trans); + if (err) + return err; } return 0; @@ -282,19 +279,11 @@ static int dsa_slave_port_vlan_del(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - u16 vid; - int err; if (!ds->drv->port_vlan_del) return -EOPNOTSUPP; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - err = ds->drv->port_vlan_del(ds, p->port, vid); - if (err) - return err; - } - - return 0; + return ds->drv->port_vlan_del(ds, p->port, vlan); } static int dsa_slave_port_vlan_dump(struct net_device *dev, @@ -378,31 +367,11 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - unsigned char addr[ETH_ALEN] = { 0 }; - u16 vid = 0; - int ret; - - if (!ds->drv->port_fdb_getnext) - return -EOPNOTSUPP; - - for (;;) { - bool is_static; - ret = ds->drv->port_fdb_getnext(ds, p->port, addr, &vid, - &is_static); - if (ret < 0) - break; - - ether_addr_copy(fdb->addr, addr); - fdb->vid = vid; - fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; - - ret = cb(&fdb->obj); - if (ret < 0) - break; - } + if (ds->drv->port_fdb_dump) + return ds->drv->port_fdb_dump(ds, p->port, fdb, cb); - return ret == -ENOENT ? 0 : ret; + return -EOPNOTSUPP; } static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 12e8cf4bda9f..6b437e8760d3 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -580,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&ieee802154_lowpan->frags); - - return lowpan_frags_ns_sysctl_register(net); + res = inet_frags_init_net(&ieee802154_lowpan->frags); + if (res) + return res; + res = lowpan_frags_ns_sysctl_register(net); + if (res) + inet_frags_uninit_net(&ieee802154_lowpan->frags); + return res; } static void __net_exit lowpan_frags_exit_net(struct net *net) diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c index 65d55e05516c..ef185dd4110d 100644 --- a/net/ieee802154/6lowpan/rx.c +++ b/net/ieee802154/6lowpan/rx.c @@ -90,36 +90,12 @@ static lowpan_rx_result lowpan_rx_h_frag(struct sk_buff *skb) int lowpan_iphc_decompress(struct sk_buff *skb) { - struct ieee802154_addr_sa sa, da; struct ieee802154_hdr hdr; - u8 iphc0, iphc1; - void *sap, *dap; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) return -EINVAL; - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - - if (lowpan_fetch_skb_u8(skb, &iphc0) || - lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - ieee802154_addr_to_sa(&sa, &hdr.source); - ieee802154_addr_to_sa(&da, &hdr.dest); - - if (sa.addr_type == IEEE802154_ADDR_SHORT) - sap = &sa.short_addr; - else - sap = &sa.hwaddr; - - if (da.addr_type == IEEE802154_ADDR_SHORT) - dap = &da.short_addr; - else - dap = &da.hwaddr; - - return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, - IEEE802154_ADDR_LEN, dap, da.addr_type, - IEEE802154_ADDR_LEN, iphc0, iphc1); + return lowpan_header_decompress(skb, skb->dev, &hdr.dest, &hdr.source); } static lowpan_rx_result lowpan_rx_h_iphc(struct sk_buff *skb) @@ -308,16 +284,16 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev, if (wdev->type != ARPHRD_IEEE802154 || skb->pkt_type == PACKET_OTHERHOST || !lowpan_rx_h_check(skb)) - return NET_RX_DROP; + goto drop; ldev = wdev->ieee802154_ptr->lowpan_dev; if (!ldev || !netif_running(ldev)) - return NET_RX_DROP; + goto drop; /* Replacing skb->dev and followed rx handlers will manipulate skb. */ skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) - return NET_RX_DROP; + goto out; skb->dev = ldev; /* When receive frag1 it's likely that we manipulate the buffer. @@ -328,10 +304,15 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev, lowpan_is_iphc(*skb_network_header(skb))) { skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) - return NET_RX_DROP; + goto out; } return lowpan_invoke_rx_handlers(skb); + +drop: + kfree_skb(skb); +out: + return NET_RX_DROP; } static struct packet_type lowpan_packet_type = { diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index 62a21f6f021e..d4353faced35 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -14,6 +14,9 @@ #include "6lowpan_i.h" +#define LOWPAN_FRAG1_HEAD_SIZE 0x4 +#define LOWPAN_FRAGN_HEAD_SIZE 0x5 + /* don't save pan id, it's intra pan */ struct lowpan_addr { u8 mode; @@ -218,7 +221,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, saddr = &info.saddr.u.extended_addr; *dgram_size = skb->len; - lowpan_header_compress(skb, ldev, ETH_P_IPV6, daddr, saddr, skb->len); + lowpan_header_compress(skb, ldev, daddr, saddr); /* dgram_offset = (saved bytes after compression) + lowpan header len */ *dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb); @@ -235,7 +238,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, /* if the destination address is the broadcast address, use the * corresponding short address */ - if (lowpan_is_addr_broadcast((const u8 *)daddr)) { + if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) { da.mode = IEEE802154_ADDR_SHORT; da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); cb->ackreq = false; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d7c2bb0c4f65..e786873c89f2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -867,9 +867,10 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { - fib_magic(RTM_NEWROUTE, - dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, - prefix, ifa->ifa_prefixlen, prim); + if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) + fib_magic(RTM_NEWROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + prefix, ifa->ifa_prefixlen, prim); /* Add network specific broadcasts, when it takes a sense */ if (ifa->ifa_prefixlen < 31) { @@ -914,9 +915,10 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) } } else if (!ipv4_is_zeronet(any) && (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { - fib_magic(RTM_DELROUTE, - dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, - any, ifa->ifa_prefixlen, prim); + if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) + fib_magic(RTM_DELROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + any, ifa->ifa_prefixlen, prim); subnet = 1; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 42778d9d71e5..f30df0ee4f4d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1564,7 +1564,8 @@ void fib_select_path(struct net *net, struct fib_result *res, #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { if (mp_hash < 0) - mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr); + mp_hash = get_hash_from_flowi4(fl4) >> 1; + fib_select_multipath(res, mp_hash); } else diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c2af797f2f9..744e5936c10d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1569,7 +1569,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) do { /* record parent and next child index */ pn = n; - cindex = key ? get_index(key, pn) : 0; + cindex = (key > pn->key) ? get_index(key, pn) : 0; if (cindex >> pn->bits) break; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5aa46d4b44ef..5a8ee3282550 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP))) + SKB_GSO_IPIP | + SKB_GSO_SIT))) goto out; if (!skb->encapsulation) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8430bc8ccd58..1feb15f23de8 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -523,15 +523,15 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue, struct request_sock *req) { struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo; - spinlock_t *lock; - bool found; + bool found = false; - lock = inet_ehash_lockp(hashinfo, req->rsk_hash); - - spin_lock(lock); - found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); - spin_unlock(lock); + if (sk_hashed(req_to_sk(req))) { + spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); + spin_lock(lock); + found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); + spin_unlock(lock); + } if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; @@ -811,6 +811,25 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, } EXPORT_SYMBOL(inet_csk_reqsk_queue_add); +struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, + struct request_sock *req, bool own_req) +{ + if (own_req) { + inet_csk_reqsk_queue_drop(sk, req); + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + inet_csk_reqsk_queue_add(sk, req, child); + /* Warning: caller must not call reqsk_put(req); + * child stole last reference on it. + */ + return child; + } + /* Too bad, another child took ownership of the request, undo. */ + bh_unlock_sock(child); + sock_put(child); + return NULL; +} +EXPORT_SYMBOL(inet_csk_complete_hashdance); + /* * This routine closes sockets which have been at least partially * opened, but not yet accepted. diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index d0a7c0319e3d..fe144dae7372 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -209,12 +209,6 @@ int inet_frags_init(struct inet_frags *f) } EXPORT_SYMBOL(inet_frags_init); -void inet_frags_init_net(struct netns_frags *nf) -{ - init_frag_mem_limit(nf); -} -EXPORT_SYMBOL(inet_frags_init_net); - void inet_frags_fini(struct inet_frags *f) { cancel_work_sync(&f->frags_work); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 958728a22001..ccc5980797fc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -407,13 +407,13 @@ static u32 inet_sk_port_offset(const struct sock *sk) /* insert a socket into ehash, and eventually remove another one * (The another one can be a SYN_RECV or TIMEWAIT */ -int inet_ehash_insert(struct sock *sk, struct sock *osk) +bool inet_ehash_insert(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; struct inet_ehash_bucket *head; spinlock_t *lock; - int ret = 0; + bool ret = true; WARN_ON_ONCE(!sk_unhashed(sk)); @@ -423,30 +423,41 @@ int inet_ehash_insert(struct sock *sk, struct sock *osk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock(lock); - __sk_nulls_add_node_rcu(sk, list); if (osk) { - WARN_ON(sk->sk_hash != osk->sk_hash); - sk_nulls_del_node_init_rcu(osk); + WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); + ret = sk_nulls_del_node_init_rcu(osk); } + if (ret) + __sk_nulls_add_node_rcu(sk, list); spin_unlock(lock); return ret; } -void __inet_hash_nolisten(struct sock *sk, struct sock *osk) +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) { - inet_ehash_insert(sk, osk); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + bool ok = inet_ehash_insert(sk, osk); + + if (ok) { + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + } else { + percpu_counter_inc(sk->sk_prot->orphan_count); + sk->sk_state = TCP_CLOSE; + sock_set_flag(sk, SOCK_DEAD); + inet_csk_destroy_sock(sk); + } + return ok; } -EXPORT_SYMBOL_GPL(__inet_hash_nolisten); +EXPORT_SYMBOL_GPL(inet_ehash_nolisten); void __inet_hash(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; - if (sk->sk_state != TCP_LISTEN) - return __inet_hash_nolisten(sk, osk); - + if (sk->sk_state != TCP_LISTEN) { + inet_ehash_nolisten(sk, osk); + return; + } WARN_ON(!sk_unhashed(sk)); ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -567,7 +578,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - __inet_hash_nolisten(sk, (struct sock *)tw); + inet_ehash_nolisten(sk, (struct sock *)tw); } if (tw) inet_twsk_bind_unhash(tw, hinfo); @@ -584,7 +595,7 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet_hash_nolisten(sk, NULL); + inet_ehash_nolisten(sk, NULL); spin_unlock_bh(&head->lock); return 0; } else { diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 5482745d5d68..1fe55ae81781 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -839,6 +839,8 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { + int res; + /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -862,9 +864,13 @@ static int __net_init ipv4_frags_init_net(struct net *net) */ net->ipv4.frags.timeout = IP_FRAG_TIME; - inet_frags_init_net(&net->ipv4.frags); - - return ip4_frags_ns_ctl_register(net); + res = inet_frags_init_net(&net->ipv4.frags); + if (res) + return res; + res = ip4_frags_ns_ctl_register(net); + if (res) + inet_frags_uninit_net(&net->ipv4.frags); + return res; } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index bd0679d90519..614521437e30 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } +static struct rtable *gre_get_rt(struct sk_buff *skb, + struct net_device *dev, + struct flowi4 *fl, + const struct ip_tunnel_key *key) +{ + struct net *net = dev_net(dev); + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->u.ipv4.dst; + fl->saddr = key->u.ipv4.src; + fl->flowi4_tos = RT_TOS(key->tos); + fl->flowi4_mark = skb->mark; + fl->flowi4_proto = IPPROTO_GRE; + + return ip_route_output_key(net, fl); +} + static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel_info *tun_info; - struct net *net = dev_net(dev); const struct ip_tunnel_key *key; struct flowi4 fl; struct rtable *rt; @@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; key = &tun_info->key; - memset(&fl, 0, sizeof(fl)); - fl.daddr = key->u.ipv4.dst; - fl.saddr = key->u.ipv4.src; - fl.flowi4_tos = RT_TOS(key->tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); + rt = gre_get_rt(skb, dev, &fl, key); if (IS_ERR(rt)) goto err_free_skb; @@ -566,6 +575,24 @@ err_free_skb: dev->stats.tx_dropped++; } +static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info = skb_tunnel_info(skb); + struct rtable *rt; + struct flowi4 fl4; + + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + + rt = gre_get_rt(skb, dev, &fl4, &info->key); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + info->key.u.ipv4.src = fl4.saddr; + return 0; +} + static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = { .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; static void ipgre_tap_setup(struct net_device *dev) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 50e29737b584..4233cbe47052 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -533,6 +533,11 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, dev = rt->dst.dev; + /* for offloaded checksums cleanup checksum before fragmentation */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto fail; + /* * Point into the IP datagram header. */ @@ -657,9 +662,6 @@ slow_path_clean: } slow_path: - /* for offloaded checksums cleanup checksum before fragmentation */ - if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) - goto fail; iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ @@ -911,6 +913,7 @@ static int __ip_append_data(struct sock *sk, if (transhdrlen && length + fragheaderlen <= mtu && rt->dst.dev->features & NETIF_F_V4_CSUM && + !(flags & MSG_MORE) && !exthdrlen) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 690d27d3f2f9..a35584176535 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -75,6 +75,7 @@ endif # NF_TABLES config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK help This option enables the nf_dup_ipv4 core, which duplicates an IPv4 packet to be rerouted to another destination. diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 74dd6671b66d..78cc64eddfc1 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -60,9 +60,7 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, if (FIB_RES_DEV(res) == dev) dev_match = true; #endif - if (dev_match || flags & XT_RPFILTER_LOOSE) - return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; - return dev_match; + return dev_match || flags & XT_RPFILTER_LOOSE; } static bool rpfilter_is_local(const struct sk_buff *skb) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4c0892badb8b..4cbe9f0a4281 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -221,8 +221,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; + bool own_req; - child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); + child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, + NULL, &own_req); if (child) { atomic_set(&req->rsk_refcnt, 1); sock_rps_save_rxhash(child, skb); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 7092a61c4dc8..7e538f71f5fb 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) /* alpha = (1 - g) * alpha + g * F */ - alpha -= alpha >> dctcp_shift_g; + alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); if (bytes_ecn) { /* If dctcp_shift_g == 1, a 32bit value would overflow * after 8 Mbytes. diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 93396bf7b475..55be6ac70cff 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -133,12 +133,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; u32 end_seq; + bool own_req; req->num_retrans = 0; req->num_timeout = 0; req->sk = NULL; - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, + NULL, &own_req); if (!child) return NULL; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30dd45c1f568..1c2648bbac4b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1247,7 +1247,9 @@ EXPORT_SYMBOL(tcp_v4_conn_request); */ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; struct inet_sock *newinet; @@ -1323,7 +1325,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - __inet_hash_nolisten(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); return newsk; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1fd5d413a664..3575dd1e5b67 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -580,6 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); bool paws_reject = false; + bool own_req; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { @@ -767,18 +768,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * ESTABLISHED STATE. If it will be dropped after * socket is created, wait for troubles. */ - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, + req, &own_req); if (!child) goto listen_overflow; sock_rps_save_rxhash(child, skb); tcp_synack_rtt_meas(child, req); - inet_csk_reqsk_queue_drop(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); - /* Warning: caller must not call reqsk_put(req); - * child stole last reference on it. - */ - return child; + return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: if (!sysctl_tcp_abort_on_overflow) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f6f7f9b4901b..cb7ca569052c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2963,9 +2963,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); if (attach_req) { - skb->destructor = sock_edemux; - sock_hold(req_to_sk(req)); - skb->sk = req_to_sk(req); + skb_set_owner_w(skb, req_to_sk(req)); } else { /* sk is a const pointer, because we want to express multiple * cpu might call us concurrently. @@ -3410,7 +3408,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); - NET_INC_STATS_BH(sock_net(sk), mib); + NET_INC_STATS(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 9f298d0dc9a1..7ee6518afa86 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu) { + skb->protocol = htons(ETH_P_IP); + if (skb->sk) xfrm_local_error(skb, mtu); else diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index f2606b9056bb..1e0c3c835a63 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -127,7 +127,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports = (__be16 *)xprth; + __be16 *ports; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; fl4->fl4_dport = ports[!reverse]; @@ -135,8 +138,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ICMP: - if (pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp = xprth; + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { + u8 *icmp; + + xprth = skb_network_header(skb) + iph->ihl * 4; + icmp = xprth; fl4->fl4_icmp_type = icmp[0]; fl4->fl4_icmp_code = icmp[1]; @@ -144,33 +151,50 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ESP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr = (__be32 *)xprth; + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be32 *ehdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; } break; case IPPROTO_AH: - if (pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr = (__be32 *)xprth; + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; } break; case IPPROTO_COMP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr = (__be16 *)xprth; + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; case IPPROTO_GRE: - if (pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags = (__be16 *)xprth; - __be32 *gre_hdr = (__be32 *)xprth; + if (xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) @@ -244,7 +268,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d0c685cdc345..d72fa90d6feb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3147,6 +3147,32 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } break; + case NETDEV_CHANGEMTU: + /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ + if (dev->mtu < IPV6_MIN_MTU) { + addrconf_ifdown(dev, 1); + break; + } + + if (idev) { + rt6_mtu_change(dev, dev->mtu); + idev->cnf.mtu6 = dev->mtu; + break; + } + + /* allocate new idev */ + idev = ipv6_add_dev(dev); + if (IS_ERR(idev)) + break; + + /* device is still not ready */ + if (!(idev->if_flags & IF_READY)) + break; + + run_pending = 1; + + /* fall through */ + case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) @@ -3170,7 +3196,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, idev->if_flags |= IF_READY; run_pending = 1; } - } else { + } else if (event == NETDEV_CHANGE) { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ break; @@ -3235,24 +3261,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } break; - case NETDEV_CHANGEMTU: - if (idev && dev->mtu >= IPV6_MIN_MTU) { - rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; - break; - } - - if (!idev && dev->mtu >= IPV6_MIN_MTU) { - idev = ipv6_add_dev(dev); - if (!IS_ERR(idev)) - break; - } - - /* - * if MTU under IPV6_MIN_MTU. - * Stop IPv6 on this interface. - */ - case NETDEV_DOWN: case NETDEV_UNREGISTER: /* diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 9f777ec59a59..ed33abf57abd 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -32,6 +32,7 @@ struct fib6_rule { struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { + struct rt6_info *rt; struct fib_lookup_arg arg = { .lookup_ptr = lookup, .flags = FIB_LOOKUP_NOREF, @@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); - if (arg.result) - return arg.result; + rt = arg.result; - dst_hold(&net->ipv6.ip6_null_entry->dst); - return &net->ipv6.ip6_null_entry->dst; + if (!rt) { + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; + } + + if (rt->rt6i_flags & RTF_REJECT && + rt->dst.error == -EAGAIN) { + ip6_rt_put(rt); + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + + return &rt->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index efb1c00f2270..36c5a98b0472 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -453,7 +453,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n"); + net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); return; } @@ -461,7 +462,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - net_dbg_ratelimited("icmp6_send: no reply to icmp error\n"); + net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); return; } @@ -513,7 +515,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { - net_dbg_ratelimited("icmp: len problem\n"); + net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); goto out_dst_release; } @@ -785,7 +788,8 @@ static int icmpv6_rcv(struct sk_buff *skb) if (type & ICMPV6_INFOMSG_MASK) break; - net_dbg_ratelimited("icmpv6: msg of unknown type\n"); + net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", + saddr, daddr); /* * error of unknown type. diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 09fddf70cca4..0c7e276c230e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -286,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + struct rt6_info *rt; + + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + if (rt->rt6i_flags & RTF_REJECT && + rt->dst.error == -EAGAIN) { + ip6_rt_put(rt); + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + + return &rt->dst; } static void __net_init fib6_tables_init(struct net *net) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0c89671e0767..e6a7bd15b9b7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -596,11 +596,17 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } + if (mtu < hlen + sizeof(struct frag_hdr) + 8) + goto fail_toobig; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto fail; + hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); @@ -729,10 +735,6 @@ slow_path_clean: } slow_path: - if ((skb->ip_summed == CHECKSUM_PARTIAL) && - skb_checksum_help(skb)) - goto fail; - left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ @@ -1268,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk, struct rt6_info *rt = (struct rt6_info *)cork->dst; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; + unsigned int maxnonfragsize, headersize; skb = skb_peek_tail(queue); if (!skb) { @@ -1285,38 +1288,43 @@ static int __ip6_append_data(struct sock *sk, maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); - if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - unsigned int maxnonfragsize, headersize; - - headersize = sizeof(struct ipv6hdr) + - (opt ? opt->opt_flen + opt->opt_nflen : 0) + - (dst_allfrag(&rt->dst) ? - sizeof(struct frag_hdr) : 0) + - rt->rt6i_nfheader_len; - - if (ip6_sk_ignore_df(sk)) - maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; - else - maxnonfragsize = mtu; + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + if (cork->length + length > mtu - headersize && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } - /* dontfrag active */ - if ((cork->length + length > mtu - headersize) && dontfrag && - (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu - headersize + - sizeof(struct ipv6hdr)); - goto emsgsize; - } + if (ip6_sk_ignore_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; - if (cork->length + length > maxnonfragsize - headersize) { + if (cork->length + length > maxnonfragsize - headersize) { emsgsize: - ipv6_local_error(sk, EMSGSIZE, fl6, - mtu - headersize + - sizeof(struct ipv6hdr)); - return -EMSGSIZE; - } + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); + return -EMSGSIZE; } + /* CHECKSUM_PARTIAL only with no extension headers and when + * we are not going to fragment + */ + if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && + headersize == sizeof(struct ipv6hdr) && + length < mtu - headersize && + !(flags & MSG_MORE) && + rt->dst.dev->features & NETIF_F_V6_CSUM) + csummode = CHECKSUM_PARTIAL; + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { sock_tx_timestamp(sk, &tx_flags); if (tx_flags & SKBTX_ANY_SW_TSTAMP && @@ -1324,16 +1332,6 @@ emsgsize: tskey = sk->sk_tskey++; } - /* If this is the first and only packet and device - * supports checksum offloading, let's use it. - * Use transhdrlen, same as IPv4, because partial - * sums only work when transhdrlen is set. - */ - if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && - length + fragheaderlen < mtu && - rt->dst.dev->features & NETIF_F_V6_CSUM && - !exthdrlen) - csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 96833e4b3193..f6a024e141e5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -58,6 +58,7 @@ endif # NF_TABLES config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK help This option enables the nf_dup_ipv6 core, which duplicates an IPv6 packet to be rerouted to another destination. diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 056f5d4a852a..d5efeb87350e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -644,15 +644,22 @@ void nf_ct_frag6_consume_orig(struct sk_buff *skb) s = s2; } } +EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); static int nf_ct_net_init(struct net *net) { + int res; + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&net->nf_frag.frags); - - return nf_ct_frag6_sysctl_register(net); + res = inet_frags_init_net(&net->nf_frag.frags); + if (res) + return res; + res = nf_ct_frag6_sysctl_register(net); + if (res) + inet_frags_uninit_net(&net->nf_frag.frags); + return res; } static void nf_ct_net_exit(struct net *net) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f1159bb76e0a..44e21a03cfc3 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -706,13 +706,19 @@ static void ip6_frags_sysctl_unregister(void) static int __net_init ipv6_frags_init_net(struct net *net) { + int res; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&net->ipv6.frags); - - return ip6_frags_ns_sysctl_register(net); + res = inet_frags_init_net(&net->ipv6.frags); + if (res) + return res; + res = ip6_frags_ns_sysctl_register(net); + if (res) + inet_frags_uninit_net(&net->ipv6.frags); + return res; } static void __net_exit ipv6_frags_exit_net(struct net *net) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d0619632723a..2701cb3d88e9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1171,6 +1171,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, { struct dst_entry *dst; int flags = 0; + bool any_src; dst = l3mdev_rt6_dst_by_oif(net, fl6); if (dst) @@ -1178,11 +1179,12 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, fl6->flowi6_iif = LOOPBACK_IFINDEX; + any_src = ipv6_addr_any(&fl6->saddr); if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || - fl6->flowi6_oif) + (fl6->flowi6_oif && any_src)) flags |= RT6_LOOKUP_F_IFACE; - if (!ipv6_addr_any(&fl6->saddr)) + if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f495d189f5e0..ea2f4d5440b5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -965,7 +965,9 @@ drop: static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; @@ -984,7 +986,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * * v6 mapped */ - newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst); + newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, + req_unhash, own_req); if (!newsk) return NULL; @@ -1081,16 +1084,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; - /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); - consume_skb(ireq->pktopts); - ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); - } newnp->opt = NULL; newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -1145,7 +1139,16 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - __inet_hash(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + /* Clone pktoptions received with SYN, if we own the req */ + if (*own_req && ireq->pktopts) { + newnp->pktoptions = skb_clone(ireq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; + if (newnp->pktoptions) + skb_set_owner_r(newnp->pktoptions, newsk); + } return newsk; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 9db067a11b52..4d09ce6fa90e 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); if (xfrm6_local_dontfrag(skb)) xfrm6_local_rxpmtu(skb, mtu); @@ -143,6 +144,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; int mtu; + bool toobig; #ifdef CONFIG_NETFILTER if (!x) { @@ -151,25 +153,29 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } #endif + if (x->props.mode != XFRM_MODE_TUNNEL) + goto skip_frag; + if (skb->protocol == htons(ETH_P_IPV6)) mtu = ip6_skb_dst_mtu(skb); else mtu = dst_mtu(skb_dst(skb)); - if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { + toobig = skb->len > mtu && !skb_is_gso(skb); + + if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } - if (x->props.mode == XFRM_MODE_TUNNEL && - ((skb->len > mtu && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb)))) { + if (toobig || dst_allfrag(skb_dst(skb))) return ip6_fragment(net, sk, skb, __xfrm6_output_finish); - } + +skip_frag: return x->outer_mode->afinfo->output_finish(sk, skb); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 08c9c93f3527..5643423fe67a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -177,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) return; case IPPROTO_ICMPV6: - if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { + if (!onlyproto && (nh + offset + 2 < skb->data || + pskb_may_pull(skb, nh + offset + 2 - skb->data))) { u8 *icmp; nh = skb_network_header(skb); @@ -191,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); - if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { + if (!onlyproto && (nh + offset + 3 < skb->data || + pskb_may_pull(skb, nh + offset + 3 - skb->data))) { struct ip6_mh *mh; nh = skb_network_header(skb); @@ -286,7 +288,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index a26c401ef4a4..43964594aa12 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off) for (element = hashbin_get_first(iter->hashbin); element != NULL; element = hashbin_get_next(iter->hashbin)) { - if (!off || *off-- == 0) { + if (!off || (*off)-- == 0) { /* NB: hashbin left locked */ return element; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 83a70688784b..f9c9ecb0cdd3 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -261,7 +261,7 @@ static int pfkey_broadcast(struct sk_buff *skb, err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); - /* Error is cleare after succecful sending to at least one + /* Error is cleared after successful sending to at least one * registered KM */ if ((broadcast_flags & BROADCAST_REGISTERED) && err) err = err2; diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 7799d3c41fe2..a13d02b7cee4 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -55,7 +55,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec) msl = container_of(sl, struct mac802154_llsec_seclevel, level); list_del(&sl->list); - kfree(msl); + kzfree(msl); } list_for_each_entry_safe(dev, dn, &sec->table.devices, list) { @@ -72,7 +72,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec) mkey = container_of(key->key, struct mac802154_llsec_key, key); list_del(&key->list); llsec_key_put(mkey); - kfree(key); + kzfree(key); } } @@ -161,7 +161,7 @@ err_tfm: if (key->tfm[i]) crypto_free_aead(key->tfm[i]); - kfree(key); + kzfree(key); return NULL; } @@ -176,7 +176,7 @@ static void llsec_key_release(struct kref *ref) crypto_free_aead(key->tfm[i]); crypto_free_blkcipher(key->tfm0); - kfree(key); + kzfree(key); } static struct mac802154_llsec_key* @@ -267,7 +267,7 @@ int mac802154_llsec_key_add(struct mac802154_llsec *sec, return 0; fail: - kfree(new); + kzfree(new); return -ENOMEM; } @@ -347,10 +347,10 @@ static void llsec_dev_free(struct mac802154_llsec_device *dev) devkey); list_del(&pos->list); - kfree(devkey); + kzfree(devkey); } - kfree(dev); + kzfree(dev); } int mac802154_llsec_dev_add(struct mac802154_llsec *sec, @@ -681,7 +681,7 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, rc = crypto_aead_encrypt(req); - kfree(req); + kzfree(req); return rc; } @@ -881,7 +881,7 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, rc = crypto_aead_decrypt(req); - kfree(req); + kzfree(req); skb_trim(skb, skb->len - authlen); return rc; @@ -921,7 +921,7 @@ llsec_update_devkey_record(struct mac802154_llsec_device *dev, if (!devkey) list_add_rcu(&next->devkey.list, &dev->dev.keys); else - kfree(next); + kzfree(next); spin_unlock_bh(&dev->lock); } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bb185a28de98..c70d750148b6 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -19,36 +19,13 @@ #include <net/ipv6.h> #include <net/addrconf.h> #endif +#include <net/nexthop.h> #include "internal.h" -#define LABEL_NOT_SPECIFIED (1<<20) -#define MAX_NEW_LABELS 2 - -/* This maximum ha length copied from the definition of struct neighbour */ -#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) - -enum mpls_payload_type { - MPT_UNSPEC, /* IPv4 or IPv6 */ - MPT_IPV4 = 4, - MPT_IPV6 = 6, - - /* Other types not implemented: - * - Pseudo-wire with or without control word (RFC4385) - * - GAL (RFC5586) - */ -}; - -struct mpls_route { /* next hop label forwarding entry */ - struct net_device __rcu *rt_dev; - struct rcu_head rt_rcu; - u32 rt_label[MAX_NEW_LABELS]; - u8 rt_protocol; /* routing protocol that set this entry */ - u8 rt_payload_type; - u8 rt_labels; - u8 rt_via_alen; - u8 rt_via_table; - u8 rt_via[0]; -}; +/* Maximum number of labels to look ahead at when selecting a path of + * a multipath route + */ +#define MAX_MP_SELECT_LABELS 4 static int zero = 0; static int label_limit = (1 << 20) - 1; @@ -80,10 +57,24 @@ bool mpls_output_possible(const struct net_device *dev) } EXPORT_SYMBOL_GPL(mpls_output_possible); -static unsigned int mpls_rt_header_size(const struct mpls_route *rt) +static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) +{ + u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); + int nh_index = nh - rt->rt_nh; + + return nh0_via + rt->rt_max_alen * nh_index; +} + +static const u8 *mpls_nh_via(const struct mpls_route *rt, + const struct mpls_nh *nh) +{ + return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh); +} + +static unsigned int mpls_nh_header_size(const struct mpls_nh *nh) { /* The size of the layer 2.5 labels to be added for this route */ - return rt->rt_labels * sizeof(struct mpls_shim_hdr); + return nh->nh_labels * sizeof(struct mpls_shim_hdr); } unsigned int mpls_dev_mtu(const struct net_device *dev) @@ -105,6 +96,80 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + struct mpls_entry_decoded dec; + struct mpls_shim_hdr *hdr; + bool eli_seen = false; + int label_index; + int nh_index = 0; + u32 hash = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; + label_index++) { + if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) + break; + + /* Read and decode the current label */ + hdr = mpls_hdr(skb) + label_index; + dec = mpls_entry_decode(hdr); + + /* RFC6790 - reserved labels MUST NOT be used as keys + * for the load-balancing function + */ + if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) { + hash = jhash_1word(dec.label, hash); + + /* The entropy label follows the entropy label + * indicator, so this means that the entropy + * label was just added to the hash - no need to + * go any deeper either in the label stack or in the + * payload + */ + if (eli_seen) + break; + } else if (dec.label == MPLS_LABEL_ENTROPY) { + eli_seen = true; + } + + bos = dec.bos; + if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + + sizeof(struct iphdr))) { + const struct iphdr *v4hdr; + + v4hdr = (const struct iphdr *)(mpls_hdr(skb) + + label_index); + if (v4hdr->version == 4) { + hash = jhash_3words(ntohl(v4hdr->saddr), + ntohl(v4hdr->daddr), + v4hdr->protocol, hash); + } else if (v4hdr->version == 6 && + pskb_may_pull(skb, sizeof(*hdr) * label_index + + sizeof(struct ipv6hdr))) { + const struct ipv6hdr *v6hdr; + + v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + + label_index); + + hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); + hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); + hash = jhash_1word(v6hdr->nexthdr, hash); + } + } + } + + nh_index = hash % rt->rt_nhn; +out: + return &rt->rt_nh[nh_index]; +} + static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, struct mpls_entry_decoded dec) { @@ -159,6 +224,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct net *net = dev_net(dev); struct mpls_shim_hdr *hdr; struct mpls_route *rt; + struct mpls_nh *nh; struct mpls_entry_decoded dec; struct net_device *out_dev; struct mpls_dev *mdev; @@ -196,8 +262,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (!rt) goto drop; + nh = mpls_select_multipath(rt, skb, dec.bos); + if (!nh) + goto drop; + /* Find the output device */ - out_dev = rcu_dereference(rt->rt_dev); + out_dev = rcu_dereference(nh->nh_dev); if (!mpls_output_possible(out_dev)) goto drop; @@ -212,7 +282,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, dec.ttl -= 1; /* Verify the destination can hold the packet */ - new_header_size = mpls_rt_header_size(rt); + new_header_size = mpls_nh_header_size(nh); mtu = mpls_dev_mtu(out_dev); if (mpls_pkt_too_big(skb, mtu - new_header_size)) goto drop; @@ -240,13 +310,14 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Push the new labels */ hdr = mpls_hdr(skb); bos = dec.bos; - for (i = rt->rt_labels - 1; i >= 0; i--) { - hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); + for (i = nh->nh_labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(nh->nh_label[i], + dec.ttl, 0, bos); bos = false; } } - err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); + err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb); if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); @@ -270,24 +341,33 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { struct mpls_route_config { u32 rc_protocol; u32 rc_ifindex; - u16 rc_via_table; - u16 rc_via_alen; + u8 rc_via_table; + u8 rc_via_alen; u8 rc_via[MAX_VIA_ALEN]; u32 rc_label; - u32 rc_output_labels; + u8 rc_output_labels; u32 rc_output_label[MAX_NEW_LABELS]; u32 rc_nlflags; enum mpls_payload_type rc_payload_type; struct nl_info rc_nlinfo; + struct rtnexthop *rc_mp; + int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(size_t alen) +static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) { + u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); struct mpls_route *rt; - rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); - if (rt) - rt->rt_via_alen = alen; + rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), + VIA_ALEN_ALIGN) + + num_nh * max_alen_aligned, + GFP_KERNEL); + if (rt) { + rt->rt_nhn = num_nh; + rt->rt_max_alen = max_alen_aligned; + } + return rt; } @@ -312,25 +392,22 @@ static void mpls_notify_route(struct net *net, unsigned index, } static void mpls_route_update(struct net *net, unsigned index, - struct net_device *dev, struct mpls_route *new, + struct mpls_route *new, const struct nl_info *info) { struct mpls_route __rcu **platform_label; - struct mpls_route *rt, *old = NULL; + struct mpls_route *rt; ASSERT_RTNL(); platform_label = rtnl_dereference(net->mpls.platform_label); rt = rtnl_dereference(platform_label[index]); - if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { - rcu_assign_pointer(platform_label[index], new); - old = rt; - } + rcu_assign_pointer(platform_label[index], new); - mpls_notify_route(net, index, old, new, info); + mpls_notify_route(net, index, rt, new, info); /* If we removed a route free it now */ - mpls_rt_free(old); + mpls_rt_free(rt); } static unsigned find_free_label(struct net *net) @@ -350,7 +427,8 @@ static unsigned find_free_label(struct net *net) } #if IS_ENABLED(CONFIG_INET) -static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet_fib_lookup_dev(struct net *net, + const void *addr) { struct net_device *dev; struct rtable *rt; @@ -369,14 +447,16 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) return dev; } #else -static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet_fib_lookup_dev(struct net *net, + const void *addr) { return ERR_PTR(-EAFNOSUPPORT); } #endif #if IS_ENABLED(CONFIG_IPV6) -static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet6_fib_lookup_dev(struct net *net, + const void *addr) { struct net_device *dev; struct dst_entry *dst; @@ -399,47 +479,234 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) return dev; } #else -static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet6_fib_lookup_dev(struct net *net, + const void *addr) { return ERR_PTR(-EAFNOSUPPORT); } #endif static struct net_device *find_outdev(struct net *net, - struct mpls_route_config *cfg) + struct mpls_route *rt, + struct mpls_nh *nh, int oif) { struct net_device *dev = NULL; - if (!cfg->rc_ifindex) { - switch (cfg->rc_via_table) { + if (!oif) { + switch (nh->nh_via_table) { case NEIGH_ARP_TABLE: - dev = inet_fib_lookup_dev(net, cfg->rc_via); + dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh)); break; case NEIGH_ND_TABLE: - dev = inet6_fib_lookup_dev(net, cfg->rc_via); + dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh)); break; case NEIGH_LINK_TABLE: break; } } else { - dev = dev_get_by_index(net, cfg->rc_ifindex); + dev = dev_get_by_index(net, oif); } if (!dev) return ERR_PTR(-ENODEV); + /* The caller is holding rtnl anyways, so release the dev reference */ + dev_put(dev); + return dev; } +static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, + struct mpls_nh *nh, int oif) +{ + struct net_device *dev = NULL; + int err = -ENODEV; + + dev = find_outdev(net, rt, nh, oif); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + dev = NULL; + goto errout; + } + + /* Ensure this is a supported device */ + err = -EINVAL; + if (!mpls_dev_get(dev)) + goto errout; + + RCU_INIT_POINTER(nh->nh_dev, dev); + + return 0; + +errout: + return err; +} + +static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, + struct mpls_route *rt) +{ + struct net *net = cfg->rc_nlinfo.nl_net; + struct mpls_nh *nh = rt->rt_nh; + int err; + int i; + + if (!nh) + return -ENOMEM; + + err = -EINVAL; + /* Ensure only a supported number of labels are present */ + if (cfg->rc_output_labels > MAX_NEW_LABELS) + goto errout; + + nh->nh_labels = cfg->rc_output_labels; + for (i = 0; i < nh->nh_labels; i++) + nh->nh_label[i] = cfg->rc_output_label[i]; + + nh->nh_via_table = cfg->rc_via_table; + memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen); + nh->nh_via_alen = cfg->rc_via_alen; + + err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex); + if (err) + goto errout; + + return 0; + +errout: + return err; +} + +static int mpls_nh_build(struct net *net, struct mpls_route *rt, + struct mpls_nh *nh, int oif, + struct nlattr *via, struct nlattr *newdst) +{ + int err = -ENOMEM; + + if (!nh) + goto errout; + + if (newdst) { + err = nla_get_labels(newdst, MAX_NEW_LABELS, + &nh->nh_labels, nh->nh_label); + if (err) + goto errout; + } + + err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, + __mpls_nh_via(rt, nh)); + if (err) + goto errout; + + err = mpls_nh_assign_dev(net, rt, nh, oif); + if (err) + goto errout; + + return 0; + +errout: + return err; +} + +static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, + u8 cfg_via_alen, u8 *max_via_alen) +{ + int nhs = 0; + int remaining = len; + + if (!rtnh) { + *max_via_alen = cfg_via_alen; + return 1; + } + + *max_via_alen = 0; + + while (rtnh_ok(rtnh, remaining)) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + int attrlen; + + attrlen = rtnh_attrlen(rtnh); + nla = nla_find(attrs, attrlen, RTA_VIA); + if (nla && nla_len(nla) >= + offsetof(struct rtvia, rtvia_addr)) { + int via_alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + + if (via_alen <= MAX_VIA_ALEN) + *max_via_alen = max_t(u16, *max_via_alen, + via_alen); + } + + nhs++; + rtnh = rtnh_next(rtnh, &remaining); + } + + /* leftover implies invalid nexthop configuration, discard it */ + return remaining > 0 ? 0 : nhs; +} + +static int mpls_nh_build_multi(struct mpls_route_config *cfg, + struct mpls_route *rt) +{ + struct rtnexthop *rtnh = cfg->rc_mp; + struct nlattr *nla_via, *nla_newdst; + int remaining = cfg->rc_mp_len; + int nhs = 0; + int err = 0; + + change_nexthops(rt) { + int attrlen; + + nla_via = NULL; + nla_newdst = NULL; + + err = -EINVAL; + if (!rtnh_ok(rtnh, remaining)) + goto errout; + + /* neither weighted multipath nor any flags + * are supported + */ + if (rtnh->rtnh_hops || rtnh->rtnh_flags) + goto errout; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *attrs = rtnh_attrs(rtnh); + + nla_via = nla_find(attrs, attrlen, RTA_VIA); + nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST); + } + + if (!nla_via) + goto errout; + + err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, + rtnh->rtnh_ifindex, nla_via, + nla_newdst); + if (err) + goto errout; + + rtnh = rtnh_next(rtnh, &remaining); + nhs++; + } endfor_nexthops(rt); + + rt->rt_nhn = nhs; + + return 0; + +errout: + return err; +} + static int mpls_route_add(struct mpls_route_config *cfg) { struct mpls_route __rcu **platform_label; struct net *net = cfg->rc_nlinfo.nl_net; - struct net_device *dev = NULL; struct mpls_route *rt, *old; - unsigned index; - int i; int err = -EINVAL; + u8 max_via_alen; + unsigned index; + int nhs; index = cfg->rc_label; @@ -457,27 +724,6 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (index >= net->mpls.platform_labels) goto errout; - /* Ensure only a supported number of labels are present */ - if (cfg->rc_output_labels > MAX_NEW_LABELS) - goto errout; - - dev = find_outdev(net, cfg); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); - dev = NULL; - goto errout; - } - - /* Ensure this is a supported device */ - err = -EINVAL; - if (!mpls_dev_get(dev)) - goto errout; - - err = -EINVAL; - if ((cfg->rc_via_table == NEIGH_LINK_TABLE) && - (dev->addr_len != cfg->rc_via_alen)) - goto errout; - /* Append makes no sense with mpls */ err = -EOPNOTSUPP; if (cfg->rc_nlflags & NLM_F_APPEND) @@ -497,28 +743,34 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) goto errout; + err = -EINVAL; + nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, + cfg->rc_via_alen, &max_via_alen); + if (nhs == 0) + goto errout; + err = -ENOMEM; - rt = mpls_rt_alloc(cfg->rc_via_alen); + rt = mpls_rt_alloc(nhs, max_via_alen); if (!rt) goto errout; - rt->rt_labels = cfg->rc_output_labels; - for (i = 0; i < rt->rt_labels; i++) - rt->rt_label[i] = cfg->rc_output_label[i]; rt->rt_protocol = cfg->rc_protocol; - RCU_INIT_POINTER(rt->rt_dev, dev); rt->rt_payload_type = cfg->rc_payload_type; - rt->rt_via_table = cfg->rc_via_table; - memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); - mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); + if (cfg->rc_mp) + err = mpls_nh_build_multi(cfg, rt); + else + err = mpls_nh_build_from_cfg(cfg, rt); + if (err) + goto freert; + + mpls_route_update(net, index, rt, &cfg->rc_nlinfo); - dev_put(dev); return 0; +freert: + mpls_rt_free(rt); errout: - if (dev) - dev_put(dev); return err; } @@ -538,7 +790,7 @@ static int mpls_route_del(struct mpls_route_config *cfg) if (index >= net->mpls.platform_labels) goto errout; - mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo); + mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); err = 0; errout: @@ -635,9 +887,11 @@ static void mpls_ifdown(struct net_device *dev) struct mpls_route *rt = rtnl_dereference(platform_label[index]); if (!rt) continue; - if (rtnl_dereference(rt->rt_dev) != dev) - continue; - rt->rt_dev = NULL; + for_nexthops(rt) { + if (rtnl_dereference(nh->nh_dev) != dev) + continue; + nh->nh_dev = NULL; + } endfor_nexthops(rt); } mdev = mpls_dev_get(dev); @@ -736,7 +990,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, - u32 max_labels, u32 *labels, u32 label[]) + u32 max_labels, u8 *labels, u32 label[]) { unsigned len = nla_len(nla); unsigned nla_labels; @@ -781,6 +1035,48 @@ int nla_get_labels(const struct nlattr *nla, } EXPORT_SYMBOL_GPL(nla_get_labels); +int nla_get_via(const struct nlattr *nla, u8 *via_alen, + u8 *via_table, u8 via_addr[]) +{ + struct rtvia *via = nla_data(nla); + int err = -EINVAL; + int alen; + + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) + goto errout; + alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + if (alen > MAX_VIA_ALEN) + goto errout; + + /* Validate the address family */ + switch (via->rtvia_family) { + case AF_PACKET: + *via_table = NEIGH_LINK_TABLE; + break; + case AF_INET: + *via_table = NEIGH_ARP_TABLE; + if (alen != 4) + goto errout; + break; + case AF_INET6: + *via_table = NEIGH_ND_TABLE; + if (alen != 16) + goto errout; + break; + default: + /* Unsupported address family */ + goto errout; + } + + memcpy(via_addr, via->rtvia_addr, alen); + *via_alen = alen; + err = 0; + +errout: + return err; +} + static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, struct mpls_route_config *cfg) { @@ -844,7 +1140,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, break; case RTA_DST: { - u32 label_count; + u8 label_count; if (nla_get_labels(nla, 1, &label_count, &cfg->rc_label)) goto errout; @@ -857,35 +1153,15 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, } case RTA_VIA: { - struct rtvia *via = nla_data(nla); - if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) - goto errout; - cfg->rc_via_alen = nla_len(nla) - - offsetof(struct rtvia, rtvia_addr); - if (cfg->rc_via_alen > MAX_VIA_ALEN) + if (nla_get_via(nla, &cfg->rc_via_alen, + &cfg->rc_via_table, cfg->rc_via)) goto errout; - - /* Validate the address family */ - switch(via->rtvia_family) { - case AF_PACKET: - cfg->rc_via_table = NEIGH_LINK_TABLE; - break; - case AF_INET: - cfg->rc_via_table = NEIGH_ARP_TABLE; - if (cfg->rc_via_alen != 4) - goto errout; - break; - case AF_INET6: - cfg->rc_via_table = NEIGH_ND_TABLE; - if (cfg->rc_via_alen != 16) - goto errout; - break; - default: - /* Unsupported address family */ - goto errout; - } - - memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen); + break; + } + case RTA_MULTIPATH: + { + cfg->rc_mp = nla_data(nla); + cfg->rc_mp_len = nla_len(nla); break; } default: @@ -946,16 +1222,52 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; - if (rt->rt_labels && - nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) - goto nla_put_failure; - if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen)) - goto nla_put_failure; - dev = rtnl_dereference(rt->rt_dev); - if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) - goto nla_put_failure; if (nla_put_labels(skb, RTA_DST, 1, &label)) goto nla_put_failure; + if (rt->rt_nhn == 1) { + const struct mpls_nh *nh = rt->rt_nh; + + if (nh->nh_labels && + nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, + nh->nh_label)) + goto nla_put_failure; + if (nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), + nh->nh_via_alen)) + goto nla_put_failure; + dev = rtnl_dereference(nh->nh_dev); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) + goto nla_put_failure; + } else { + struct rtnexthop *rtnh; + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + for_nexthops(rt) { + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (!rtnh) + goto nla_put_failure; + + dev = rtnl_dereference(nh->nh_dev); + if (dev) + rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, + nh->nh_labels, + nh->nh_label)) + goto nla_put_failure; + if (nla_put_via(skb, nh->nh_via_table, + mpls_nh_via(rt, nh), + nh->nh_via_alen)) + goto nla_put_failure; + + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; + } endfor_nexthops(rt); + + nla_nest_end(skb, mp); + } nlmsg_end(skb, nlh); return 0; @@ -1000,12 +1312,30 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */ + nla_total_size(4); /* RTA_DST */ - if (rt->rt_labels) /* RTA_NEWDST */ - payload += nla_total_size(rt->rt_labels * 4); - if (rt->rt_dev) /* RTA_OIF */ - payload += nla_total_size(4); + + if (rt->rt_nhn == 1) { + struct mpls_nh *nh = rt->rt_nh; + + if (nh->nh_dev) + payload += nla_total_size(4); /* RTA_OIF */ + payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */ + if (nh->nh_labels) /* RTA_NEWDST */ + payload += nla_total_size(nh->nh_labels * 4); + } else { + /* each nexthop is packed in an attribute */ + size_t nhsize = 0; + + for_nexthops(rt) { + nhsize += nla_total_size(sizeof(struct rtnexthop)); + nhsize += nla_total_size(2 + nh->nh_via_alen); + if (nh->nh_labels) + nhsize += nla_total_size(nh->nh_labels * 4); + } endfor_nexthops(rt); + /* nested attribute */ + payload += nla_total_size(nhsize); + } + return payload; } @@ -1057,25 +1387,29 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(lo->addr_len); + rt0 = mpls_rt_alloc(1, lo->addr_len); if (!rt0) goto nort0; - RCU_INIT_POINTER(rt0->rt_dev, lo); + RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; - rt0->rt_via_table = NEIGH_LINK_TABLE; - memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); + rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE; + rt0->rt_nh->nh_via_alen = lo->addr_len; + memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr, + lo->addr_len); } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(lo->addr_len); + rt2 = mpls_rt_alloc(1, lo->addr_len); if (!rt2) goto nort2; - RCU_INIT_POINTER(rt2->rt_dev, lo); + RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; - rt2->rt_via_table = NEIGH_LINK_TABLE; - memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); + rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; + rt2->rt_nh->nh_via_alen = lo->addr_len; + memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, + lo->addr_len); } rtnl_lock(); @@ -1085,7 +1419,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* Free any labels beyond the new table */ for (index = limit; index < old_limit; index++) - mpls_route_update(net, index, NULL, NULL, NULL); + mpls_route_update(net, index, NULL, NULL); /* Copy over the old labels */ cp_size = size; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 2681a4ba6c37..bde52ce88c94 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -21,6 +21,76 @@ struct mpls_dev { struct sk_buff; +#define LABEL_NOT_SPECIFIED (1 << 20) +#define MAX_NEW_LABELS 2 + +/* This maximum ha length copied from the definition of struct neighbour */ +#define VIA_ALEN_ALIGN sizeof(unsigned long) +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, VIA_ALEN_ALIGN)) + +enum mpls_payload_type { + MPT_UNSPEC, /* IPv4 or IPv6 */ + MPT_IPV4 = 4, + MPT_IPV6 = 6, + + /* Other types not implemented: + * - Pseudo-wire with or without control word (RFC4385) + * - GAL (RFC5586) + */ +}; + +struct mpls_nh { /* next hop label forwarding entry */ + struct net_device __rcu *nh_dev; + u32 nh_label[MAX_NEW_LABELS]; + u8 nh_labels; + u8 nh_via_alen; + u8 nh_via_table; +}; + +/* The route, nexthops and vias are stored together in the same memory + * block: + * + * +----------------------+ + * | mpls_route | + * +----------------------+ + * | mpls_nh 0 | + * +----------------------+ + * | ... | + * +----------------------+ + * | mpls_nh n-1 | + * +----------------------+ + * | alignment padding | + * +----------------------+ + * | via[rt_max_alen] 0 | + * +----------------------+ + * | ... | + * +----------------------+ + * | via[rt_max_alen] n-1 | + * +----------------------+ + */ +struct mpls_route { /* next hop label forwarding entry */ + struct rcu_head rt_rcu; + u8 rt_protocol; + u8 rt_payload_type; + u8 rt_max_alen; + unsigned int rt_nhn; + struct mpls_nh rt_nh[0]; +}; + +#define for_nexthops(rt) { \ + int nhsel; struct mpls_nh *nh; \ + for (nhsel = 0, nh = (rt)->rt_nh; \ + nhsel < (rt)->rt_nhn; \ + nh++, nhsel++) + +#define change_nexthops(rt) { \ + int nhsel; struct mpls_nh *nh; \ + for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \ + nhsel < (rt)->rt_nhn; \ + nh++, nhsel++) + +#define endfor_nexthops(rt) } + static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); @@ -52,8 +122,10 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr * int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, +int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, u32 label[]); +int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, + u8 via[]); bool mpls_output_possible(const struct net_device *dev); unsigned int mpls_dev_mtu(const struct net_device *dev); bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 09e661c3ae58..f39276d1c2d7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -152,6 +152,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) #endif synchronize_net(); nf_queue_nf_hook_drop(net, &entry->ops); + /* other cpu might still process nfqueue verdict that used reg */ + synchronize_net(); kfree(entry); } EXPORT_SYMBOL(nf_unregister_net_hook); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a1fe5377a2b3..5a30ce6e8c90 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_timeout_expired(ext_timeout(n, set)))) n = NULL; - e = kzalloc(set->dsize, GFP_KERNEL); + e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) return -ENOMEM; e->id = d->id; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a49a8c7c564..fafe33bdb619 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2371,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, int pos, idx, shift; err = 0; - netlink_table_grab(); + netlink_lock_table(); for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { if (len - pos < sizeof(u32)) break; @@ -2386,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, } if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) err = -EFAULT; - netlink_table_ungrab(); + netlink_unlock_table(); break; } case NETLINK_CAP_ACK: diff --git a/net/nfc/core.c b/net/nfc/core.c index cff3f1614ad4..1fe3d3b362c0 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -449,7 +449,7 @@ error: * @dev: The nfc device that found the target * @target_idx: index of the target that must be deactivated */ -int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx) +int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) { int rc = 0; @@ -476,7 +476,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx) if (dev->ops->check_presence) del_timer_sync(&dev->check_pres_timer); - dev->ops->deactivate_target(dev, dev->active_target); + dev->ops->deactivate_target(dev, dev->active_target, mode); dev->active_target = NULL; error: diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 009bcf317101..23c2a118ac9f 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -631,7 +631,8 @@ static int digital_activate_target(struct nfc_dev *nfc_dev, } static void digital_deactivate_target(struct nfc_dev *nfc_dev, - struct nfc_target *target) + struct nfc_target *target, + u8 mode) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 6e061da2258a..2b0f0ac498d2 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -678,7 +678,8 @@ static int hci_activate_target(struct nfc_dev *nfc_dev, } static void hci_deactivate_target(struct nfc_dev *nfc_dev, - struct nfc_target *target) + struct nfc_target *target, + u8 mode) { } diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 1b90c0531852..1399a03fa6e6 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -144,11 +144,13 @@ inline int nfc_llc_start(struct nfc_llc *llc) { return llc->ops->start(llc); } +EXPORT_SYMBOL(nfc_llc_start); inline int nfc_llc_stop(struct nfc_llc *llc) { return llc->ops->stop(llc); } +EXPORT_SYMBOL(nfc_llc_stop); inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) { diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 901c1ddba841..85d4819ab657 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -12,7 +12,7 @@ config NFC_NCI config NFC_NCI_SPI depends on NFC_NCI && SPI select CRC_CCITT - bool "NCI over SPI protocol support" + tristate "NCI over SPI protocol support" default n help NCI (NFC Controller Interface) is a communication protocol between diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index b4b85b82e988..0ca31d9bf741 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_NFC_NCI) += nci.o nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o -nci-$(CONFIG_NFC_NCI_SPI) += spi.o +nci_spi-y += spi.o +obj-$(CONFIG_NFC_NCI_SPI) += nci_spi.o nci_uart-y += uart.o obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 943889b87a34..10c99a578421 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -64,6 +64,19 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, return NULL; } +int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id) +{ + struct nci_conn_info *conn_info; + + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->id == id) + return conn_info->conn_id; + } + + return -EINVAL; +} +EXPORT_SYMBOL(nci_get_conn_info_by_id); + /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) @@ -325,32 +338,46 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) sizeof(struct nci_rf_deactivate_cmd), &cmd); } -struct nci_prop_cmd_param { +struct nci_cmd_param { __u16 opcode; size_t len; __u8 *payload; }; -static void nci_prop_cmd_req(struct nci_dev *ndev, unsigned long opt) +static void nci_generic_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_prop_cmd_param *param = (struct nci_prop_cmd_param *)opt; + struct nci_cmd_param *param = + (struct nci_cmd_param *)opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) { - struct nci_prop_cmd_param param; + struct nci_cmd_param param; param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid); param.len = len; param.payload = payload; - return __nci_request(ndev, nci_prop_cmd_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); +int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload) +{ + struct nci_cmd_param param; + + param.opcode = opcode; + param.len = len; + param.payload = payload; + + return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_cmd); + int nci_core_reset(struct nci_dev *ndev) { return __nci_request(ndev, nci_reset_req, 0, @@ -402,9 +429,8 @@ static int nci_open_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_INIT_TIMEOUT)); } - if (ndev->ops->post_setup) { + if (!rc && ndev->ops->post_setup) rc = ndev->ops->post_setup(ndev); - } if (!rc) { rc = __nci_request(ndev, nci_init_complete_req, 0, @@ -540,7 +566,7 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { - return nci_request(ndev, nci_nfcee_discover_req, action, + return __nci_request(ndev, nci_nfcee_discover_req, action, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_discover); @@ -561,8 +587,9 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; - return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd, - msecs_to_jiffies(NCI_CMD_TIMEOUT)); + return __nci_request(ndev, nci_nfcee_mode_set_req, + (unsigned long)&cmd, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); @@ -588,12 +615,19 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, if (!cmd) return -ENOMEM; + if (!number_destination_params) + return -EINVAL; + cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; memcpy(cmd->params, params, params_len); data.cmd = cmd; - ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + + if (params->length > 0) + ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + else + ndev->cur_id = 0; r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data, @@ -612,8 +646,8 @@ static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { - return nci_request(ndev, nci_core_conn_close_req, conn_id, - msecs_to_jiffies(NCI_CMD_TIMEOUT)); + return __nci_request(ndev, nci_core_conn_close_req, conn_id, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); @@ -801,9 +835,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, } static void nci_deactivate_target(struct nfc_dev *nfc_dev, - struct nfc_target *target) + struct nfc_target *target, + __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; pr_debug("entry\n"); @@ -814,9 +850,14 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, ndev->target_active_prot = 0; + switch (mode) { + case NFC_TARGET_MODE_SLEEP: + nci_mode = NCI_DEACTIVATE_TYPE_SLEEP_MODE; + break; + } + if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { - nci_request(ndev, nci_rf_deactivate_req, - NCI_DEACTIVATE_TYPE_IDLE_MODE, + nci_request(ndev, nci_rf_deactivate_req, nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } @@ -850,7 +891,7 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) pr_debug("entry\n"); if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { - nci_deactivate_target(nfc_dev, NULL); + nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { @@ -1177,7 +1218,7 @@ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) } EXPORT_SYMBOL(nci_recv_frame); -static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) +int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); @@ -1195,6 +1236,7 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) return ndev->ops->send(ndev, skb); } +EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) @@ -1226,48 +1268,80 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) return 0; } +EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ -static struct nci_prop_ops *prop_cmd_lookup(struct nci_dev *ndev, - __u16 opcode) +static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, + size_t n_ops, + __u16 opcode) { size_t i; - struct nci_prop_ops *prop_op; + struct nci_driver_ops *op; - if (!ndev->ops->prop_ops || !ndev->ops->n_prop_ops) + if (!ops || !n_ops) return NULL; - for (i = 0; i < ndev->ops->n_prop_ops; i++) { - prop_op = &ndev->ops->prop_ops[i]; - if (prop_op->opcode == opcode) - return prop_op; + for (i = 0; i < n_ops; i++) { + op = &ops[i]; + if (op->opcode == opcode) + return op; } return NULL; } -int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, - struct sk_buff *skb) +static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, + struct sk_buff *skb, struct nci_driver_ops *ops, + size_t n_ops) { - struct nci_prop_ops *prop_op; + struct nci_driver_ops *op; - prop_op = prop_cmd_lookup(ndev, rsp_opcode); - if (!prop_op || !prop_op->rsp) + op = ops_cmd_lookup(ops, n_ops, rsp_opcode); + if (!op || !op->rsp) return -ENOTSUPP; - return prop_op->rsp(ndev, skb); + return op->rsp(ndev, skb); } -int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, - struct sk_buff *skb) +static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, + struct sk_buff *skb, struct nci_driver_ops *ops, + size_t n_ops) { - struct nci_prop_ops *prop_op; + struct nci_driver_ops *op; - prop_op = prop_cmd_lookup(ndev, ntf_opcode); - if (!prop_op || !prop_op->ntf) + op = ops_cmd_lookup(ops, n_ops, ntf_opcode); + if (!op || !op->ntf) return -ENOTSUPP; - return prop_op->ntf(ndev, skb); + return op->ntf(ndev, skb); +} + +int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->prop_ops, + ndev->ops->n_prop_ops); +} + +int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->prop_ops, + ndev->ops->n_prop_ops); +} + +int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->core_ops, + ndev->ops->n_core_ops); +} + +int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->core_ops, + ndev->ops->n_core_ops); } /* ---- NCI TX Data worker thread ---- */ diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 566466d90048..dbd24254412a 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -90,6 +90,18 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, nci_pbf_set((__u8 *)hdr, pbf); } +int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id) +{ + struct nci_conn_info *conn_info; + + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) + return -EPROTO; + + return conn_info->max_pkt_payload_len; +} +EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size); + static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { @@ -203,6 +215,7 @@ free_exit: exit: return rc; } +EXPORT_SYMBOL(nci_send_data); /* ----------------- NCI RX Data ----------------- */ diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 609f92283d1b..2aedac15cb59 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -70,6 +70,7 @@ struct nci_hcp_packet { #define NCI_HCI_ANY_SET_PARAMETER 0x01 #define NCI_HCI_ANY_GET_PARAMETER 0x02 #define NCI_HCI_ANY_CLOSE_PIPE 0x04 +#define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14 #define NCI_HFP_NO_CHAINING 0x80 @@ -78,6 +79,8 @@ struct nci_hcp_packet { #define NCI_EVT_HOT_PLUG 0x03 #define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 +#define NCI_HCI_ADM_CREATE_PIPE 0x10 +#define NCI_HCI_ADM_DELETE_PIPE 0x11 /* HCP headers */ #define NCI_HCI_HCP_PACKET_HEADER_LEN 1 @@ -101,6 +104,20 @@ struct nci_hcp_packet { #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) +static int nci_hci_result_to_errno(u8 result) +{ + switch (result) { + case NCI_HCI_ANY_OK: + return 0; + case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: + return -EOPNOTSUPP; + case NCI_HCI_ANY_E_TIMEOUT: + return -ETIME; + default: + return -1; + } +} + /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { @@ -146,18 +163,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (!conn_info) return -EPROTO; - skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + i = 0; + skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *skb_push(skb, 1) = data_type; - i = 0; - len = conn_info->max_pkt_payload_len; - do { + len = conn_info->max_pkt_payload_len; + /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { @@ -177,9 +194,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return r; i += len; + if (i < data_len) { - skb_trim(skb, 0); - skb_pull(skb, len); + skb = nci_skb_alloc(ndev, + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); @@ -212,7 +235,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { - struct nci_conn_info *conn_info; + struct nci_hcp_message *message; + struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -232,14 +256,34 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - - if (r == NCI_STATUS_OK && skb) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } EXPORT_SYMBOL(nci_hci_send_cmd); +int nci_hci_clear_all_pipes(struct nci_dev *ndev) +{ + int r; + + r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL); + if (r < 0) + return r; + + nci_hci_reset_pipes(ndev->hci_dev); + return r; +} +EXPORT_SYMBOL(nci_hci_clear_all_pipes); + static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, u8 event, struct sk_buff *skb) { @@ -328,9 +372,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct nci_conn_info *conn_info; u8 status = result; - if (result != NCI_HCI_ANY_OK) - goto exit; - conn_info = ndev->hci_dev->conn_info; if (!conn_info) { status = NCI_STATUS_REJECTED; @@ -340,7 +381,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, conn_info->rx_skb = skb; exit: - nci_req_complete(ndev, status); + nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. @@ -366,7 +407,7 @@ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, break; } - nci_req_complete(ndev, 0); + nci_req_complete(ndev, NCI_STATUS_OK); } static void nci_hci_msg_rx_work(struct work_struct *work) @@ -378,7 +419,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work) u8 pipe, type, instruction; while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { - pipe = skb->data[0]; + pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); @@ -395,7 +436,7 @@ void nci_hci_data_received_cb(void *context, { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; - u8 pipe, type, instruction; + u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; @@ -415,7 +456,7 @@ void nci_hci_data_received_cb(void *context, /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { - pipe = packet->header & NCI_HCI_FRAGMENT; + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; @@ -434,7 +475,7 @@ void nci_hci_data_received_cb(void *context, *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { - msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } @@ -452,11 +493,10 @@ void nci_hci_data_received_cb(void *context, packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { - pipe = packet->header; - instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); - skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + - NCI_HCI_HCP_MESSAGE_HEADER_LEN); - nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, + NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); @@ -485,9 +525,47 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) } EXPORT_SYMBOL(nci_hci_open_pipe); +static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, + u8 dest_gate, int *result) +{ + u8 pipe; + struct sk_buff *skb; + struct nci_hci_create_pipe_params params; + struct nci_hci_create_pipe_resp *resp; + + pr_debug("gate=%d\n", dest_gate); + + params.src_gate = NCI_HCI_ADMIN_GATE; + params.dest_host = dest_host; + params.dest_gate = dest_gate; + + *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADM_CREATE_PIPE, + (u8 *)¶ms, sizeof(params), &skb); + if (*result < 0) + return NCI_HCI_INVALID_PIPE; + + resp = (struct nci_hci_create_pipe_resp *)skb->data; + pipe = resp->pipe; + kfree_skb(skb); + + pr_debug("pipe created=%d\n", pipe); + + return pipe; +} + +static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) +{ + pr_debug("\n"); + + return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); +} + int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -520,6 +598,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + } kfree(tmp); return r; @@ -529,6 +613,7 @@ EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -553,8 +638,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - if (r == NCI_STATUS_OK) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } @@ -563,6 +655,7 @@ EXPORT_SYMBOL(nci_hci_get_param); int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, u8 pipe) { + bool pipe_created = false; int r; if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) @@ -581,12 +674,26 @@ int nci_hci_connect_gate(struct nci_dev *ndev, case NCI_HCI_ADMIN_GATE: pipe = NCI_HCI_ADMIN_PIPE; break; + default: + pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); + if (pipe < 0) + return r; + pipe_created = true; + break; } open_pipe: r = nci_hci_open_pipe(ndev, pipe); - if (r < 0) + if (r < 0) { + if (pipe_created) { + if (nci_hci_delete_pipe(ndev, pipe) < 0) { + /* TODO: Cannot clean by deleting pipe... + * -> inconsistent state + */ + } + } return r; + } ndev->hci_dev->pipes[pipe].gate = dest_gate; ndev->hci_dev->pipes[pipe].host = dest_host; @@ -653,6 +760,10 @@ int nci_hci_dev_session_init(struct nci_dev *ndev) /* Restore gate<->pipe table from some proprietary location. */ r = ndev->ops->hci_load_session(ndev); } else { + r = nci_hci_clear_all_pipes(ndev); + if (r < 0) + goto exit; + r = nci_hci_dev_connect_gates(ndev, ndev->hci_dev->init_data.gate_count, ndev->hci_dev->init_data.gates); diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 5d1c2e391c56..2ada2b39e355 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -759,7 +759,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) skb_pull(skb, NCI_CTRL_HDR_SIZE); if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) { - if (nci_prop_ntf_packet(ndev, ntf_opcode, skb)) { + if (nci_prop_ntf_packet(ndev, ntf_opcode, skb) == -ENOTSUPP) { pr_err("unsupported ntf opcode 0x%x\n", ntf_opcode); } @@ -805,6 +805,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) break; } + nci_core_ntf_packet(ndev, ntf_opcode, skb); end: kfree_skb(skb); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 408bd8f857ab..9b6eb913d801 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -355,6 +355,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) break; } + nci_core_rsp_packet(ndev, rsp_opcode, skb); end: kfree_skb(skb); diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index ec250e77763a..d904cd2f1442 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -18,6 +18,8 @@ #define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__ +#include <linux/module.h> + #include <linux/export.h> #include <linux/spi/spi.h> #include <linux/crc-ccitt.h> @@ -56,6 +58,7 @@ static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb, } t.cs_change = cs_change; t.delay_usecs = nspi->xfer_udelay; + t.speed_hz = nspi->xfer_speed_hz; spi_message_init(&m); spi_message_add_tail(&t, &m); @@ -142,7 +145,8 @@ struct nci_spi *nci_spi_allocate_spi(struct spi_device *spi, nspi->acknowledge_mode = acknowledge_mode; nspi->xfer_udelay = delay; - + /* Use controller max SPI speed by default */ + nspi->xfer_speed_hz = 0; nspi->spi = spi; nspi->ndev = ndev; init_completion(&nspi->req_completion); @@ -195,12 +199,14 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi) tx.tx_buf = req; tx.len = 2; tx.cs_change = 0; + tx.speed_hz = nspi->xfer_speed_hz; spi_message_add_tail(&tx, &m); memset(&rx, 0, sizeof(struct spi_transfer)); rx.rx_buf = resp_hdr; rx.len = 2; rx.cs_change = 1; + rx.speed_hz = nspi->xfer_speed_hz; spi_message_add_tail(&rx, &m); ret = spi_sync(nspi->spi, &m); @@ -224,6 +230,7 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi) rx.len = rx_len; rx.cs_change = 0; rx.delay_usecs = nspi->xfer_udelay; + rx.speed_hz = nspi->xfer_speed_hz; spi_message_add_tail(&rx, &m); ret = spi_sync(nspi->spi, &m); @@ -320,3 +327,5 @@ done: return skb; } EXPORT_SYMBOL_GPL(nci_spi_read); + +MODULE_LICENSE("GPL"); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 853172c27f68..f58c1fba1026 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -885,7 +885,7 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); - nfc_deactivate_target(dev, target_idx); + nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); rc = nfc_activate_target(dev, target_idx, protocol); nfc_put_device(dev); @@ -1109,10 +1109,8 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); - if (!dev) { - rc = -ENODEV; - goto exit; - } + if (!dev) + return -ENODEV; device_lock(&dev->dev); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 5c93e8412a26..c20b784ad720 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -25,6 +25,9 @@ #include <net/nfc/nfc.h> #include <net/sock.h> +#define NFC_TARGET_MODE_IDLE 0 +#define NFC_TARGET_MODE_SLEEP 1 + struct nfc_protocol { int id; struct proto *proto; @@ -147,7 +150,7 @@ int nfc_dep_link_down(struct nfc_dev *dev); int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol); -int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx); +int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode); int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index e9a91488fe3d..e386e6c90b17 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -321,7 +321,8 @@ static void rawsock_destruct(struct sock *sk) if (sk->sk_state == TCP_ESTABLISHED) { nfc_deactivate_target(nfc_rawsock(sk)->dev, - nfc_rawsock(sk)->target_idx); + nfc_rawsock(sk)->target_idx, + NFC_TARGET_MODE_IDLE); nfc_put_device(nfc_rawsock(sk)->dev); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c6087233d7fc..c88d0f2d3e01 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -769,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, const struct nlattr *actions, int actions_len) { - struct ip_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; @@ -797,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, if (vport) { int err; - upcall.egress_tun_info = &info; - err = ovs_vport_get_egress_tun_info(vport, skb, - &upcall); - if (err) - upcall.egress_tun_info = NULL; + err = dev_fill_metadata_dst(vport->dev, skb); + if (!err) + upcall.egress_tun_info = skb_tunnel_info(skb); } break; @@ -1113,8 +1110,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, nla_data(a)); /* Hide stolen IP fragments from user space. */ - if (err == -EINPROGRESS) - return 0; + if (err) + return err == -EINPROGRESS ? 0 : err; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 9ed833e9bb7d..c2cc11168fd5 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb, ct = nf_ct_get(skb, &ctinfo); if (ct) { state = ovs_ct_get_state(ctinfo); + if (!nf_ct_is_confirmed(ct)) + state |= OVS_CS_F_NEW; if (ct->master) state |= OVS_CS_F_RELATED; zone = nf_ct_zone(ct); @@ -222,9 +224,6 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, struct nf_conn *ct; int err; - if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) - return -ENOTSUPP; - /* The connection could be invalid, in which case set_label is no-op.*/ ct = nf_ct_get(skb, &ctinfo); if (!ct) @@ -294,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) return helper->help(skb, protoff, ct, ctinfo); } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { @@ -309,8 +311,8 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, return err; ovs_cb.mru = IPCB(skb)->frag_max_size; - } else if (key->eth.type == htons(ETH_P_IPV6)) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; struct sk_buff *reasm; @@ -319,17 +321,25 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, if (!reasm) return -EINPROGRESS; - if (skb == reasm) + if (skb == reasm) { + kfree_skb(skb); return -EINVAL; + } + + /* Don't free 'skb' even though it is one of the original + * fragments, as we're going to morph it into the head. + */ + skb_get(skb); + nf_ct_frag6_consume_orig(reasm); key->ip.proto = ipv6_hdr(reasm)->nexthdr; skb_morph(skb, reasm); + skb->next = reasm->next; consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; -#else - return -EPFNOSUPPORT; #endif } else { + kfree_skb(skb); return -EPFNOSUPPORT; } @@ -377,7 +387,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, return true; } -static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, +static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb) { @@ -408,6 +418,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, } } + ovs_ct_update_key(skb, key, true); + return 0; } @@ -430,8 +442,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, err = __ovs_ct_lookup(net, key, info, skb); if (err) return err; - - ovs_ct_update_key(skb, key, true); } return 0; @@ -460,8 +470,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (nf_conntrack_confirm(skb) != NF_ACCEPT) return -EINVAL; - ovs_ct_update_key(skb, key, true); - return 0; } @@ -476,6 +484,9 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels) return false; } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) @@ -511,6 +522,8 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, &info->labels.mask); err: skb_push(skb, nh_ofs); + if (err) + kfree_skb(skb); return err; } @@ -587,6 +600,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_MARK: { struct md_mark *mark = nla_data(a); + if (!mark->mask) { + OVS_NLERR(log, "ct_mark mask cannot be 0"); + return -EINVAL; + } info->mark = *mark; break; } @@ -595,6 +612,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_LABELS: { struct md_labels *labels = nla_data(a); + if (!labels_nonzero(&labels->mask)) { + OVS_NLERR(log, "ct_labels mask cannot be 0"); + return -EINVAL; + } info->labels = *labels; break; } @@ -705,11 +726,12 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) return -EMSGSIZE; - if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), &ct_info->mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + labels_nonzero(&ct_info->labels.mask) && nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), &ct_info->labels)) return -EMSGSIZE; diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index da8714942c95..a7544f405c16 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -35,12 +35,9 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); -static inline bool ovs_ct_state_supported(u32 state) -{ - return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | - OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | - OVS_CS_F_INVALID | OVS_CS_F_TRACKED)); -} +#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ + OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ + OVS_CS_F_INVALID | OVS_CS_F_TRACKED) #else #include <linux/errno.h> @@ -53,11 +50,6 @@ static inline bool ovs_ct_verify(struct net *net, int attr) return false; } -static inline bool ovs_ct_state_supported(u32 state) -{ - return false; -} - static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, const struct sw_flow_key *key, struct sw_flow_actions **acts, bool log) @@ -75,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) { + kfree_skb(skb); return -ENOTSUPP; } @@ -94,5 +87,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key, } static inline void ovs_ct_free_action(const struct nlattr *a) { } + +#define CT_SUPPORTED_MASK 0 #endif /* CONFIG_NF_CONNTRACK */ #endif /* ovs_conntrack.h */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a75828091e21..5633172b791a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -489,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); - err = ovs_nla_put_egress_tunnel_key(user_skb, - upcall_info->egress_tun_info, - upcall_info->egress_tun_opts); + err = ovs_nla_put_tunnel_info(user_skb, + upcall_info->egress_tun_info); BUG_ON(err); nla_nest_end(user_skb, nla); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index f88038a99f44..67bdecd9fdc1 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -117,7 +117,6 @@ struct ovs_skb_cb { */ struct dp_upcall_info { struct ip_tunnel_info *egress_tun_info; - const void *egress_tun_opts; const struct nlattr *userdata; const struct nlattr *actions; int actions_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 80e1f09397c0..907d6fd28ede 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -764,7 +764,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts) { + if (swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_GENEVE_OPT && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) @@ -798,14 +798,13 @@ static int ip_tun_to_nlattr(struct sk_buff *skb, return 0; } -int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, - const struct ip_tunnel_info *egress_tun_info, - const void *egress_tun_opts) +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) { - return __ip_tun_to_nlattr(skb, &egress_tun_info->key, - egress_tun_opts, - egress_tun_info->options_len, - ip_tunnel_info_af(egress_tun_info)); + return __ip_tun_to_nlattr(skb, &tun_info->key, + ip_tunnel_info_opts(tun_info), + tun_info->options_len, + ip_tunnel_info_af(tun_info)); } static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, @@ -866,7 +865,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); - if (!is_mask && !ovs_ct_state_supported(ct_state)) { + if (ct_state & ~CT_SUPPORTED_MASK) { OVS_NLERR(log, "ct_state flags %08x unsupported", ct_state); return -EINVAL; @@ -1149,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val, } else { memset(nla_data(nla), val, nla_len(nla)); } + + if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) + *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; } } @@ -2432,11 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) if (!start) return -EMSGSIZE; - err = ip_tun_to_nlattr(skb, &tun_info->key, - tun_info->options_len ? - ip_tunnel_info_opts(tun_info) : NULL, - tun_info->options_len, - ip_tunnel_info_af(tun_info)); + err = ovs_nla_put_tunnel_info(skb, tun_info); if (err) return err; nla_nest_end(skb, start); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 6ca3f0baf449..47dd142eca1c 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_get_match(struct net *, struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); -int ovs_nla_put_egress_tunnel_key(struct sk_buff *, - const struct ip_tunnel_info *, - const void *egress_tun_opts); + +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info); bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 7a568ca8da54..efb736bb6855 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport, return 0; } -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dport = htons(geneve_port->port_no); - __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_UDP, sport, dport); -} - static struct vport *geneve_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -130,7 +118,6 @@ static struct vport_ops ovs_geneve_vport_ops = { .get_options = geneve_get_options, .send = dev_queue_xmit, .owner = THIS_MODULE, - .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index cdb758ab01cf..c3257d78d3d2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_GRE, 0, 0); -} - static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, .send = dev_queue_xmit, - .get_egress_tun_info = gre_get_egress_tun_info, .destroy = ovs_netdev_tunnel_destroy, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 7f0a8bd08857..ec76398a792f 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev) free_netdev(dev); } +static struct rtnl_link_stats64 * +internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + stats->rx_errors = dev->stats.rx_errors; + stats->tx_errors = dev->stats.tx_errors; + stats->tx_dropped = dev->stats.tx_dropped; + stats->rx_dropped = dev->stats.rx_dropped; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *percpu_stats; + struct pcpu_sw_netstats local_stats; + unsigned int start; + + percpu_stats = per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); + local_stats = *percpu_stats; + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); + + stats->rx_bytes += local_stats.rx_bytes; + stats->rx_packets += local_stats.rx_packets; + stats->tx_bytes += local_stats.tx_bytes; + stats->tx_packets += local_stats.tx_packets; + } + + return stats; +} + static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, + .ndo_get_stats64 = internal_get_stats, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { @@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_vport; } + vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!vport->dev->tstats) { + err = -ENOMEM; + goto error_free_netdev; + } dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(vport->dev); @@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); if (err) - goto error_free_netdev; + goto error_unlock; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) return vport; -error_free_netdev: +error_unlock: rtnl_unlock(); + free_percpu(vport->dev->tstats); +error_free_netdev: free_netdev(vport->dev); error_free_vport: ovs_vport_free(vport); @@ -198,7 +238,7 @@ static void internal_dev_destroy(struct vport *vport) /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(vport->dev); - + free_percpu(vport->dev->tstats); rtnl_unlock(); } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 6f700710d413..1605691d9414 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -146,32 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct vxlan_dev *vxlan = netdev_priv(vport->dev); - struct net *net = ovs_dp_get_net(vport->dp); - unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info); - __be16 dst_port = vxlan_dev_dst_port(vxlan, family); - __be16 src_port; - int port_min; - int port_max; - - inet_get_local_port_range(net, &port_min, &port_max); - src_port = udp_flow_src_port(net, skb, 0, 0, true); - - return ovs_tunnel_get_egress_info(upcall, net, - skb, IPPROTO_UDP, - src_port, dst_port); -} - static struct vport_ops ovs_vxlan_netdev_vport_ops = { .type = OVS_VPORT_TYPE_VXLAN, .create = vxlan_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = vxlan_get_options, .send = dev_queue_xmit, - .get_egress_tun_info = vxlan_get_egress_tun_info, }; static int __init ovs_vxlan_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index ef19d0b77d13..0ac0fd004d7e 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,64 +480,6 @@ void ovs_vport_deferred_free(struct vport *vport) } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *skb, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst) -{ - struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; - const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); - const struct ip_tunnel_key *tun_key; - u32 skb_mark = skb->mark; - struct rtable *rt; - struct flowi4 fl; - - if (unlikely(!tun_info)) - return -EINVAL; - if (ip_tunnel_info_af(tun_info) != AF_INET) - return -EINVAL; - - tun_key = &tun_info->key; - - /* Route lookup to get srouce IP address. - * The process may need to be changed if the corresponding process - * in vports ops changed. - */ - rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); - if (IS_ERR(rt)) - return PTR_ERR(rt); - - ip_rt_put(rt); - - /* Generate egress_tun_info based on tun_info, - * saddr, tp_src and tp_dst - */ - ip_tunnel_key_init(&egress_tun_info->key, - fl.saddr, tun_key->u.ipv4.dst, - tun_key->tos, - tun_key->ttl, - tp_src, tp_dst, - tun_key->tun_id, - tun_key->tun_flags); - egress_tun_info->options_len = tun_info->options_len; - egress_tun_info->mode = tun_info->mode; - upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info); - return 0; -} -EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - /* get_egress_tun_info() is only implemented on tunnel ports. */ - if (unlikely(!vport->ops->get_egress_tun_info)) - return -EINVAL; - - return vport->ops->get_egress_tun_info(vport, skb, upcall); -} - static unsigned int packet_length(const struct sk_buff *skb) { unsigned int length = skb->len - ETH_HLEN; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 885607f28d56..bdfd82a7c064 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -27,7 +27,6 @@ #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/u64_stats_sync.h> -#include <net/route.h> #include "datapath.h" @@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids); int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall); - /** * struct vport_portids - array of netlink portids of a vport. * must be protected by rcu. @@ -140,8 +129,6 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. - * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for - * a packet. */ struct vport_ops { enum ovs_vport_type type; @@ -154,9 +141,6 @@ struct vport_ops { int (*get_options)(const struct vport *, struct sk_buff *); netdev_tx_t (*send) (struct sk_buff *skb); - int (*get_egress_tun_info)(struct vport *, struct sk_buff *, - struct dp_upcall_info *upcall); - struct module *owner; struct list_head list; }; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 384ea1e3cd69..b5476aebd68d 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -573,6 +573,7 @@ static void rds_exit(void) rds_threads_exit(); rds_stats_exit(); rds_page_exit(); + rds_bind_lock_destroy(); rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); } @@ -582,11 +583,14 @@ static int rds_init(void) { int ret; - rds_bind_lock_init(); + ret = rds_bind_lock_init(); + if (ret) + goto out; ret = rds_conn_init(); if (ret) - goto out; + goto out_bind; + ret = rds_threads_init(); if (ret) goto out_conn; @@ -620,6 +624,8 @@ out_conn: rds_conn_exit(); rds_cong_exit(); rds_page_exit(); +out_bind: + rds_bind_lock_destroy(); out: return ret; } diff --git a/net/rds/bind.c b/net/rds/bind.c index 61925667b7a4..b22ea956522b 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -38,54 +38,17 @@ #include <linux/ratelimit.h> #include "rds.h" -struct bind_bucket { - rwlock_t lock; - struct hlist_head head; +static struct rhashtable bind_hash_table; + +static struct rhashtable_params ht_parms = { + .nelem_hint = 768, + .key_len = sizeof(u64), + .key_offset = offsetof(struct rds_sock, rs_bound_key), + .head_offset = offsetof(struct rds_sock, rs_bound_node), + .max_size = 16384, + .min_size = 1024, }; -#define BIND_HASH_SIZE 1024 -static struct bind_bucket bind_hash_table[BIND_HASH_SIZE]; - -static struct bind_bucket *hash_to_bucket(__be32 addr, __be16 port) -{ - return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) & - (BIND_HASH_SIZE - 1)); -} - -/* must hold either read or write lock (write lock for insert != NULL) */ -static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket, - __be32 addr, __be16 port, - struct rds_sock *insert) -{ - struct rds_sock *rs; - struct hlist_head *head = &bucket->head; - u64 cmp; - u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); - - hlist_for_each_entry(rs, head, rs_bound_node) { - cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | - be16_to_cpu(rs->rs_bound_port); - - if (cmp == needle) { - rds_sock_addref(rs); - return rs; - } - } - - if (insert) { - /* - * make sure our addr and port are set before - * we are added to the list. - */ - insert->rs_bound_addr = addr; - insert->rs_bound_port = port; - rds_sock_addref(insert); - - hlist_add_head(&insert->rs_bound_node, head); - } - return NULL; -} - /* * Return the rds_sock bound at the given local address. * @@ -94,18 +57,14 @@ static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket, */ struct rds_sock *rds_find_bound(__be32 addr, __be16 port) { + u64 key = ((u64)addr << 32) | port; struct rds_sock *rs; - unsigned long flags; - struct bind_bucket *bucket = hash_to_bucket(addr, port); - - read_lock_irqsave(&bucket->lock, flags); - rs = rds_bind_lookup(bucket, addr, port, NULL); - read_unlock_irqrestore(&bucket->lock, flags); - if (rs && sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) { - rds_sock_put(rs); + rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms); + if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) + rds_sock_addref(rs); + else rs = NULL; - } rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, ntohs(port)); @@ -116,10 +75,9 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port) /* returns -ve errno or +ve port */ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) { - unsigned long flags; int ret = -EADDRINUSE; u16 rover, last; - struct bind_bucket *bucket; + u64 key; if (*port != 0) { rover = be16_to_cpu(*port); @@ -130,22 +88,29 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) } do { - struct rds_sock *rrs; if (rover == 0) rover++; - bucket = hash_to_bucket(addr, cpu_to_be16(rover)); - write_lock_irqsave(&bucket->lock, flags); - rrs = rds_bind_lookup(bucket, addr, cpu_to_be16(rover), rs); - write_unlock_irqrestore(&bucket->lock, flags); - if (!rrs) { + key = ((u64)addr << 32) | cpu_to_be16(rover); + if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms)) + continue; + + rs->rs_bound_key = key; + rs->rs_bound_addr = addr; + rs->rs_bound_port = cpu_to_be16(rover); + rs->rs_bound_node.next = NULL; + rds_sock_addref(rs); + if (!rhashtable_insert_fast(&bind_hash_table, + &rs->rs_bound_node, ht_parms)) { *port = rs->rs_bound_port; ret = 0; rdsdebug("rs %p binding to %pI4:%d\n", rs, &addr, (int)ntohs(*port)); break; } else { - rds_sock_put(rrs); + rds_sock_put(rs); + ret = -ENOMEM; + break; } } while (rover++ != last); @@ -154,23 +119,17 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) void rds_remove_bound(struct rds_sock *rs) { - unsigned long flags; - struct bind_bucket *bucket = - hash_to_bucket(rs->rs_bound_addr, rs->rs_bound_port); - - write_lock_irqsave(&bucket->lock, flags); - if (rs->rs_bound_addr) { - rdsdebug("rs %p unbinding from %pI4:%d\n", - rs, &rs->rs_bound_addr, - ntohs(rs->rs_bound_port)); + if (!rs->rs_bound_addr) + return; - hlist_del_init(&rs->rs_bound_node); - rds_sock_put(rs); - rs->rs_bound_addr = 0; - } + rdsdebug("rs %p unbinding from %pI4:%d\n", + rs, &rs->rs_bound_addr, + ntohs(rs->rs_bound_port)); - write_unlock_irqrestore(&bucket->lock, flags); + rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms); + rds_sock_put(rs); + rs->rs_bound_addr = 0; } int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -224,10 +183,12 @@ out: return ret; } -void rds_bind_lock_init(void) +void rds_bind_lock_destroy(void) { - int i; + rhashtable_destroy(&bind_hash_table); +} - for (i = 0; i < BIND_HASH_SIZE; i++) - rwlock_init(&bind_hash_table[i].lock); +int rds_bind_lock_init(void) +{ + return rhashtable_init(&bind_hash_table, &ht_parms); } diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 6a8fbd6e69e7..d3d4454ffc84 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -75,7 +75,7 @@ struct rds_iw_mr_pool { int max_pages; }; -static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); +static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); static void rds_iw_mr_pool_flush_worker(struct work_struct *work); static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, @@ -479,14 +479,13 @@ void rds_iw_sync_mr(void *trans_private, int direction) * If the number of MRs allocated exceeds the limit, we also try * to free as many MRs as needed to get back to this limit. */ -static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) +static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) { struct rds_iw_mr *ibmr, *next; LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; unsigned int nfreed = 0, ncleaned = 0, unpinned = 0; - int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -538,7 +537,6 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) atomic_sub(nfreed, &pool->item_count); mutex_unlock(&pool->flush_lock); - return ret; } static void rds_iw_mr_pool_flush_worker(struct work_struct *work) diff --git a/net/rds/rds.h b/net/rds/rds.h index 543c308fcc2a..0e2797bdc316 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -7,6 +7,7 @@ #include <rdma/rdma_cm.h> #include <linux/mutex.h> #include <linux/rds.h> +#include <linux/rhashtable.h> #include "info.h" @@ -474,7 +475,8 @@ struct rds_sock { * bound_addr used for both incoming and outgoing, no INADDR_ANY * support. */ - struct hlist_node rs_bound_node; + struct rhash_head rs_bound_node; + u64 rs_bound_key; __be32 rs_bound_addr; __be32 rs_conn_addr; __be16 rs_bound_port; @@ -605,7 +607,8 @@ extern wait_queue_head_t rds_poll_waitq; int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); void rds_remove_bound(struct rds_sock *rs); struct rds_sock *rds_find_bound(__be32 addr, __be16 port); -void rds_bind_lock_init(void); +int rds_bind_lock_init(void); +void rds_bind_lock_destroy(void); /* cong.c */ int rds_cong_get_maps(struct rds_connection *conn); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index fbc5ef88bc0e..27a992154804 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } to_copy = min(tc->t_tinc_data_rem, left); - pskb_pull(clone, offset); - pskb_trim(clone, to_copy); + if (!pskb_pull(clone, offset) || + pskb_trim(clone, to_copy)) { + pr_warn("rds_tcp_data_recv: pull/trim failed " + "left %zu data_rem %zu skb_len %d\n", + left, tc->t_tinc_data_rem, skb->len); + kfree_skb(clone); + desc->error = -ENOMEM; + goto out; + } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 1eb76956b439..6dfd19e52938 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -337,6 +337,21 @@ int switchdev_port_attr_set(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_attr_set); +static size_t switchdev_obj_size(const struct switchdev_obj *obj) +{ + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + return sizeof(struct switchdev_obj_port_vlan); + case SWITCHDEV_OBJ_ID_IPV4_FIB: + return sizeof(struct switchdev_obj_ipv4_fib); + case SWITCHDEV_OBJ_ID_PORT_FDB: + return sizeof(struct switchdev_obj_port_fdb); + default: + BUG(); + } + return 0; +} + static int __switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) @@ -422,7 +437,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev, static int switchdev_port_obj_add_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_add_deferred); } @@ -490,7 +505,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, static int switchdev_port_obj_del_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_del_deferred); } @@ -746,7 +761,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u16 mode = BRIDGE_MODE_UNDEF; - u32 mask = BR_LEARNING | BR_LEARNING_SYNC; + u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; int err; err = switchdev_port_attr_get(dev, &attr); @@ -817,6 +832,9 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev, err = switchdev_port_br_setflag(dev, attr, BR_LEARNING_SYNC); break; + case IFLA_BRPORT_UNICAST_FLOOD: + err = switchdev_port_br_setflag(dev, attr, BR_FLOOD); + break; default: err = -EOPNOTSUPP; break; @@ -866,7 +884,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } else { if (vlan.vid_begin) return -EINVAL; @@ -875,7 +893,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e7000be321b0..ed98c1fc3de1 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -94,10 +94,14 @@ __init int net_sysctl_init(void) goto out; ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) - goto out; + goto out1; register_sysctl_root(&net_sysctl_root); out: return ret; +out1: + unregister_sysctl_table(net_header); + net_header = NULL; + goto out; } struct ctl_table_header *register_net_sysctl(struct net *net, diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 41042de3ae9b..9dc239dfe192 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -35,741 +35,301 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/tipc_config.h> #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" -#include "core.h" +#include "link.h" +#include "node.h" -#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ -#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff); -static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); - -static void tipc_bclink_lock(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - spin_lock_bh(&tn->bclink->lock); -} - -static void tipc_bclink_unlock(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - spin_unlock_bh(&tn->bclink->lock); -} - -void tipc_bclink_input(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq); -} - -uint tipc_bclink_get_mtu(void) -{ - return MAX_PKT_DEFAULT_MCAST; -} - -static u32 bcbuf_acks(struct sk_buff *buf) -{ - return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; -} - -static void bcbuf_set_acks(struct sk_buff *buf, u32 acks) -{ - TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks; -} - -static void bcbuf_decr_acks(struct sk_buff *buf) -{ - bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); -} +/** + * struct tipc_bc_base - base structure for keeping broadcast send state + * @link: broadcast send link structure + * @inputq: data input queue; will only carry SOCK_WAKEUP messages + * @dest: array keeping number of reachable destinations per bearer + * @primary_bearer: a bearer having links to all broadcast destinations, if any + */ +struct tipc_bc_base { + struct tipc_link *link; + struct sk_buff_head inputq; + int dests[MAX_BEARERS]; + int primary_bearer; +}; -void tipc_bclink_add_node(struct net *net, u32 addr) +static struct tipc_bc_base *tipc_bc_base(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_nmap_add(&tn->bclink->bcast_nodes, addr); - tipc_bclink_unlock(net); + return tipc_net(net)->bcbase; } -void tipc_bclink_remove_node(struct net *net, u32 addr) +int tipc_bcast_get_mtu(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); - - /* Last node? => reset backlog queue */ - if (!tn->bclink->bcast_nodes.count) - tipc_link_purge_backlog(&tn->bclink->link); - - tipc_bclink_unlock(net); + return tipc_link_mtu(tipc_bc_sndlink(net)); } -static void bclink_set_last_sent(struct net *net) +/* tipc_bcbase_select_primary(): find a bearer with links to all destinations, + * if any, and make it primary bearer + */ +static void tipc_bcbase_select_primary(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_bc_base *bb = tipc_bc_base(net); + int all_dests = tipc_link_bc_peers(bb->link); + int i, mtu; - bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1); -} + bb->primary_bearer = INVALID_BEARER_ID; -u32 tipc_bclink_get_last_sent(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); + if (!all_dests) + return; - return tn->bcl->silent_intv_cnt; -} + for (i = 0; i < MAX_BEARERS; i++) { + if (!bb->dests[i]) + continue; -static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) -{ - node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ? - seqno : node->bclink.last_sent; -} + mtu = tipc_bearer_mtu(net, i); + if (mtu < tipc_link_mtu(bb->link)) + tipc_link_set_mtu(bb->link, mtu); -/** - * tipc_bclink_retransmit_to - get most recent node to request retransmission - * - * Called with bclink_lock locked - */ -struct tipc_node *tipc_bclink_retransmit_to(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - return tn->bclink->retransmit_to; -} + if (bb->dests[i] < all_dests) + continue; -/** - * bclink_retransmit_pkt - retransmit broadcast packets - * @after: sequence number of last packet to *not* retransmit - * @to: sequence number of last packet to retransmit - * - * Called with bclink_lock locked - */ -static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) -{ - struct sk_buff *skb; - struct tipc_link *bcl = tn->bcl; + bb->primary_bearer = i; - skb_queue_walk(&bcl->transmq, skb) { - if (more(buf_seqno(skb), after)) { - tipc_link_retransmit(bcl, skb, mod(to - after)); + /* Reduce risk that all nodes select same primary */ + if ((i ^ tipc_own_addr(net)) & 1) break; - } } } -/** - * bclink_prepare_wakeup - prepare users for wakeup after congestion - * @bcl: broadcast link - * @resultq: queue for users which can be woken up - * Move a number of waiting users, as permitted by available space in - * the send queue, from link wait queue to specified queue for wakeup - */ -static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq) +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id) { - int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; - int imp, lim; - struct sk_buff *skb, *tmp; - - skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) { - imp = TIPC_SKB_CB(skb)->chain_imp; - lim = bcl->window + bcl->backlog[imp].limit; - pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; - if ((pnd[imp] + bcl->backlog[imp].len) >= lim) - continue; - skb_unlink(skb, &bcl->wakeupq); - skb_queue_tail(resultq, skb); - } -} + struct tipc_bc_base *bb = tipc_bc_base(net); -/** - * tipc_bclink_wakeup_users - wake up pending users - * - * Called with no locks taken - */ -void tipc_bclink_wakeup_users(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct sk_buff_head resultq; - - skb_queue_head_init(&resultq); - bclink_prepare_wakeup(bcl, &resultq); - tipc_sk_rcv(net, &resultq); + tipc_bcast_lock(net); + bb->dests[bearer_id]++; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets - * @n_ptr: node that sent acknowledgement info - * @acked: broadcast sequence # that has been acknowledged - * - * Node is locked, bclink_lock unlocked. - */ -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id) { - struct sk_buff *skb, *tmp; - unsigned int released = 0; - struct net *net = n_ptr->net; - struct tipc_net *tn = net_generic(net, tipc_net_id); - - if (unlikely(!n_ptr->bclink.recv_permitted)) - return; - - tipc_bclink_lock(net); - - /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&tn->bcl->transmq); - if (!skb) - goto exit; - - /* Determine which messages need to be acknowledged */ - if (acked == INVALID_LINK_SEQ) { - /* - * Contact with specified node has been lost, so need to - * acknowledge sent messages only (if other nodes still exist) - * or both sent and unsent messages (otherwise) - */ - if (tn->bclink->bcast_nodes.count) - acked = tn->bcl->silent_intv_cnt; - else - acked = tn->bcl->snd_nxt; - } else { - /* - * Bail out if specified sequence number does not correspond - * to a message that has been sent and not yet acknowledged - */ - if (less(acked, buf_seqno(skb)) || - less(tn->bcl->silent_intv_cnt, acked) || - less_eq(acked, n_ptr->bclink.acked)) - goto exit; - } - - /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&tn->bcl->transmq, skb) { - if (more(buf_seqno(skb), n_ptr->bclink.acked)) - break; - } - - /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) { - if (more(buf_seqno(skb), acked)) - break; - bcbuf_decr_acks(skb); - bclink_set_last_sent(net); - if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &tn->bcl->transmq); - kfree_skb(skb); - released = 1; - } - } - n_ptr->bclink.acked = acked; + struct tipc_bc_base *bb = tipc_bc_base(net); - /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(skb_peek(&tn->bcl->backlogq))) { - tipc_link_push_packets(tn->bcl); - bclink_set_last_sent(net); - } - if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) - n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; -exit: - tipc_bclink_unlock(net); + tipc_bcast_lock(net); + bb->dests[bearer_id]--; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * tipc_bclink_update_link_state - update broadcast link state +/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers * - * RCU and node lock set + * Note that number of reachable destinations, as indicated in the dests[] + * array, may transitionally differ from the number of destinations indicated + * in each sent buffer. We can sustain this. Excess destination nodes will + * drop and never acknowledge the unexpected packets, and missing destinations + * will either require retransmission (if they are just about to be added to + * the bearer), or be removed from the buffer's 'ackers' counter (if they + * just went down) */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, - u32 last_sent) +static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) { - struct sk_buff *buf; - struct net *net = n_ptr->net; - struct tipc_net *tn = net_generic(net, tipc_net_id); + int bearer_id; + struct tipc_bc_base *bb = tipc_bc_base(net); + struct sk_buff *skb, *_skb; + struct sk_buff_head _xmitq; - /* Ignore "stale" link state info */ - if (less_eq(last_sent, n_ptr->bclink.last_in)) + if (skb_queue_empty(xmitq)) return; - /* Update link synchronization state; quit if in sync */ - bclink_update_last_sent(n_ptr, last_sent); - - if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in) + /* The typical case: at least one bearer has links to all nodes */ + bearer_id = bb->primary_bearer; + if (bearer_id >= 0) { + tipc_bearer_bc_xmit(net, bearer_id, xmitq); return; - - /* Update out-of-sync state; quit if loss is still unconfirmed */ - if ((++n_ptr->bclink.oos_state) == 1) { - if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2)) - return; - n_ptr->bclink.oos_state++; } - /* Don't NACK if one has been recently sent (or seen) */ - if (n_ptr->bclink.oos_state & 0x1) - return; - - /* Send NACK */ - buf = tipc_buf_acquire(INT_H_SIZE); - if (buf) { - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq); - u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - - tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, - INT_H_SIZE, n_ptr->addr); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tn->net_id); - msg_set_bcast_ack(msg, n_ptr->bclink.last_in); - msg_set_bcgap_after(msg, n_ptr->bclink.last_in); - msg_set_bcgap_to(msg, to); - - tipc_bclink_lock(net); - tipc_bearer_send(net, MAX_BEARERS, buf, NULL); - tn->bcl->stats.sent_nacks++; - tipc_bclink_unlock(net); - kfree_skb(buf); - - n_ptr->bclink.oos_state++; - } -} - -void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr) -{ - u16 last = msg_last_bcast(hdr); - int mtyp = msg_type(hdr); + /* We have to transmit across all bearers */ + skb_queue_head_init(&_xmitq); + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + if (!bb->dests[bearer_id]) + continue; - if (unlikely(msg_user(hdr) != LINK_PROTOCOL)) - return; - if (mtyp == STATE_MSG) { - tipc_bclink_update_link_state(n, last); - return; + skb_queue_walk(xmitq, skb) { + _skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_queue_tail(&_xmitq, _skb); + } + tipc_bearer_bc_xmit(net, bearer_id, &_xmitq); } - /* Compatibility: older nodes don't know BCAST_PROTOCOL synchronization, - * and transfer synch info in LINK_PROTOCOL messages. - */ - if (tipc_node_is_up(n)) - return; - if ((mtyp != RESET_MSG) && (mtyp != ACTIVATE_MSG)) - return; - n->bclink.last_sent = last; - n->bclink.last_in = last; - n->bclink.oos_state = 0; + __skb_queue_purge(xmitq); + __skb_queue_purge(&_xmitq); } -/** - * bclink_peek_nack - monitor retransmission requests sent by other nodes - * - * Delay any upcoming NACK by this node if another node has already - * requested the first message this node is going to ask for. - */ -static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) -{ - struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); - - if (unlikely(!n_ptr)) - return; - - tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && - (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && - (n_ptr->bclink.last_in == msg_bcgap_after(msg))) - n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); - tipc_node_put(n_ptr); -} - -/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster +/* tipc_bcast_xmit - deliver buffer chain to all nodes in cluster * and to identified node local sockets * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct tipc_bclink *bclink = tn->bclink; + struct tipc_link *l = tipc_bc_sndlink(net); + struct sk_buff_head xmitq, inputq, rcvq; int rc = 0; - int bc = 0; - struct sk_buff *skb; - struct sk_buff_head arrvq; - struct sk_buff_head inputq; - /* Prepare clone of message for local node */ - skb = tipc_msg_reassemble(list); - if (unlikely(!skb)) - return -EHOSTUNREACH; + __skb_queue_head_init(&rcvq); + __skb_queue_head_init(&xmitq); + skb_queue_head_init(&inputq); - /* Broadcast to all nodes */ - if (likely(bclink)) { - tipc_bclink_lock(net); - if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(net, bcl, list); - if (likely(!rc)) { - u32 len = skb_queue_len(&bcl->transmq); - - bclink_set_last_sent(net); - bcl->stats.queue_sz_counts++; - bcl->stats.accu_queue_sz += len; - } - bc = 1; - } - tipc_bclink_unlock(net); - } + /* Prepare message clone for local node */ + if (unlikely(!tipc_msg_reassemble(list, &rcvq))) + return -EHOSTUNREACH; - if (unlikely(!bc)) - __skb_queue_purge(list); + tipc_bcast_lock(net); + if (tipc_link_bc_peers(l)) + rc = tipc_link_xmit(l, list, &xmitq); + tipc_bcast_unlock(net); + /* Don't send to local node if adding to link failed */ if (unlikely(rc)) { - kfree_skb(skb); + __skb_queue_purge(&rcvq); return rc; } - /* Deliver message clone */ - __skb_queue_head_init(&arrvq); - skb_queue_head_init(&inputq); - __skb_queue_tail(&arrvq, skb); - tipc_sk_mcast_rcv(net, &arrvq, &inputq); - return rc; -} -/** - * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet - * - * Called with both sending node's lock and bclink_lock taken. - */ -static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) -{ - struct tipc_net *tn = net_generic(node->net, tipc_net_id); - - bclink_update_last_sent(node, seqno); - node->bclink.last_in = seqno; - node->bclink.oos_state = 0; - tn->bcl->stats.recv_info++; - - /* - * Unicast an ACK periodically, ensuring that - * all nodes in the cluster don't ACK at the same time - */ - if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { - tipc_link_proto_xmit(node_active_link(node, node->addr), - STATE_MSG, 0, 0, 0, 0); - tn->bcl->stats.sent_acks++; - } + /* Broadcast to all nodes, inluding local node */ + tipc_bcbase_xmit(net, &xmitq); + tipc_sk_mcast_rcv(net, &rcvq, &inputq); + __skb_queue_purge(list); + return 0; } -/** - * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards +/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct tipc_msg *msg = buf_msg(buf); - struct tipc_node *node; - u32 next_in; - u32 seqno; - int deferred = 0; - int pos = 0; - struct sk_buff *iskb; - struct sk_buff_head *arrvq, *inputq; - - /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tn->net_id) - goto exit; - - node = tipc_node_find(net, msg_prevnode(msg)); - if (unlikely(!node)) - goto exit; - - tipc_node_lock(node); - if (unlikely(!node->bclink.recv_permitted)) - goto unlock; - - /* Handle broadcast protocol message */ - if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { - if (msg_type(msg) != STATE_MSG) - goto unlock; - if (msg_destnode(msg) == tn->own_addr) { - tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_bclink_lock(net); - bcl->stats.recv_nacks++; - tn->bclink->retransmit_to = node; - bclink_retransmit_pkt(tn, msg_bcgap_after(msg), - msg_bcgap_to(msg)); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else { - tipc_node_unlock(node); - bclink_peek_nack(net, msg); - } - tipc_node_put(node); - goto exit; - } - - /* Handle in-sequence broadcast message */ - seqno = msg_seqno(msg); - next_in = mod(node->bclink.last_in + 1); - arrvq = &tn->bclink->arrvq; - inputq = &tn->bclink->inputq; - - if (likely(seqno == next_in)) { -receive: - /* Deliver message to destination */ - if (likely(msg_isdata(msg))) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - spin_lock_bh(&inputq->lock); - __skb_queue_tail(arrvq, buf); - spin_unlock_bh(&inputq->lock); - node->action_flags |= TIPC_BCAST_MSG_EVT; - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - bcl->stats.recv_bundles++; - bcl->stats.recv_bundled += msg_msgcnt(msg); - pos = 0; - while (tipc_msg_extract(buf, &iskb, &pos)) { - spin_lock_bh(&inputq->lock); - __skb_queue_tail(arrvq, iskb); - spin_unlock_bh(&inputq->lock); - } - node->action_flags |= TIPC_BCAST_MSG_EVT; - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else if (msg_user(msg) == MSG_FRAGMENTER) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_buf_append(&node->bclink.reasm_buf, &buf); - if (unlikely(!buf && !node->bclink.reasm_buf)) { - tipc_bclink_unlock(net); - goto unlock; - } - bcl->stats.recv_fragments++; - if (buf) { - bcl->stats.recv_fragmented++; - msg = buf_msg(buf); - tipc_bclink_unlock(net); - goto receive; - } - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - kfree_skb(buf); - } - buf = NULL; - - /* Determine new synchronization state */ - tipc_node_lock(node); - if (unlikely(!tipc_node_is_up(node))) - goto unlock; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; + int rc; - if (node->bclink.last_in == node->bclink.last_sent) - goto unlock; + __skb_queue_head_init(&xmitq); - if (skb_queue_empty(&node->bclink.deferdq)) { - node->bclink.oos_state = 1; - goto unlock; - } - - msg = buf_msg(skb_peek(&node->bclink.deferdq)); - seqno = msg_seqno(msg); - next_in = mod(next_in + 1); - if (seqno != next_in) - goto unlock; - - /* Take in-sequence message from deferred queue & deliver it */ - buf = __skb_dequeue(&node->bclink.deferdq); - goto receive; - } - - /* Handle out-of-sequence broadcast message */ - if (less(next_in, seqno)) { - deferred = tipc_link_defer_pkt(&node->bclink.deferdq, - buf); - bclink_update_last_sent(node, seqno); - buf = NULL; + if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) { + kfree_skb(skb); + return 0; } - tipc_bclink_lock(net); - - if (deferred) - bcl->stats.deferred_recv++; + tipc_bcast_lock(net); + if (msg_user(hdr) == BCAST_PROTOCOL) + rc = tipc_link_bc_nack_rcv(l, skb, &xmitq); else - bcl->stats.duplicates++; + rc = tipc_link_rcv(l, skb, NULL); + tipc_bcast_unlock(net); - tipc_bclink_unlock(net); + tipc_bcbase_xmit(net, &xmitq); -unlock: - tipc_node_unlock(node); - tipc_node_put(node); -exit: - kfree_skb(buf); -} + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) -{ - return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); + return rc; } - -/** - * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer - * - * Send packet over as many bearers as necessary to reach all nodes - * that have joined the broadcast link. +/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge * - * Returns 0 (packet sent successfully) under all circumstances, - * since the broadcast link's pseudo-bearer never blocks + * RCU is locked, no other locks set */ -static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, - struct tipc_bearer *unused1, - struct tipc_media_addr *unused2) +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) { - int bp_index; - struct tipc_msg *msg = buf_msg(buf); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer = tn->bcbearer; - struct tipc_bclink *bclink = tn->bclink; - - /* Prepare broadcast link message for reliable transmission, - * if first time trying to send it; - * preparation is skipped for broadcast link protocol messages - * since they are sent in an unreliable manner and don't need it - */ - if (likely(!msg_non_seq(buf_msg(buf)))) { - bcbuf_set_acks(buf, bclink->bcast_nodes.count); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tn->net_id); - tn->bcl->stats.sent_info++; - if (WARN_ON(!bclink->bcast_nodes.count)) { - dump_stack(); - return 0; - } - } + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - /* Send buffer over bearers until all targets reached */ - bcbearer->remains = bclink->bcast_nodes; - - for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { - struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; - struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; - struct tipc_bearer *bp[2] = {p, s}; - struct tipc_bearer *b = bp[msg_link_selector(msg)]; - struct sk_buff *tbuf; - - if (!p) - break; /* No more bearers to try */ - if (!b) - b = p; - tipc_nmap_diff(&bcbearer->remains, &b->nodes, - &bcbearer->remains_new); - if (bcbearer->remains_new.count == bcbearer->remains.count) - continue; /* Nothing added by bearer pair */ - - if (bp_index == 0) { - /* Use original buffer for first bearer */ - tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); - } else { - /* Avoid concurrent buffer access */ - tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); - if (!tbuf) - break; - tipc_bearer_send(net, b->identity, tbuf, - &b->bcast_addr); - kfree_skb(tbuf); /* Bearer keeps a clone */ - } - if (bcbearer->remains_new.count == 0) - break; /* All targets reached */ + __skb_queue_head_init(&xmitq); - bcbearer->remains = bcbearer->remains_new; - } + tipc_bcast_lock(net); + tipc_link_bc_ack_rcv(l, acked, &xmitq); + tipc_bcast_unlock(net); - return 0; + tipc_bcbase_xmit(net, &xmitq); + + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); } -/** - * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer +/* tipc_bcast_synch_rcv - check and update rcv link with peer's send state + * + * RCU is locked, no other locks set */ -void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, - u32 node, bool action) +void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer = tn->bcbearer; - struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; - struct tipc_bcbearer_pair *bp_curr; - struct tipc_bearer *b; - int b_index; - int pri; + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - tipc_bclink_lock(net); + __skb_queue_head_init(&xmitq); - if (action) - tipc_nmap_add(nm_ptr, node); - else - tipc_nmap_remove(nm_ptr, node); + tipc_bcast_lock(net); + if (msg_type(hdr) == STATE_MSG) { + tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); + tipc_link_bc_sync_rcv(l, hdr, &xmitq); + } else { + tipc_link_bc_init_rcv(l, hdr); + } + tipc_bcast_unlock(net); - /* Group bearers by priority (can assume max of two per priority) */ - memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + tipc_bcbase_xmit(net, &xmitq); - rcu_read_lock(); - for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(tn->bearer_list[b_index]); - if (!b || !b->nodes.count) - continue; - - if (!bp_temp[b->priority].primary) - bp_temp[b->priority].primary = b; - else - bp_temp[b->priority].secondary = b; - } - rcu_read_unlock(); + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); +} - /* Create array of bearer pairs for broadcasting */ - bp_curr = bcbearer->bpairs; - memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs)); +/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); - for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) { + tipc_bcast_lock(net); + tipc_link_add_bc_peer(snd_l, uc_l, xmitq); + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); +} - if (!bp_temp[pri].primary) - continue; +/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - bp_curr->primary = bp_temp[pri].primary; + __skb_queue_head_init(&xmitq); - if (bp_temp[pri].secondary) { - if (tipc_nmap_equal(&bp_temp[pri].primary->nodes, - &bp_temp[pri].secondary->nodes)) { - bp_curr->secondary = bp_temp[pri].secondary; - } else { - bp_curr++; - bp_curr->primary = bp_temp[pri].secondary; - } - } + tipc_bcast_lock(net); + tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq); + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); - bp_curr++; - } + tipc_bcbase_xmit(net, &xmitq); - tipc_bclink_unlock(net); + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -835,7 +395,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (!bcl) return 0; - tipc_bclink_lock(net); + tipc_bcast_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -870,7 +430,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (err) goto attr_msg_full; - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -881,7 +441,7 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; @@ -895,25 +455,25 @@ int tipc_bclink_reset_stats(struct net *net) if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(net); + tipc_bcast_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(struct net *net, u32 limit) +static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_link *l = tipc_bc_sndlink(net); - if (!bcl) + if (!l) return -ENOPROTOOPT; - if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) + if (limit < BCLINK_WIN_MIN) + limit = BCLINK_WIN_MIN; + if (limit > TIPC_MAX_LINK_WIN) return -EINVAL; - - tipc_bclink_lock(net); - tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(net); + tipc_bcast_lock(net); + tipc_link_set_queue_limits(l, limit); + tipc_bcast_unlock(net); return 0; } @@ -935,123 +495,51 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - return tipc_bclink_set_queue_limits(net, win); + return tipc_bc_link_set_queue_limits(net, win); } -int tipc_bclink_init(struct net *net) +int tipc_bcast_init(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer; - struct tipc_bclink *bclink; - struct tipc_link *bcl; - - bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); - if (!bcbearer) - return -ENOMEM; - - bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); - if (!bclink) { - kfree(bcbearer); - return -ENOMEM; - } + struct tipc_net *tn = tipc_net(net); + struct tipc_bc_base *bb = NULL; + struct tipc_link *l = NULL; - bcl = &bclink->link; - bcbearer->bearer.media = &bcbearer->media; - bcbearer->media.send_msg = tipc_bcbearer_send; - sprintf(bcbearer->media.name, "tipc-broadcast"); - - spin_lock_init(&bclink->lock); - __skb_queue_head_init(&bcl->transmq); - __skb_queue_head_init(&bcl->backlogq); - __skb_queue_head_init(&bcl->deferdq); - skb_queue_head_init(&bcl->wakeupq); - bcl->snd_nxt = 1; - spin_lock_init(&bclink->node.lock); - __skb_queue_head_init(&bclink->arrvq); - skb_queue_head_init(&bclink->inputq); - bcl->owner = &bclink->node; - bcl->owner->net = net; - bcl->mtu = MAX_PKT_DEFAULT_MCAST; - tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); - bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; - msg_set_prevnode(bcl->pmsg, tn->own_addr); - strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); - tn->bcbearer = bcbearer; - tn->bclink = bclink; - tn->bcl = bcl; - return 0; -} + bb = kzalloc(sizeof(*bb), GFP_ATOMIC); + if (!bb) + goto enomem; + tn->bcbase = bb; + spin_lock_init(&tipc_net(net)->bclock); -void tipc_bclink_stop(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_link_purge_queues(tn->bcl); - tipc_bclink_unlock(net); - - RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); - synchronize_net(); - kfree(tn->bcbearer); - kfree(tn->bclink); + if (!tipc_link_bc_create(net, 0, 0, + U16_MAX, + BCLINK_WIN_DEFAULT, + 0, + &bb->inputq, + NULL, + NULL, + &l)) + goto enomem; + bb->link = l; + tn->bcl = l; + return 0; +enomem: + kfree(bb); + kfree(l); + return -ENOMEM; } -/** - * tipc_nmap_add - add a node to a node map - */ -static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +void tipc_bcast_reinit(struct net *net) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); + struct tipc_bc_base *b = tipc_bc_base(net); - if ((nm_ptr->map[w] & mask) == 0) { - nm_ptr->count++; - nm_ptr->map[w] |= mask; - } + msg_set_prevnode(b->link->pmsg, tipc_own_addr(net)); } -/** - * tipc_nmap_remove - remove a node from a node map - */ -static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +void tipc_bcast_stop(struct net *net) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); - - if ((nm_ptr->map[w] & mask) != 0) { - nm_ptr->map[w] &= ~mask; - nm_ptr->count--; - } -} + struct tipc_net *tn = net_generic(net, tipc_net_id); -/** - * tipc_nmap_diff - find differences between node maps - * @nm_a: input node map A - * @nm_b: input node map B - * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) - */ -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff) -{ - int stop = ARRAY_SIZE(nm_a->map); - int w; - int b; - u32 map; - - memset(nm_diff, 0, sizeof(*nm_diff)); - for (w = 0; w < stop; w++) { - map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]); - nm_diff->map[w] = map; - if (map != 0) { - for (b = 0 ; b < WSIZE; b++) { - if (map & (1 << b)) - nm_diff->count++; - } - } - } + synchronize_net(); + kfree(tn->bcbase); + kfree(tn->bcl); } diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index d74c69bcf60b..2855b9356a15 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -37,102 +37,44 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#include <linux/tipc_config.h> -#include "link.h" -#include "node.h" +#include "core.h" -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. - */ - -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". - */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; +struct tipc_node; +struct tipc_msg; +struct tipc_nl_msg; +struct tipc_node_map; -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. - */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - struct sk_buff_head arrvq; - struct sk_buff_head inputq; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; +int tipc_bcast_init(struct net *net); +void tipc_bcast_reinit(struct net *net); +void tipc_bcast_stop(struct net *net); +void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); +int tipc_bcast_get_mtu(struct net *net); +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); +void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); +int tipc_bclink_reset_stats(struct net *net); -struct tipc_node; -extern const char tipc_bclink_name[]; +static inline void tipc_bcast_lock(struct net *net) +{ + spin_lock_bh(&tipc_net(net)->bclock); +} -/** - * tipc_nmap_equal - test for equality of node maps - */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b) +static inline void tipc_bcast_unlock(struct net *net) { - return !memcmp(nm_a, nm_b, sizeof(*nm_a)); + spin_unlock_bh(&tipc_net(net)->bclock); } -int tipc_bclink_init(struct net *net); -void tipc_bclink_stop(struct net *net); -void tipc_bclink_add_node(struct net *net, u32 addr); -void tipc_bclink_remove_node(struct net *net, u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(struct net *net); -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *node, - u32 last_sent); -int tipc_bclink_reset_stats(struct net *net); -int tipc_bclink_set_queue_limits(struct net *net, u32 limit); -void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, - u32 node, bool action); -uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); -void tipc_bclink_wakeup_users(struct net *net); -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); -int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); -void tipc_bclink_input(struct net *net); -void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg); +static inline struct tipc_link *tipc_bc_sndlink(struct net *net) +{ + return tipc_net(net)->bcl; +} #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 82b278668ab7..648f2a67f314 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -193,10 +193,8 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) { - tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); + if (b_ptr) tipc_disc_add_dest(b_ptr->link_req); - } rcu_read_unlock(); } @@ -207,10 +205,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) { - tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false); + if (b_ptr) tipc_disc_remove_dest(b_ptr->link_req); - } rcu_read_unlock(); } @@ -418,10 +414,9 @@ void tipc_disable_l2_media(struct tipc_bearer *b) * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address */ -int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, +int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dest) { - struct sk_buff *clone; struct net_device *dev; int delta; @@ -429,42 +424,48 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, if (!dev) return 0; - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - delta = dev->hard_header_len - skb_headroom(buf); + delta = dev->hard_header_len - skb_headroom(skb); if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(skb); return 0; } - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); + skb_reset_network_header(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_TIPC); + dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, skb->len); + dev_queue_xmit(skb); return 0; } -/* tipc_bearer_send- sends buffer to destination over bearer - * - * IMPORTANT: - * The media send routine must not alter the buffer being passed in - * as it may be needed for later retransmission! +int tipc_bearer_mtu(struct net *net, u32 bearer_id) +{ + int mtu = 0; + struct tipc_bearer *b; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]); + if (b) + mtu = b->mtu; + rcu_read_unlock(); + return mtu; +} + +/* tipc_bearer_xmit_skb - sends buffer to destination over bearer */ -void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, - struct tipc_media_addr *dest) +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b_ptr)) - b_ptr->media->send_msg(net, buf, b_ptr, dest); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b)) + b->media->send_msg(net, skb, b, dest); rcu_read_unlock(); } @@ -487,8 +488,31 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); b->media->send_msg(net, skb, b, dst); - /* Until we remove cloning in tipc_l2_send_msg(): */ - kfree_skb(skb); + } + } + rcu_read_unlock(); +} + +/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations + */ +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq) +{ + struct tipc_net *tn = tipc_net(net); + int net_id = tn->net_id; + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b)) { + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, &b->bcast_addr); } } rcu_read_unlock(); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6426f242f626..552185bc4773 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -163,6 +163,7 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; + int node_cnt; struct tipc_node_map nodes; }; @@ -215,10 +216,14 @@ struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); -void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, - struct tipc_media_addr *dest); +int tipc_bearer_mtu(struct net *net, u32 bearer_id); +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest); void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst); +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/core.c b/net/tipc/core.c index 005ba5eb0ea4..03a842870c52 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -42,6 +42,7 @@ #include "bearer.h" #include "net.h" #include "socket.h" +#include "bcast.h" #include <linux/module.h> @@ -71,8 +72,15 @@ static int __net_init tipc_init_net(struct net *net) err = tipc_topsrv_start(net); if (err) goto out_subscr; + + err = tipc_bcast_init(net); + if (err) + goto out_bclink; + return 0; +out_bclink: + tipc_bcast_stop(net); out_subscr: tipc_nametbl_stop(net); out_nametbl: @@ -85,6 +93,7 @@ static void __net_exit tipc_exit_net(struct net *net) { tipc_topsrv_stop(net); tipc_net_stop(net); + tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); } diff --git a/net/tipc/core.h b/net/tipc/core.h index b96b41eabf12..18e95a8020cd 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -62,8 +62,7 @@ struct tipc_node; struct tipc_bearer; -struct tipc_bcbearer; -struct tipc_bclink; +struct tipc_bc_base; struct tipc_link; struct tipc_name_table; struct tipc_server; @@ -93,8 +92,8 @@ struct tipc_net { struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; /* Broadcast link */ - struct tipc_bcbearer *bcbearer; - struct tipc_bclink *bclink; + spinlock_t bclock; + struct tipc_bc_base *bcbase; struct tipc_link *bcl; /* Socket hash table */ @@ -114,6 +113,11 @@ static inline struct tipc_net *tipc_net(struct net *net) return net_generic(net, tipc_net_id); } +static inline int tipc_netid(struct net *net) +{ + return tipc_net(net)->net_id; +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index d14e0a4aa9af..afe8c47c4085 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -89,7 +89,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, MAX_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); - msg_set_node_capabilities(msg, 0); + msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); @@ -167,11 +167,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { rskb = tipc_buf_acquire(MAX_H_SIZE); - if (rskb) { - tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); - tipc_bearer_send(net, bearer->identity, rskb, &maddr); - kfree_skb(rskb); - } + if (!rskb) + return; + tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); + tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr); } } @@ -225,6 +224,7 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) static void disc_timeout(unsigned long data) { struct tipc_link_req *req = (struct tipc_link_req *)data; + struct sk_buff *skb; int max_delay; spin_lock_bh(&req->lock); @@ -242,9 +242,9 @@ static void disc_timeout(unsigned long data) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); - - + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest); req->timer_intv *= 2; if (req->num_nodes) max_delay = TIPC_LINK_REQ_SLOW; @@ -271,6 +271,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) { struct tipc_link_req *req; + struct sk_buff *skb; req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) @@ -292,7 +293,9 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); return 0; } @@ -316,6 +319,7 @@ void tipc_disc_delete(struct tipc_link_req *req) void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_link_req *req = b_ptr->link_req; + struct sk_buff *skb; spin_lock_bh(&req->lock); tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); @@ -325,6 +329,8 @@ void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); - tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/link.c b/net/tipc/link.c index ff9b0b92e62e..9efbdbde2b08 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -50,6 +50,7 @@ */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; +static const char tipc_bclink_name[] = "broadcast-link"; static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, @@ -75,6 +76,14 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } }; +/* Send states for broadcast NACKs + */ +enum { + BC_NACK_SND_CONDITIONAL, + BC_NACK_SND_UNCONDITIONAL, + BC_NACK_SND_SUPPRESS, +}; + /* * Interval between NACKs when packets arrive out of order */ @@ -110,7 +119,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct sk_buff_head *xmitq); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); +static void tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); /* * Simple non-static link routines (i.e. referenced outside this file) @@ -150,11 +163,66 @@ bool tipc_link_is_blocked(struct tipc_link *l) return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); } +static bool link_is_bc_sndlink(struct tipc_link *l) +{ + return !l->bc_sndlink; +} + +static bool link_is_bc_rcvlink(struct tipc_link *l) +{ + return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l)); +} + int tipc_link_is_active(struct tipc_link *l) { - struct tipc_node *n = l->owner; + return l->active; +} + +void tipc_link_set_active(struct tipc_link *l, bool active) +{ + l->active = active; +} + +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *rcv_l = uc_l->bc_rcvlink; - return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l); + snd_l->ackers++; + rcv_l->acked = snd_l->snd_nxt - 1; + tipc_link_build_bc_init_msg(uc_l, xmitq); +} + +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq) +{ + u16 ack = snd_l->snd_nxt - 1; + + snd_l->ackers--; + tipc_link_bc_ack_rcv(rcv_l, ack, xmitq); + tipc_link_reset(rcv_l); + rcv_l->state = LINK_RESET; + if (!snd_l->ackers) { + tipc_link_reset(snd_l); + __skb_queue_purge(xmitq); + } +} + +int tipc_link_bc_peers(struct tipc_link *l) +{ + return l->ackers; +} + +void tipc_link_set_mtu(struct tipc_link *l, int mtu) +{ + l->mtu = mtu; +} + +int tipc_link_mtu(struct tipc_link *l) +{ + return l->mtu; } static u32 link_own_addr(struct tipc_link *l) @@ -165,57 +233,72 @@ static u32 link_own_addr(struct tipc_link *l) /** * tipc_link_create - create a new link * @n: pointer to associated node - * @b: pointer to associated bearer + * @if_name: associated interface name + * @bearer_id: id (index) of associated bearer + * @tolerance: link tolerance to be used by link + * @net_plane: network plane (A,B,c..) this link belongs to + * @mtu: mtu to be advertised by link + * @priority: priority to be used by link + * @window: send window to be used by link + * @session: session to be used by link * @ownnode: identity of own node - * @peer: identity of peer node - * @maddr: media address to be used + * @peer: node id of peer node + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending + * @bc_rcvlink: the peer specific link used for broadcast reception * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link * * Returns true if link was created, otherwise false */ -bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, - u32 ownnode, u32 peer, struct tipc_media_addr *maddr, - struct sk_buff_head *inputq, struct sk_buff_head *namedq, +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 ownnode, u32 peer, + u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, struct tipc_link **link) { struct tipc_link *l; struct tipc_msg *hdr; - char *if_name; l = kzalloc(sizeof(*l), GFP_ATOMIC); if (!l) return false; *link = l; + l->pmsg = (struct tipc_msg *)&l->proto_msg; + hdr = l->pmsg; + tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer); + msg_set_size(hdr, sizeof(l->proto_msg)); + msg_set_session(hdr, session); + msg_set_bearer_id(hdr, l->bearer_id); /* Note: peer i/f name is completed by reset/activate message */ - if_name = strchr(b->name, ':') + 1; sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown", tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); + strcpy((char *)msg_data(hdr), if_name); l->addr = peer; - l->media_addr = maddr; - l->owner = n; + l->peer_caps = peer_caps; + l->net = net; l->peer_session = WILDCARD_SESSION; - l->bearer_id = b->identity; - l->tolerance = b->tolerance; - l->net_plane = b->net_plane; - l->advertised_mtu = b->mtu; - l->mtu = b->mtu; - l->priority = b->priority; - tipc_link_set_queue_limits(l, b->window); + l->bearer_id = bearer_id; + l->tolerance = tolerance; + l->net_plane = net_plane; + l->advertised_mtu = mtu; + l->mtu = mtu; + l->priority = priority; + tipc_link_set_queue_limits(l, window); + l->ackers = 1; + l->bc_sndlink = bc_sndlink; + l->bc_rcvlink = bc_rcvlink; l->inputq = inputq; l->namedq = namedq; l->state = LINK_RESETTING; - l->pmsg = (struct tipc_msg *)&l->proto_msg; - hdr = l->pmsg; - tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer); - msg_set_size(hdr, sizeof(l->proto_msg)); - msg_set_session(hdr, session); - msg_set_bearer_id(hdr, l->bearer_id); - strcpy((char *)msg_data(hdr), if_name); __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); @@ -224,27 +307,43 @@ bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, return true; } -/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints. +/** + * tipc_link_bc_create - create new link to be used for broadcast + * @n: pointer to associated node + * @mtu: mtu to be used + * @window: send window to be used + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @link: return value, pointer to put the created link * - * Give a newly added peer node the sequence number where it should - * start receiving and acking broadcast packets. + * Returns true if link was created, otherwise false */ -void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq) +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link) { - struct sk_buff *skb; - struct sk_buff_head list; - u16 last_sent; + struct tipc_link *l; - skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, - 0, l->addr, link_own_addr(l), 0, 0, 0); - if (!skb) - return; - last_sent = tipc_bclink_get_last_sent(l->owner->net); - msg_set_last_bcast(buf_msg(skb), last_sent); - __skb_queue_head_init(&list); - __skb_queue_tail(&list, skb); - tipc_link_xmit(l, &list, xmitq); + if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window, + 0, ownnode, peer, peer_caps, bc_sndlink, + NULL, inputq, namedq, link)) + return false; + + l = *link; + strcpy(l->name, tipc_bclink_name); + tipc_link_reset(l); + l->state = LINK_RESET; + l->ackers = 0; + l->bc_rcvlink = l; + + /* Broadcast send link is always up */ + if (link_is_bc_sndlink(l)) + l->state = LINK_ESTABLISHED; + + return true; } /** @@ -451,12 +550,17 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ +/* tipc_link_timeout - perform periodic task as instructed from node timeout + */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int rc = 0; int mtyp = STATE_MSG; bool xmit = false; bool prb = false; + u16 bc_snt = l->bc_sndlink->snd_nxt - 1; + u16 bc_acked = l->bc_rcvlink->acked; + bool bc_up = link_is_up(l->bc_rcvlink); link_profile_stats(l); @@ -464,7 +568,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) case LINK_ESTABLISHED: case LINK_SYNCHING: if (!l->silent_intv_cnt) { - if (tipc_bclink_acks_missing(l->owner)) + if (bc_up && (bc_acked != bc_snt)) xmit = true; } else if (l->silent_intv_cnt <= l->abort_limit) { xmit = true; @@ -555,38 +659,6 @@ void link_prepare_wakeup(struct tipc_link *l) } } -/** - * tipc_link_reset_fragments - purge link's inbound message fragments queue - * @l_ptr: pointer to link - */ -void tipc_link_reset_fragments(struct tipc_link *l_ptr) -{ - kfree_skb(l_ptr->reasm_buf); - l_ptr->reasm_buf = NULL; -} - -void tipc_link_purge_backlog(struct tipc_link *l) -{ - __skb_queue_purge(&l->backlogq); - l->backlog[TIPC_LOW_IMPORTANCE].len = 0; - l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; - l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; - l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; - l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; -} - -/** - * tipc_link_purge_queues - purge all pkt queues associated with link - * @l_ptr: pointer to link - */ -void tipc_link_purge_queues(struct tipc_link *l_ptr) -{ - __skb_queue_purge(&l_ptr->deferdq); - __skb_queue_purge(&l_ptr->transmq); - tipc_link_purge_backlog(l_ptr); - tipc_link_reset_fragments(l_ptr); -} - void tipc_link_reset(struct tipc_link *l) { /* Link is down, accept any session */ @@ -598,12 +670,16 @@ void tipc_link_reset(struct tipc_link *l) /* Prepare for renewed mtu size negotiation */ l->mtu = l->advertised_mtu; - /* Clean up all queues: */ + /* Clean up all queues and counters: */ __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); skb_queue_splice_init(&l->wakeupq, l->inputq); - - tipc_link_purge_backlog(l); + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; kfree_skb(l->reasm_buf); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; @@ -611,81 +687,15 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_unacked = 0; l->snd_nxt = 1; l->rcv_nxt = 1; + l->acked = 0; l->silent_intv_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; + l->bc_peer_is_up = false; link_reset_statistics(l); } /** - * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked - * @link: link to use - * @list: chain of buffers containing message - * - * Consumes the buffer chain, except when returning an error code, - * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS - * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted - */ -int __tipc_link_xmit(struct net *net, struct tipc_link *link, - struct sk_buff_head *list) -{ - struct tipc_msg *msg = buf_msg(skb_peek(list)); - unsigned int maxwin = link->window; - unsigned int i, imp = msg_importance(msg); - uint mtu = link->mtu; - u16 ack = mod(link->rcv_nxt - 1); - u16 seqno = link->snd_nxt; - u16 bc_last_in = link->owner->bclink.last_in; - struct tipc_media_addr *addr = link->media_addr; - struct sk_buff_head *transmq = &link->transmq; - struct sk_buff_head *backlogq = &link->backlogq; - struct sk_buff *skb, *bskb; - - /* Match msg importance against this and all higher backlog limits: */ - for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { - if (unlikely(link->backlog[i].len >= link->backlog[i].limit)) - return link_schedule_user(link, list); - } - if (unlikely(msg_size(msg) > mtu)) - return -EMSGSIZE; - - /* Prepare each packet for sending, and add to relevant queue: */ - while (skb_queue_len(list)) { - skb = skb_peek(list); - msg = buf_msg(skb); - msg_set_seqno(msg, seqno); - msg_set_ack(msg, ack); - msg_set_bcast_ack(msg, bc_last_in); - - if (likely(skb_queue_len(transmq) < maxwin)) { - __skb_dequeue(list); - __skb_queue_tail(transmq, skb); - tipc_bearer_send(net, link->bearer_id, skb, addr); - link->rcv_unacked = 0; - seqno++; - continue; - } - if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) { - kfree_skb(__skb_dequeue(list)); - link->stats.sent_bundled++; - continue; - } - if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) { - kfree_skb(__skb_dequeue(list)); - __skb_queue_tail(backlogq, bskb); - link->backlog[msg_importance(buf_msg(bskb))].len++; - link->stats.sent_bundled++; - link->stats.sent_bundles++; - continue; - } - link->backlog[imp].len += skb_queue_len(list); - skb_queue_splice_tail_init(list, backlogq); - } - link->snd_nxt = seqno; - return 0; -} - -/** * tipc_link_xmit(): enqueue buffer list according to queue situation * @link: link to use * @list: chain of buffers containing message @@ -705,7 +715,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, unsigned int mtu = l->mtu; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; - u16 bc_last_in = l->owner->bclink.last_in; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; struct sk_buff_head *transmq = &l->transmq; struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff *skb, *_skb, *bskb; @@ -724,7 +734,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, hdr = buf_msg(skb); msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); - msg_set_bcast_ack(hdr, bc_last_in); + msg_set_bcast_ack(hdr, bc_ack); if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); @@ -733,6 +743,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, __skb_dequeue(list); __skb_queue_tail(transmq, skb); __skb_queue_tail(xmitq, _skb); + TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; seqno++; continue; @@ -757,62 +768,13 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return 0; } -/* - * tipc_link_sync_rcv - synchronize broadcast link endpoints. - * Receive the sequence number where we should start receiving and - * acking broadcast packets from a newly added peer node, and open - * up for reception of such packets. - * - * Called with node locked - */ -static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - - n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg); - n->bclink.recv_permitted = true; - kfree_skb(buf); -} - -/* - * tipc_link_push_packets - push unsent packets to bearer - * - * Push out the unsent messages of a link where congestion - * has abated. Node is locked. - * - * Called with node locked - */ -void tipc_link_push_packets(struct tipc_link *link) -{ - struct sk_buff *skb; - struct tipc_msg *msg; - u16 seqno = link->snd_nxt; - u16 ack = mod(link->rcv_nxt - 1); - - while (skb_queue_len(&link->transmq) < link->window) { - skb = __skb_dequeue(&link->backlogq); - if (!skb) - break; - msg = buf_msg(skb); - link->backlog[msg_importance(msg)].len--; - msg_set_ack(msg, ack); - msg_set_seqno(msg, seqno); - seqno = mod(seqno + 1); - msg_set_bcast_ack(msg, link->owner->bclink.last_in); - link->rcv_unacked = 0; - __skb_queue_tail(&link->transmq, skb); - tipc_bearer_send(link->owner->net, link->bearer_id, - skb, link->media_addr); - } - link->snd_nxt = seqno; -} - void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) { struct sk_buff *skb, *_skb; struct tipc_msg *hdr; u16 seqno = l->snd_nxt; u16 ack = l->rcv_nxt - 1; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; while (skb_queue_len(&l->transmq) < l->window) { skb = skb_peek(&l->backlogq); @@ -826,96 +788,35 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) l->backlog[msg_importance(hdr)].len--; __skb_queue_tail(&l->transmq, skb); __skb_queue_tail(xmitq, _skb); - msg_set_ack(hdr, ack); + TIPC_SKB_CB(skb)->ackers = l->ackers; msg_set_seqno(hdr, seqno); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); l->rcv_unacked = 0; seqno++; } l->snd_nxt = seqno; } -static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct net *net = l_ptr->owner->net; - - pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); - - if (l_ptr->addr) { - /* Handle failure on standard link */ - link_print(l_ptr, "Resetting link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(msg), msg_type(msg), msg_size(msg), - msg_errcode(msg)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg)); - } else { - /* Handle failure on broadcast link */ - struct tipc_node *n_ptr; - char addr_string[16]; - - pr_info("Msg seq number: %u, ", msg_seqno(msg)); - pr_cont("Outstanding acks: %lu\n", - (unsigned long) TIPC_SKB_CB(buf)->handle); - - n_ptr = tipc_bclink_retransmit_to(net); - - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_info("Broadcast link info for %s\n", addr_string); - pr_info("Reception permitted: %d, Acked: %u\n", - n_ptr->bclink.recv_permitted, - n_ptr->bclink.acked); - pr_info("Last in: %u, Oos state: %u, Last sent: %u\n", - n_ptr->bclink.last_in, - n_ptr->bclink.oos_state, - n_ptr->bclink.last_sent); - - n_ptr->action_flags |= TIPC_BCAST_RESET; - l_ptr->stale_count = 0; - } -} - -void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, - u32 retransmits) +static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) { - struct tipc_msg *msg; - - if (!skb) - return; - - msg = buf_msg(skb); - - /* Detect repeated retransmit failures */ - if (l_ptr->last_retransm == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - link_retransmit_failure(l_ptr, skb); - return; - } - } else { - l_ptr->last_retransm = msg_seqno(msg); - l_ptr->stale_count = 1; - } + struct tipc_msg *hdr = buf_msg(skb); - skb_queue_walk_from(&l_ptr->transmq, skb) { - if (!retransmits) - break; - msg = buf_msg(skb); - msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, - l_ptr->media_addr); - retransmits--; - l_ptr->stats.retransmitted++; - } + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "Resetting link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, src: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); } -static int tipc_link_retransm(struct tipc_link *l, int retransm, - struct sk_buff_head *xmitq) +int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, + struct sk_buff_head *xmitq) { struct sk_buff *_skb, *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; + u16 ack = l->rcv_nxt - 1; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; if (!skb) return 0; @@ -928,19 +829,25 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, link_retransmit_failure(l, skb); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } + + /* Move forward to where retransmission should start */ skb_queue_walk(&l->transmq, skb) { - if (!retransm) - return 0; + if (!less(buf_seqno(skb), from)) + break; + } + + skb_queue_walk_from(&l->transmq, skb) { + if (more(buf_seqno(skb), to)) + break; hdr = buf_msg(skb); _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); if (!_skb) return 0; hdr = buf_msg(_skb); - msg_set_ack(hdr, l->rcv_nxt - 1); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); - retransm--; l->stats.retransmitted++; } return 0; @@ -951,11 +858,9 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, * Consumes buffer if message is of right type * Node lock must be held */ -static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { - struct tipc_node *node = link->owner; - switch (msg_user(buf_msg(skb))) { case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: @@ -965,8 +870,8 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, skb_queue_tail(inputq, skb); return true; case NAME_DISTRIBUTOR: - node->bclink.recv_permitted = true; - skb_queue_tail(link->namedq, skb); + l->bc_rcvlink->state = LINK_ESTABLISHED; + skb_queue_tail(l->namedq, skb); return true; case MSG_BUNDLER: case TUNNEL_PROTOCOL: @@ -987,7 +892,6 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { - struct tipc_node *node = l->owner; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; @@ -1028,13 +932,15 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, if (tipc_buf_append(reasm_skb, &skb)) { l->stats.recv_fragmented++; tipc_data_input(l, skb, inputq); - } else if (!*reasm_skb) { + } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) { + pr_warn_ratelimited("Unable to build fragment list\n"); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } return 0; } else if (usr == BCAST_PROTOCOL) { - tipc_link_sync_rcv(node, skb); - return 0; + tipc_bcast_lock(l->net); + tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); + tipc_bcast_unlock(l->net); } drop: kfree_skb(skb); @@ -1057,12 +963,28 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) } /* tipc_link_build_ack_msg: prepare link acknowledge message for transmission + * + * Note that sending of broadcast ack is coordinated among nodes, to reduce + * risk of ack storms towards the sender */ -void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { + if (!l) + return 0; + + /* Broadcast ACK must be sent via a unicast link => defer to caller */ + if (link_is_bc_rcvlink(l)) { + if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf) + return 0; + l->rcv_unacked = 0; + return TIPC_LINK_SND_BC_ACK; + } + + /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + return 0; } /* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message @@ -1084,6 +1006,9 @@ static void tipc_link_build_nack_msg(struct tipc_link *l, { u32 def_cnt = ++l->stats.deferred_recv; + if (link_is_bc_rcvlink(l)) + return; + if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); } @@ -1144,12 +1069,11 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, l->rcv_nxt++; l->stats.recv_info++; if (!tipc_data_input(l, skb, l->inputq)) - rc = tipc_link_input(l, skb, l->inputq); - if (unlikely(rc)) - break; + rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) - tipc_link_build_ack_msg(l, xmitq); - + rc |= tipc_link_build_ack_msg(l, xmitq); + if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) + break; } while ((skb = __skb_dequeue(defq))); return rc; @@ -1158,45 +1082,6 @@ drop: return rc; } -/** - * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue - * - * Returns increase in queue length (i.e. 0 or 1) - */ -u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) -{ - struct sk_buff *skb1; - u16 seq_no = buf_seqno(skb); - - /* Empty queue ? */ - if (skb_queue_empty(list)) { - __skb_queue_tail(list, skb); - return 1; - } - - /* Last ? */ - if (less(buf_seqno(skb_peek_tail(list)), seq_no)) { - __skb_queue_tail(list, skb); - return 1; - } - - /* Locate insertion point in queue, then insert; discard if duplicate */ - skb_queue_walk(list, skb1) { - u16 curr_seqno = buf_seqno(skb1); - - if (seq_no == curr_seqno) { - kfree_skb(skb); - return 0; - } - - if (less(seq_no, curr_seqno)) - break; - } - - __skb_queue_before(list, skb1, skb); - return 1; -} - /* * Send protocol message to the other endpoint. */ @@ -1212,23 +1097,17 @@ void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, skb = __skb_dequeue(&xmitq); if (!skb) return; - tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr); + tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr); l->rcv_unacked = 0; - kfree_skb(skb); } -/* tipc_link_build_proto_msg: prepare link protocol message for transmission - */ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { struct sk_buff *skb = NULL; struct tipc_msg *hdr = l->pmsg; - u16 snd_nxt = l->snd_nxt; - u16 rcv_nxt = l->rcv_nxt; - u16 rcv_last = rcv_nxt - 1; - int node_up = l->owner->bclink.recv_permitted; + bool node_up = link_is_up(l->bc_rcvlink); /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1236,33 +1115,34 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_type(hdr, mtyp); msg_set_net_plane(hdr, l->net_plane); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); - msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net)); + msg_set_next_sent(hdr, l->snd_nxt); + msg_set_ack(hdr, l->rcv_nxt - 1); + msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); msg_set_redundant_link(hdr, node_up); msg_set_seq_gap(hdr, 0); /* Compatibility: created msg must not be in sequence with pkt flow */ - msg_set_seqno(hdr, snd_nxt + U16_MAX / 2); + msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); if (mtyp == STATE_MSG) { if (!tipc_link_is_up(l)) return; - msg_set_next_sent(hdr, snd_nxt); /* Override rcvgap if there are packets in deferred queue */ if (!skb_queue_empty(&l->deferdq)) - rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt; + rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt; if (rcvgap) { msg_set_seq_gap(hdr, rcvgap); l->stats.sent_nacks++; } - msg_set_ack(hdr, rcv_last); msg_set_probe(hdr, probe); if (probe) l->stats.sent_probes++; l->stats.sent_states++; + l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_max_pkt(hdr, l->advertised_mtu); @@ -1354,7 +1234,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, { struct tipc_msg *hdr = buf_msg(skb); u16 rcvgap = 0; - u16 nacked_gap = msg_seq_gap(hdr); + u16 ack = msg_ack(hdr); + u16 gap = msg_seq_gap(hdr); u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); @@ -1363,7 +1244,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, char *if_name; int rc = 0; - if (tipc_link_is_blocked(l)) + if (tipc_link_is_blocked(l) || !xmitq) goto exit; if (link_own_addr(l) > msg_prevnode(hdr)) @@ -1433,11 +1314,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (rcvgap || (msg_probe(hdr))) tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, 0, 0, xmitq); - tipc_link_release_pkts(l, msg_ack(hdr)); + tipc_link_release_pkts(l, ack); /* If NACK, retransmit will now start at right position */ - if (nacked_gap) { - rc = tipc_link_retransm(l, nacked_gap, xmitq); + if (gap) { + rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq); l->stats.recv_nacks++; } @@ -1450,6 +1331,188 @@ exit: return rc; } +/* tipc_link_build_bc_proto_msg() - create broadcast protocol message + */ +static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast, + u16 peers_snd_nxt, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + struct tipc_msg *hdr; + struct sk_buff *dfrd_skb = skb_peek(&l->deferdq); + u16 ack = l->rcv_nxt - 1; + u16 gap_to = peers_snd_nxt - 1; + + skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, + 0, l->addr, link_own_addr(l), 0, 0, 0); + if (!skb) + return false; + hdr = buf_msg(skb); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); + msg_set_bcast_ack(hdr, ack); + msg_set_bcgap_after(hdr, ack); + if (dfrd_skb) + gap_to = buf_seqno(dfrd_skb) - 1; + msg_set_bcgap_to(hdr, gap_to); + msg_set_non_seq(hdr, bcast); + __skb_queue_tail(xmitq, skb); + return true; +} + +/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints. + * + * Give a newly added peer node the sequence number where it should + * start receiving and acking broadcast packets. + */ +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head list; + + __skb_queue_head_init(&list); + if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) + return; + tipc_link_xmit(l, &list, xmitq); +} + +/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer + */ +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) +{ + int mtyp = msg_type(hdr); + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + + if (link_is_up(l)) + return; + + if (msg_user(hdr) == BCAST_PROTOCOL) { + l->rcv_nxt = peers_snd_nxt; + l->state = LINK_ESTABLISHED; + return; + } + + if (l->peer_caps & TIPC_BCAST_SYNCH) + return; + + if (msg_peer_node_is_up(hdr)) + return; + + /* Compatibility: accept older, less safe initial synch data */ + if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG)) + l->rcv_nxt = peers_snd_nxt; +} + +/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state + */ +void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) +{ + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + + if (!link_is_up(l)) + return; + + if (!msg_peer_node_is_up(hdr)) + return; + + l->bc_peer_is_up = true; + + /* Ignore if peers_snd_nxt goes beyond receive window */ + if (more(peers_snd_nxt, l->rcv_nxt + l->window)) + return; + + if (!more(peers_snd_nxt, l->rcv_nxt)) { + l->nack_state = BC_NACK_SND_CONDITIONAL; + return; + } + + /* Don't NACK if one was recently sent or peeked */ + if (l->nack_state == BC_NACK_SND_SUPPRESS) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + return; + } + + /* Conditionally delay NACK sending until next synch rcv */ + if (l->nack_state == BC_NACK_SND_CONDITIONAL) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) + return; + } + + /* Send NACK now but suppress next one */ + tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); + l->nack_state = BC_NACK_SND_SUPPRESS; +} + +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *tmp; + struct tipc_link *snd_l = l->bc_sndlink; + + if (!link_is_up(l) || !l->bc_peer_is_up) + return; + + if (!more(acked, l->acked)) + return; + + /* Skip over packets peer has already acked */ + skb_queue_walk(&snd_l->transmq, skb) { + if (more(buf_seqno(skb), l->acked)) + break; + } + + /* Update/release the packets peer is acking now */ + skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) { + if (more(buf_seqno(skb), acked)) + break; + if (!--TIPC_SKB_CB(skb)->ackers) { + __skb_unlink(skb, &snd_l->transmq); + kfree_skb(skb); + } + } + l->acked = acked; + tipc_link_advance_backlog(snd_l, xmitq); + if (unlikely(!skb_queue_empty(&snd_l->wakeupq))) + link_prepare_wakeup(snd_l); +} + +/* tipc_link_bc_nack_rcv(): receive broadcast nack message + */ +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u32 dnode = msg_destnode(hdr); + int mtyp = msg_type(hdr); + u16 acked = msg_bcast_ack(hdr); + u16 from = acked + 1; + u16 to = msg_bcgap_to(hdr); + u16 peers_snd_nxt = to + 1; + int rc = 0; + + kfree_skb(skb); + + if (!tipc_link_is_up(l) || !l->bc_peer_is_up) + return 0; + + if (mtyp != STATE_MSG) + return 0; + + if (dnode == link_own_addr(l)) { + tipc_link_bc_ack_rcv(l, acked, xmitq); + rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); + l->stats.recv_nacks++; + return rc; + } + + /* Msg for other node => suppress own NACK at next sync if applicable */ + if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from)) + l->nack_state = BC_NACK_SND_SUPPRESS; + + return 0; +} + void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); @@ -1514,7 +1577,7 @@ static void link_reset_statistics(struct tipc_link *l_ptr) static void link_print(struct tipc_link *l, const char *str) { struct sk_buff *hskb = skb_peek(&l->transmq); - u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt; + u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1; u16 tail = l->snd_nxt - 1; pr_info("%s Link <%s> state %x\n", str, l->name, l->state); @@ -1738,7 +1801,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (tipc_link_is_up(link)) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) goto attr_msg_full; - if (tipc_link_is_active(link)) + if (link->active) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index 0201212cb49a..66d859b66c84 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -66,7 +66,8 @@ enum { */ enum { TIPC_LINK_UP_EVT = 1, - TIPC_LINK_DOWN_EVT = (1 << 1) + TIPC_LINK_DOWN_EVT = (1 << 1), + TIPC_LINK_SND_BC_ACK = (1 << 2) }; /* Starting value for maximum packet size negotiation on unicast links @@ -110,7 +111,7 @@ struct tipc_stats { * @name: link name character string * @media_addr: media address to use when sending messages over link * @timer: link timer - * @owner: pointer to peer node + * @net: pointer to namespace struct * @refcnt: reference counter for permanent references (owner node & timer) * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint @@ -119,6 +120,7 @@ struct tipc_stats { * @keepalive_intv: link keepalive timer interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field @@ -134,6 +136,8 @@ struct tipc_stats { * @snt_nxt: next sequence number to use for outbound messages * @last_retransmitted: sequence number of most recently retransmitted message * @stale_count: # of identical retransmit requests made by peer + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @unacked_window: # of inbound messages rx'd without ack'ing back to peer @@ -143,13 +147,14 @@ struct tipc_stats { * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvr: marks that this is a broadcast receiver link * @stats: collects statistics regarding link activity */ struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; struct tipc_media_addr *media_addr; - struct tipc_node *owner; + struct net *net; /* Management and link supervision data */ u32 peer_session; @@ -159,6 +164,8 @@ struct tipc_link { unsigned long keepalive_intv; u32 abort_limit; u32 state; + u16 peer_caps; + bool active; u32 silent_intv_cnt; struct { unchar hdr[INT_H_SIZE]; @@ -201,18 +208,35 @@ struct tipc_link { /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + /* Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + int nack_state; + bool bc_peer_is_up; + /* Statistics */ struct tipc_stats stats; }; -bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, - u32 ownnode, u32 peer, struct tipc_media_addr *maddr, - struct sk_buff_head *inputq, struct sk_buff_head *namedq, +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 ownnode, u32 peer, + u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, struct tipc_link **link); +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); -void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); void tipc_link_reset_fragments(struct tipc_link *l_ptr); @@ -223,23 +247,11 @@ bool tipc_link_is_establishing(struct tipc_link *l); bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); -int tipc_link_is_active(struct tipc_link *l_ptr); -void tipc_link_purge_queues(struct tipc_link *l_ptr); -void tipc_link_purge_backlog(struct tipc_link *l); +void tipc_link_set_active(struct tipc_link *l, bool active); void tipc_link_reset(struct tipc_link *l_ptr); -int __tipc_link_xmit(struct net *net, struct tipc_link *link, - struct sk_buff_head *list); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); -void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority); -void tipc_link_push_packets(struct tipc_link *l_ptr); -u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf); -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); -void tipc_link_retransmit(struct tipc_link *l_ptr, - struct sk_buff *start, u32 retransmits); -struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, - const struct sk_buff *skb); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); @@ -249,5 +261,23 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); - +int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq); +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq); +int tipc_link_bc_peers(struct tipc_link *l); +void tipc_link_set_mtu(struct tipc_link *l, int mtu); +int tipc_link_mtu(struct tipc_link *l); +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq); +void tipc_link_build_bc_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); +void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 454f5ec275c8..8740930f0787 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) { struct sk_buff *head = *headbuf; struct sk_buff *frag = *buf; - struct sk_buff *tail; + struct sk_buff *tail = NULL; struct tipc_msg *msg; u32 fragid; int delta; @@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (unlikely(skb_unclone(frag, GFP_ATOMIC))) goto err; head = *headbuf = frag; - skb_frag_list_init(head); - TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; + TIPC_SKB_CB(head)->tail = NULL; + if (skb_is_nonlinear(head)) { + skb_walk_frags(head, tail) { + TIPC_SKB_CB(head)->tail = tail; + } + } else { + skb_frag_list_init(head); + } return 0; } @@ -176,7 +182,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *buf = NULL; return 0; err: - pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); kfree_skb(*headbuf); *buf = *headbuf = NULL; @@ -559,18 +564,22 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) /* tipc_msg_reassemble() - clone a buffer chain of fragments and * reassemble the clones into one message */ -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq) { - struct sk_buff *skb; + struct sk_buff *skb, *_skb; struct sk_buff *frag = NULL; struct sk_buff *head = NULL; - int hdr_sz; + int hdr_len; /* Copy header if single buffer */ if (skb_queue_len(list) == 1) { skb = skb_peek(list); - hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); - return __pskb_copy(skb, hdr_sz, GFP_ATOMIC); + hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); + _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC); + if (!_skb) + return false; + __skb_queue_tail(rcvq, _skb); + return true; } /* Clone all fragments and reassemble */ @@ -584,11 +593,12 @@ struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) if (!head) goto error; } - return frag; + __skb_queue_tail(rcvq, frag); + return true; error: pr_warn("Failed do clone local mcast rcv buffer\n"); kfree_skb(head); - return NULL; + return false; } /* tipc_skb_queue_sorted(); sort pkt into list according to sequence number diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 9f0ef54be612..55778a0aebf3 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -112,6 +112,7 @@ struct tipc_skb_cb { bool wakeup_pending; u16 chain_sz; u16 chain_imp; + u16 ackers; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) @@ -600,6 +601,11 @@ static inline u32 msg_last_bcast(struct tipc_msg *m) return msg_bits(m, 4, 16, 0xffff); } +static inline u32 msg_bc_snd_nxt(struct tipc_msg *m) +{ + return msg_last_bcast(m) + 1; +} + static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); @@ -789,7 +795,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index e6018b7eb197..c07612bab95c 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -102,7 +102,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - tipc_node_xmit_skb(net, oskb, dnode, dnode); + tipc_node_xmit_skb(net, oskb, dnode, 0); } rcu_read_unlock(); @@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode) &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_node_xmit(net, &head, dnode, dnode); + tipc_node_xmit(net, &head, dnode, 0); } static void tipc_publ_subscribe(struct net *net, struct publication *publ, diff --git a/net/tipc/net.c b/net/tipc/net.c index d6d1399ae229..77bf9113c7a7 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -112,14 +112,11 @@ int tipc_net_start(struct net *net, u32 addr) { struct tipc_net *tn = net_generic(net, tipc_net_id); char addr_string[16]; - int res; tn->own_addr = addr; tipc_named_reinit(net); tipc_sk_reinit(net); - res = tipc_bclink_init(net); - if (res) - return res; + tipc_bcast_reinit(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, TIPC_ZONE_SCOPE, 0, tn->own_addr); @@ -142,7 +139,6 @@ void tipc_net_stop(struct net *net) tn->own_addr); rtnl_lock(); tipc_bearer_stop(net); - tipc_bclink_stop(net); tipc_node_stop(net); rtnl_unlock(); diff --git a/net/tipc/node.c b/net/tipc/node.c index 2670751d0e2e..20cddec0a43c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -72,7 +72,6 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); -static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); @@ -165,8 +164,10 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bclink.namedq); - __skb_queue_head_init(&n_ptr->bclink.deferdq); + skb_queue_head_init(&n_ptr->bc_entry.namedq); + skb_queue_head_init(&n_ptr->bc_entry.inputq1); + __skb_queue_head_init(&n_ptr->bc_entry.arrvq); + skb_queue_head_init(&n_ptr->bc_entry.inputq2); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -177,6 +178,18 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, + U16_MAX, tipc_bc_sndlink(net)->window, + n_ptr->capabilities, + &n_ptr->bc_entry.inputq1, + &n_ptr->bc_entry.namedq, + tipc_bc_sndlink(net), + &n_ptr->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n_ptr); + n_ptr = NULL; + goto exit; + } tipc_node_get(n_ptr); setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); n_ptr->keepalive_intv = U32_MAX; @@ -203,6 +216,7 @@ static void tipc_node_delete(struct tipc_node *node) { list_del_rcu(&node->list); hlist_del_rcu(&node->hash); + kfree(node->bc_entry.link); kfree_rcu(node, rcu); } @@ -332,6 +346,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); + tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", nl->name, nl->net_plane); @@ -340,8 +355,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; - tipc_link_build_bcast_sync_msg(nl, xmitq); - node_established_contact(n); + tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_bcast_add_peer(n->net, nl, xmitq); return; } @@ -350,8 +366,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Old link <%s> becomes standby\n", ol->name); *slot0 = bearer_id; *slot1 = bearer_id; + tipc_link_set_active(nl, true); + tipc_link_set_active(ol, false); } else if (nl->priority == ol->priority) { - *slot0 = bearer_id; + tipc_link_set_active(nl, true); + *slot1 = bearer_id; } else { pr_debug("New link <%s> is standby\n", nl->name); } @@ -428,8 +447,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, tipc_link_build_reset_msg(l, xmitq); *maddr = &n->links[*bearer_id].maddr; node_lost_contact(n, &le->inputq); + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); return; } + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ tnl = node_active_link(n, 0); @@ -493,6 +514,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, bool link_up = false; bool accept_addr = false; bool reset = true; + char *if_name; *dupl_addr = false; *respond = false; @@ -579,9 +601,15 @@ void tipc_node_check_dest(struct net *net, u32 onode, pr_warn("Cannot establish 3rd link to %x\n", n->addr); goto exit; } - if (!tipc_link_create(n, b, mod(tipc_net(net)->random), - tipc_own_addr(net), onode, &le->maddr, - &le->inputq, &n->bclink.namedq, &l)) { + if_name = strchr(b->name, ':') + 1; + if (!tipc_link_create(net, if_name, b->identity, b->tolerance, + b->net_plane, b->mtu, b->priority, + b->window, mod(tipc_net(net)->random), + tipc_own_addr(net), onode, + n->capabilities, + tipc_bc_sndlink(n->net), n->bc_entry.link, + &le->inputq, + &n->bc_entry.namedq, &l)) { *respond = false; goto exit; } @@ -824,58 +852,36 @@ bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) return true; } -static void node_established_contact(struct tipc_node *n_ptr) -{ - tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT); - n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; - n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); - tipc_bclink_add_node(n_ptr->net, n_ptr->addr); -} - -static void node_lost_contact(struct tipc_node *n_ptr, +static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { char addr_string[16]; struct tipc_sock_conn *conn, *safe; struct tipc_link *l; - struct list_head *conns = &n_ptr->conn_sks; + struct list_head *conns = &n->conn_sks; struct sk_buff *skb; - struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); uint i; pr_debug("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, n->addr)); - /* Flush broadcast link info associated with lost node */ - if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferdq); - - if (n_ptr->bclink.reasm_buf) { - kfree_skb(n_ptr->bclink.reasm_buf); - n_ptr->bclink.reasm_buf = NULL; - } - - tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); - tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); - - n_ptr->bclink.recv_permitted = false; - } + /* Clean up broadcast state */ + tipc_bcast_remove_peer(n->net, n->bc_entry.link); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { - l = n_ptr->links[i].link; + l = n->links[i].link; if (l) tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } /* Notify publications from this node */ - n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; + n->action_flags |= TIPC_NOTIFY_NODE_DOWN; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tn->own_addr, + SHORT_H_SIZE, 0, tipc_own_addr(n->net), conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); if (likely(skb)) @@ -937,18 +943,13 @@ void tipc_node_unlock(struct tipc_node *node) publ_list = &node->publ_list; node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | - TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_BCAST_RESET); + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); spin_unlock_bh(&node->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) tipc_publ_notify(net, publ_list, addr); - if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(net); - if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(net, addr); @@ -960,11 +961,6 @@ void tipc_node_unlock(struct tipc_node *node) tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); - if (flags & TIPC_BCAST_MSG_EVT) - tipc_bclink_input(net); - - if (flags & TIPC_BCAST_RESET) - tipc_node_reset_links(node); } /* Caller should hold node lock for the passed node */ @@ -1080,6 +1076,67 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, } /** + * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer_id: id of bearer message arrived on + * + * Invoked with no locks held. + */ +static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) +{ + int rc; + struct sk_buff_head xmitq; + struct tipc_bclink_entry *be; + struct tipc_link_entry *le; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + u32 dnode = msg_destnode(hdr); + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + /* If NACK for other node, let rcv link for that node peek into it */ + if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net))) + n = tipc_node_find(net, dnode); + else + n = tipc_node_find(net, msg_prevnode(hdr)); + if (!n) { + kfree_skb(skb); + return; + } + be = &n->bc_entry; + le = &n->links[bearer_id]; + + rc = tipc_bcast_rcv(net, be->link, skb); + + /* Broadcast link reset may happen at reassembly failure */ + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_reset_links(n); + + /* Broadcast ACKs are sent on a unicast link */ + if (rc & TIPC_LINK_SND_BC_ACK) { + tipc_node_lock(n); + tipc_link_build_ack_msg(le->link, &xmitq); + tipc_node_unlock(n); + } + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + + /* Deliver. 'arrvq' is under inputq2's lock protection */ + if (!skb_queue_empty(&be->inputq1)) { + spin_lock_bh(&be->inputq2.lock); + spin_lock_bh(&be->inputq1.lock); + skb_queue_splice_tail_init(&be->inputq1, &be->arrvq); + spin_unlock_bh(&be->inputq1.lock); + spin_unlock_bh(&be->inputq2.lock); + tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2); + } + tipc_node_put(n); +} + +/** * tipc_node_check_state - check and if necessary update node state * @skb: TIPC packet * @bearer_id: identity of bearer delivering the packet @@ -1221,6 +1278,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int usr = msg_user(hdr); int bearer_id = b->identity; struct tipc_link_entry *le; + u16 bc_ack = msg_bcast_ack(hdr); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1229,13 +1287,12 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(!tipc_msg_validate(skb))) goto discard; - /* Handle arrival of a non-unicast link packet */ + /* Handle arrival of discovery or broadcast packet */ if (unlikely(msg_non_seq(hdr))) { - if (usr == LINK_CONFIG) - tipc_disc_rcv(net, skb, b); + if (unlikely(usr == LINK_CONFIG)) + return tipc_disc_rcv(net, skb, b); else - tipc_bclink_rcv(net, skb); - return; + return tipc_node_bc_rcv(net, skb, bearer_id); } /* Locate neighboring node that sent packet */ @@ -1244,19 +1301,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) goto discard; le = &n->links[bearer_id]; + /* Ensure broadcast reception is in synch with peer's send state */ + if (unlikely(usr == LINK_PROTOCOL)) + tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); + else if (unlikely(n->bc_entry.link->acked != bc_ack)) + tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); + tipc_node_lock(n); /* Is reception permitted at the moment ? */ if (!tipc_node_filter_pkt(n, hdr)) goto unlock; - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) - tipc_bclink_sync_state(n, hdr); - - /* Release acked broadcast packets */ - if (unlikely(n->bclink.acked != msg_bcast_ack(hdr))) - tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); - /* Check and if necessary update node state */ if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { rc = tipc_link_rcv(le->link, skb, &xmitq); @@ -1271,8 +1327,8 @@ unlock: if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_node_link_down(n, bearer_id, false); - if (unlikely(!skb_queue_empty(&n->bclink.namedq))) - tipc_named_rcv(net, &n->bclink.namedq); + if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) + tipc_named_rcv(net, &n->bc_entry.namedq); if (!skb_queue_empty(&le->inputq)) tipc_sk_rcv(net, &le->inputq); diff --git a/net/tipc/node.h b/net/tipc/node.h index 344b3e7594fd..6734562d3c6e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -55,36 +55,18 @@ enum { TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_WAKEUP_BCAST_USERS = (1 << 5), TIPC_NOTIFY_LINK_UP = (1 << 6), - TIPC_NOTIFY_LINK_DOWN = (1 << 7), - TIPC_BCAST_MSG_EVT = (1 << 9), - TIPC_BCAST_RESET = (1 << 10) + TIPC_NOTIFY_LINK_DOWN = (1 << 7) }; -/** - * struct tipc_node_bclink - TIPC node bclink structure - * @acked: sequence # of last outbound b'cast message acknowledged by node - * @last_in: sequence # of last in-sequence b'cast message received from node - * @last_sent: sequence # of last b'cast message sent by node - * @oos_state: state tracker for handling OOS b'cast messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @reasm_buf: broadcast reassembly queue head from node - * @inputq_map: bitmap indicating which inqueues should be kicked - * @recv_permitted: true if node is allowed to receive b'cast messages +/* Optional capabilities supported by this code version */ -struct tipc_node_bclink { - u32 acked; - u32 last_in; - u32 last_sent; - u32 oos_state; - u32 deferred_size; - struct sk_buff_head deferdq; - struct sk_buff *reasm_buf; - struct sk_buff_head namedq; - bool recv_permitted; +enum { + TIPC_BCAST_SYNCH = (1 << 1) }; +#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH + struct tipc_link_entry { struct tipc_link *link; u32 mtu; @@ -92,6 +74,14 @@ struct tipc_link_entry { struct tipc_media_addr maddr; }; +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + /** * struct tipc_node - TIPC node structure * @addr: network address of node @@ -104,7 +94,6 @@ struct tipc_link_entry { * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node * @action_flags: bit mask of different types of node actions - * @bclink: broadcast-related info * @state: connectivity state vs peer node * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes @@ -124,8 +113,8 @@ struct tipc_node { struct hlist_node hash; int active_links[2]; struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; int action_flags; - struct tipc_node_bclink bclink; struct list_head list; int state; u16 sync_point; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1060d52ff23e..552dbaba9cf3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -689,13 +689,13 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_hdr_sz(mhdr, MCAST_H_SIZE); new_mtu: - mtu = tipc_bclink_get_mtu(); + mtu = tipc_bcast_get_mtu(net); rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(net, pktchain); + rc = tipc_bcast_xmit(net, pktchain); if (likely(!rc)) return dsz; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 9bc0b1e515fa..816914ef228d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -52,6 +52,8 @@ /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 +#define UDP_MIN_HEADROOM 28 + static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, @@ -153,11 +155,12 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct udp_bearer *ub; struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; - struct sk_buff *clone; struct rtable *rt; - clone = skb_clone(skb, GFP_ATOMIC); - skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + if (skb_headroom(skb) < UDP_MIN_HEADROOM) + pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); if (!ub) { err = -ENODEV; @@ -167,7 +170,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct flowi4 fl = { .daddr = dst->ipv4.s_addr, .saddr = src->ipv4.s_addr, - .flowi4_mark = clone->mark, + .flowi4_mark = skb->mark, .flowi4_proto = IPPROTO_UDP }; rt = ip_route_output_key(net, &fl); @@ -176,7 +179,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, + err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, dst->udp_port, @@ -199,7 +202,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, &src->ipv6, &dst->ipv6, 0, ttl, src->udp_port, dst->udp_port, false); @@ -208,7 +211,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, return err; tx_error: - kfree_skb(clone); + kfree_skb(skb); return err; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 94f658235fb4..aaa0b58d6aba 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -326,9 +326,10 @@ found: return s; } -static inline int unix_writable(struct sock *sk) +static int unix_writable(const struct sock *sk) { - return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + return sk->sk_state != TCP_LISTEN && + (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; } static void unix_write_space(struct sock *sk) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index df5fc6b340f1..7fd1220fbfa0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -36,19 +36,20 @@ * not support simultaneous connects (two "client" sockets connecting). * * - "Server" sockets are referred to as listener sockets throughout this - * implementation because they are in the SS_LISTEN state. When a connection - * request is received (the second kind of socket mentioned above), we create a - * new socket and refer to it as a pending socket. These pending sockets are - * placed on the pending connection list of the listener socket. When future - * packets are received for the address the listener socket is bound to, we - * check if the source of the packet is from one that has an existing pending - * connection. If it does, we process the packet for the pending socket. When - * that socket reaches the connected state, it is removed from the listener - * socket's pending list and enqueued in the listener socket's accept queue. - * Callers of accept(2) will accept connected sockets from the listener socket's - * accept queue. If the socket cannot be accepted for some reason then it is - * marked rejected. Once the connection is accepted, it is owned by the user - * process and the responsibility for cleanup falls with that user process. + * implementation because they are in the VSOCK_SS_LISTEN state. When a + * connection request is received (the second kind of socket mentioned above), + * we create a new socket and refer to it as a pending socket. These pending + * sockets are placed on the pending connection list of the listener socket. + * When future packets are received for the address the listener socket is + * bound to, we check if the source of the packet is from one that has an + * existing pending connection. If it does, we process the packet for the + * pending socket. When that socket reaches the connected state, it is removed + * from the listener socket's pending list and enqueued in the listener + * socket's accept queue. Callers of accept(2) will accept connected sockets + * from the listener socket's accept queue. If the socket cannot be accepted + * for some reason then it is marked rejected. Once the connection is + * accepted, it is owned by the user process and the responsibility for cleanup + * falls with that user process. * * - It is possible that these pending sockets will never reach the connected * state; in fact, we may never receive another packet after the connection @@ -114,8 +115,6 @@ static struct proto vsock_proto = { */ #define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) -#define SS_LISTEN 255 - static const struct vsock_transport *transport; static DEFINE_MUTEX(vsock_register_mutex); @@ -887,7 +886,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, /* Listening sockets that have connections in their accept * queue can be read. */ - if (sk->sk_state == SS_LISTEN + if (sk->sk_state == VSOCK_SS_LISTEN && !vsock_is_accept_queue_empty(sk)) mask |= POLLIN | POLLRDNORM; @@ -1144,7 +1143,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = -EALREADY; break; default: - if ((sk->sk_state == SS_LISTEN) || + if ((sk->sk_state == VSOCK_SS_LISTEN) || vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { err = -EINVAL; goto out; @@ -1256,7 +1255,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) goto out; } - if (listener->sk_state != SS_LISTEN) { + if (listener->sk_state != VSOCK_SS_LISTEN) { err = -EINVAL; goto out; } @@ -1348,7 +1347,7 @@ static int vsock_listen(struct socket *sock, int backlog) } sk->sk_max_ack_backlog = backlog; - sk->sk_state = SS_LISTEN; + sk->sk_state = VSOCK_SS_LISTEN; err = 0; @@ -1948,13 +1947,13 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) err = misc_register(&vsock_device); if (err) { pr_err("Failed to register misc device\n"); - return -ENOENT; + goto err_reset_transport; } err = proto_register(&vsock_proto, 1); /* we want our slab */ if (err) { pr_err("Cannot register vsock protocol\n"); - goto err_misc_deregister; + goto err_deregister_misc; } err = sock_register(&vsock_family_ops); @@ -1969,8 +1968,9 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) err_unregister_proto: proto_unregister(&vsock_proto); -err_misc_deregister: +err_deregister_misc: misc_deregister(&vsock_device); +err_reset_transport: transport = NULL; err_busy: mutex_unlock(&vsock_register_mutex); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 1f63daff3965..400d87294de3 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -40,13 +40,11 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); -static void vmci_transport_peer_attach_cb(u32 sub_id, - const struct vmci_event_data *ed, - void *client_data); static void vmci_transport_peer_detach_cb(u32 sub_id, const struct vmci_event_data *ed, void *client_data); static void vmci_transport_recv_pkt_work(struct work_struct *work); +static void vmci_transport_cleanup(struct work_struct *work); static int vmci_transport_recv_listen(struct sock *sk, struct vmci_transport_packet *pkt); static int vmci_transport_recv_connecting_server( @@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info { struct vmci_transport_packet pkt; }; +static LIST_HEAD(vmci_transport_cleanup_list); +static DEFINE_SPINLOCK(vmci_transport_cleanup_lock); +static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup); + static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, VMCI_INVALID_ID }; static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; @@ -90,8 +92,6 @@ static int PROTOCOL_OVERRIDE = -1; */ #define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) -#define SS_LISTEN 255 - /* Helper function to convert from a VMCI error code to a VSock error code. */ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) @@ -791,44 +791,6 @@ out: return err; } -static void vmci_transport_peer_attach_cb(u32 sub_id, - const struct vmci_event_data *e_data, - void *client_data) -{ - struct sock *sk = client_data; - const struct vmci_event_payload_qp *e_payload; - struct vsock_sock *vsk; - - e_payload = vmci_event_data_const_payload(e_data); - - vsk = vsock_sk(sk); - - /* We don't ask for delayed CBs when we subscribe to this event (we - * pass 0 as flags to vmci_event_subscribe()). VMCI makes no - * guarantees in that case about what context we might be running in, - * so it could be BH or process, blockable or non-blockable. So we - * need to account for all possible contexts here. - */ - local_bh_disable(); - bh_lock_sock(sk); - - /* XXX This is lame, we should provide a way to lookup sockets by - * qp_handle. - */ - if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, - e_payload->handle)) { - /* XXX This doesn't do anything, but in the future we may want - * to set a flag here to verify the attach really did occur and - * we weren't just sent a datagram claiming it was. - */ - goto out; - } - -out: - bh_unlock_sock(sk); - local_bh_enable(); -} - static void vmci_transport_handle_detach(struct sock *sk) { struct vsock_sock *vsk; @@ -871,28 +833,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - struct sock *sk = client_data; + struct vmci_transport *trans = client_data; const struct vmci_event_payload_qp *e_payload; - struct vsock_sock *vsk; e_payload = vmci_event_data_const_payload(e_data); - vsk = vsock_sk(sk); - if (vmci_handle_is_invalid(e_payload->handle)) - return; - - /* Same rules for locking as for peer_attach_cb(). */ - local_bh_disable(); - bh_lock_sock(sk); /* XXX This is lame, we should provide a way to lookup sockets by * qp_handle. */ - if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, - e_payload->handle)) - vmci_transport_handle_detach(sk); + if (vmci_handle_is_invalid(e_payload->handle) || + vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) + return; - bh_unlock_sock(sk); - local_bh_enable(); + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + spin_lock_bh(&trans->lock); + if (!trans->sk) + goto out; + + /* Apart from here, trans->lock is only grabbed as part of sk destruct, + * where trans->sk isn't locked. + */ + bh_lock_sock(trans->sk); + + vmci_transport_handle_detach(trans->sk); + + bh_unlock_sock(trans->sk); + out: + spin_unlock_bh(&trans->lock); } static void vmci_transport_qp_resumed_cb(u32 sub_id, @@ -919,7 +891,7 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; switch (sk->sk_state) { - case SS_LISTEN: + case VSOCK_SS_LISTEN: vmci_transport_recv_listen(sk, pkt); break; case SS_CONNECTING: @@ -1181,7 +1153,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, */ err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, vmci_transport_peer_detach_cb, - pending, &detach_sub_id); + vmci_trans(vpending), &detach_sub_id); if (err < VMCI_SUCCESS) { vmci_transport_send_reset(pending, pkt); err = vmci_transport_error_to_vsock_error(err); @@ -1321,7 +1293,6 @@ vmci_transport_recv_connecting_client(struct sock *sk, || vmci_trans(vsk)->qpair || vmci_trans(vsk)->produce_size != 0 || vmci_trans(vsk)->consume_size != 0 - || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { skerr = EPROTO; err = -EINVAL; @@ -1389,7 +1360,6 @@ static int vmci_transport_recv_connecting_client_negotiate( struct vsock_sock *vsk; struct vmci_handle handle; struct vmci_qp *qpair; - u32 attach_sub_id; u32 detach_sub_id; bool is_local; u32 flags; @@ -1399,7 +1369,6 @@ static int vmci_transport_recv_connecting_client_negotiate( vsk = vsock_sk(sk); handle = VMCI_INVALID_HANDLE; - attach_sub_id = VMCI_INVALID_ID; detach_sub_id = VMCI_INVALID_ID; /* If we have gotten here then we should be past the point where old @@ -1444,23 +1413,15 @@ static int vmci_transport_recv_connecting_client_negotiate( goto destroy; } - /* Subscribe to attach and detach events first. + /* Subscribe to detach events first. * * XXX We attach once for each queue pair created for now so it is easy * to find the socket (it's provided), but later we should only * subscribe once and add a way to lookup sockets by queue pair handle. */ - err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, - vmci_transport_peer_attach_cb, - sk, &attach_sub_id); - if (err < VMCI_SUCCESS) { - err = vmci_transport_error_to_vsock_error(err); - goto destroy; - } - err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, vmci_transport_peer_detach_cb, - sk, &detach_sub_id); + vmci_trans(vsk), &detach_sub_id); if (err < VMCI_SUCCESS) { err = vmci_transport_error_to_vsock_error(err); goto destroy; @@ -1496,7 +1457,6 @@ static int vmci_transport_recv_connecting_client_negotiate( vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = pkt->u.size; - vmci_trans(vsk)->attach_sub_id = attach_sub_id; vmci_trans(vsk)->detach_sub_id = detach_sub_id; vmci_trans(vsk)->notify_ops->process_negotiate(sk); @@ -1504,9 +1464,6 @@ static int vmci_transport_recv_connecting_client_negotiate( return 0; destroy: - if (attach_sub_id != VMCI_INVALID_ID) - vmci_event_unsubscribe(attach_sub_id); - if (detach_sub_id != VMCI_INVALID_ID) vmci_event_unsubscribe(detach_sub_id); @@ -1607,9 +1564,11 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk, vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; vmci_trans(vsk)->qpair = NULL; vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; - vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = - VMCI_INVALID_ID; + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; vmci_trans(vsk)->notify_ops = NULL; + INIT_LIST_HEAD(&vmci_trans(vsk)->elem); + vmci_trans(vsk)->sk = &vsk->sk; + spin_lock_init(&vmci_trans(vsk)->lock); if (psk) { vmci_trans(vsk)->queue_pair_size = vmci_trans(psk)->queue_pair_size; @@ -1629,29 +1588,57 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk, return 0; } -static void vmci_transport_destruct(struct vsock_sock *vsk) +static void vmci_transport_free_resources(struct list_head *transport_list) { - if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { - vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); - vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; - } + while (!list_empty(transport_list)) { + struct vmci_transport *transport = + list_first_entry(transport_list, struct vmci_transport, + elem); + list_del(&transport->elem); - if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { - vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); - vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; - } + if (transport->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(transport->detach_sub_id); + transport->detach_sub_id = VMCI_INVALID_ID; + } - if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { - vmci_qpair_detach(&vmci_trans(vsk)->qpair); - vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; - vmci_trans(vsk)->produce_size = 0; - vmci_trans(vsk)->consume_size = 0; + if (!vmci_handle_is_invalid(transport->qp_handle)) { + vmci_qpair_detach(&transport->qpair); + transport->qp_handle = VMCI_INVALID_HANDLE; + transport->produce_size = 0; + transport->consume_size = 0; + } + + kfree(transport); } +} + +static void vmci_transport_cleanup(struct work_struct *work) +{ + LIST_HEAD(pending); + + spin_lock_bh(&vmci_transport_cleanup_lock); + list_replace_init(&vmci_transport_cleanup_list, &pending); + spin_unlock_bh(&vmci_transport_cleanup_lock); + vmci_transport_free_resources(&pending); +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + /* Ensure that the detach callback doesn't use the sk/vsk + * we are about to destruct. + */ + spin_lock_bh(&vmci_trans(vsk)->lock); + vmci_trans(vsk)->sk = NULL; + spin_unlock_bh(&vmci_trans(vsk)->lock); if (vmci_trans(vsk)->notify_ops) vmci_trans(vsk)->notify_ops->socket_destruct(vsk); - kfree(vsk->trans); + spin_lock_bh(&vmci_transport_cleanup_lock); + list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list); + spin_unlock_bh(&vmci_transport_cleanup_lock); + schedule_work(&vmci_transport_cleanup_work); + vsk->trans = NULL; } @@ -2146,6 +2133,9 @@ module_init(vmci_transport_init); static void __exit vmci_transport_exit(void) { + cancel_work_sync(&vmci_transport_cleanup_work); + vmci_transport_free_resources(&vmci_transport_cleanup_list); + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { if (vmci_datagram_destroy_handle( vmci_transport_stream_handle) != VMCI_SUCCESS) @@ -2164,6 +2154,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index ce6c9623d5f0..2ad46f39649f 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -119,10 +119,12 @@ struct vmci_transport { u64 queue_pair_size; u64 queue_pair_min_size; u64 queue_pair_max_size; - u32 attach_sub_id; u32 detach_sub_id; union vmci_transport_notify notify; struct vmci_transport_notify_ops *notify_ops; + struct list_head elem; + struct sock *sk; + spinlock_t lock; /* protects sk. */ }; int vmci_transport_register(void); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 60ce7014e1b0..ad7f5b3f9b61 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -330,8 +330,10 @@ resume: if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) + if (inner_mode == NULL) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; + } } if (inner_mode->input(x, skb)) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a8de9e300200..805681a7d356 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #endif +#include <asm/unaligned.h> static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { @@ -728,7 +729,9 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); - memcpy(&p->stats, &x->stats, sizeof(p->stats)); + put_unaligned(x->stats.replay_window, &p->stats.replay_window); + put_unaligned(x->stats.replay, &p->stats.replay); + put_unaligned(x->stats.integrity_failed, &p->stats.integrity_failed); memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr)); p->mode = x->props.mode; p->replay_window = x->props.replay_window; @@ -1928,8 +1931,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; + struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; - if (!lt && !rp && !re) + if (!lt && !rp && !re && !et && !rt) return err; /* pedantic mode - thou shalt sayeth replaceth */ |